From df3d07beee7ea1a15bafa6bdfdf73e5d28a72338 Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 24 Nov 2020 14:47:42 +0800 Subject: [PATCH 01/30] =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.en.md | 30 +++++++++++++++++++++++------ README.zh-cn.md | 50 +++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 68 insertions(+), 12 deletions(-) diff --git a/README.en.md b/README.en.md index 3cd51b3..deccfe5 100644 --- a/README.en.md +++ b/README.en.md @@ -734,9 +734,9 @@ shadow_root_element.is_valid() # Returns whether the element is still in dom -## Docking with selenium code +## Splicing with selenium or requests code -The DrissionPage code can be seamlessly spliced with the selenium code, either directly using the selenium WebDriver object, or using its own WebDriver everywhere for the selenium code. Make the migration of existing projects very convenient. +DrissionPage code can be seamlessly spliced with selenium and requests code. You can use Selenium's WebDriver object directly, or you can export your own WebDriver to selenium code. The Session object of requests can also be passed directly. This makes the migration of existing projects very convenient. 
### selenium to DrissionPage @@ -745,11 +745,10 @@ driver = webdriver.Chrome() driver.get('https://www.baidu.com') page = MixPage(Drission(driver)) # Pass the driver to Drission, create a MixPage object -print(page.title) # Print result: You will know by clicking on Baidu +print(page.title) # Print result: 百度一下,你就知道 +element = driver.find_element_by_xpath('//div') # Use selenium native functions ``` - - ### DrissionPage to selenium ```python @@ -757,7 +756,26 @@ page = MixPage() page.get('https://www.baidu.com') driver = page.driver # Get the WebDriver object from the MixPage object -print(driver.title) # Print results: You will know by clicking on Baidu +print(driver.title) # Print results: 百度一下,你就知道 +``` + +### requests to DrissionPage + +``` python +session = requests.Session() +drission = Drission(session_or_options=session) +page = MixPage(drission, mode='s') + +page.get('https://www.baidu.com') +``` + +### DrissionPage to requests + +```python +page = MixPage('s') +session = page.session + +response = session.get('https://www.baidu.com') ``` diff --git a/README.zh-cn.md b/README.zh-cn.md index 99ac7f2..fe43c9c 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -732,11 +732,11 @@ shadow_root_element.is_valid() # 返回元素是否还在 dom 内 -## 与 selenium 代码对接 +## 与 selenium 及 requests 代码对接 -DrissionPage 代码可与 selenium 代码无缝拼接,既可直接使用 selenium 的 WebDriver 对象,也可到处自身的 WebDriver 给 selenium 代码使用。使已有项目的迁移非常方便。 +DrissionPage 代码可与 selenium 及 requests 代码无缝拼接。既可直接使用 selenium 的 WebDriver 对象,也可导出自身的 WebDriver 给 selenium 代码使用。requests 的 Session 对象也可直接传递。使已有项目的迁移非常方便。 -### selenium 转 DrissionPage +### selenium 转 DrissionPage ```python driver = webdriver.Chrome() @@ -746,9 +746,7 @@ page = MixPage(Drission(driver)) # 把 driver 传递给 Drission,创建 MixPa print(page.title) # 打印结果:百度一下,你就知道 ``` - - -### DrissionPage 转 selenium +### DrissionPage 转 selenium ```python page = MixPage() @@ -756,8 +754,48 @@ page.get('https://www.baidu.com') driver = page.driver # 从 MixPage 对象中获取 WebDriver 对象 
print(driver.title) # 打印结果:百度一下,你就知道 +element = driver.find_element_by_xpath('//div') # 使用 selenium 原生功能 ``` +### requests 转 DrissionPage + +``` python +session = requests.Session() +drission = Drission(session_or_options=session) +page = MixPage(drission, mode='s') + +page.get('https://www.baidu.com') +``` + +### DrissionPage 转 requests + +```python +page = MixPage('s') +session = page.session + +response = session.get('https://www.baidu.com') +``` + + + +## requests 功能使用 + +### 连接参数 + +除了在创建时传入配置信息及连接参数,如有特别要求,s 模式下也可在每次访问网址时设置连接参数。 + +```python + +``` + +Tips:如果连接参数内没有指定,s 模式会根据当前域名自动填写 Host 和 Referer 属性。 + + + +### Response 对象 + + + ## 下载文件 From fbe249e24ade1b9b208c911764c014f71b52865c Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 25 Nov 2020 17:38:44 +0800 Subject: [PATCH 02/30] =?UTF-8?q?=E6=B7=BB=E5=8A=A0SessionOptions=E7=B1=BB?= =?UTF-8?q?=EF=BC=8C=E5=9F=BA=E6=9C=AC=E5=AE=8C=E6=88=90=E5=BE=85=E6=B5=8B?= =?UTF-8?q?=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/config.py | 322 +++++++++++++++++++++++++++++++++++---- DrissionPage/drission.py | 13 +- DrissionPage/mix_page.py | 4 +- 3 files changed, 304 insertions(+), 35 deletions(-) diff --git a/DrissionPage/config.py b/DrissionPage/config.py index 84e68a9..84c2b19 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -24,11 +24,15 @@ class OptionsManager(object): self._conf = ConfigParser() self._conf.read(self.ini_path, encoding='utf-8') + self._paths = None + self._chrome_options = None + self._session_options = None + if 'global_tmp_path' not in self.paths or not self.get_value('paths', 'global_tmp_path'): global_tmp_path = str((Path(__file__).parent / 'tmp').absolute()) Path(global_tmp_path).mkdir(parents=True, exist_ok=True) self.set_item('paths', 'global_tmp_path', global_tmp_path) - self.save() + self.save(self.ini_path) def __text__(self) -> str: """打印ini文件内容""" @@ -42,17 +46,26 @@ class OptionsManager(object): 
@property def paths(self) -> dict: """返回paths设置""" - return self.get_option('paths') + if self._paths is None: + self._paths = self.get_option('paths') + + return self._paths @property def chrome_options(self) -> dict: """返回chrome设置""" - return self.get_option('chrome_options') + if self._chrome_options is None: + self._chrome_options = self.get_option('chrome_options') + + return self._chrome_options @property def session_options(self) -> dict: """返回session设置""" - return self.get_option('session_options') + if self._session_options is None: + self._session_options = self.get_option('session_options') + + return self._session_options def get_value(self, section: str, item: str) -> Any: """获取配置的值 \n @@ -62,7 +75,7 @@ class OptionsManager(object): """ try: return eval(self._conf.get(section, item)) - except SyntaxError: + except (SyntaxError, NameError): return self._conf.get(section, item) except NoSectionError and NoOptionError: return None @@ -91,6 +104,7 @@ class OptionsManager(object): :return: 当前对象 """ self._conf.set(section, item, str(value)) + self.__setattr__(f'_{section}', None) return self def save(self, path: str = None): @@ -98,15 +112,238 @@ class OptionsManager(object): :param path: ini文件的路径,传入 'default' 保存到默认ini文件 :return: 当前对象 """ - path = Path(__file__).parent / 'configs.ini' if path == 'default' else path - path = Path(path or self.ini_path) + if path == 'default': + path = (Path(__file__).parent / 'configs.ini').absolute() + elif path is None: + path = Path(self.ini_path).absolute() + else: + path = Path(path).absolute() + path = path / 'config.ini' if path.is_dir() else path - path = path.absolute() - self._conf.write(open(path, 'w', encoding='utf-8')) + + self._conf.write(open(str(path), 'w', encoding='utf-8')) return self +class SessionOptions(object): + def __init__(self, read_file: bool = True, ini_path: str = None): + """ + :param read_file: + :param ini_path: + """ + self.ini_path = None + self._headers = None + self._cookies = None + 
self._auth = None + self._proxies = None + self._hooks = None + self._params = None + self._verify = None + self._cert = None + self._adapters = None + self._stream = None + self._trust_env = None + self._max_redirects = None + + if read_file: + self.ini_path = ini_path or str(Path(__file__).parent / 'configs.ini') + om = OptionsManager(self.ini_path) + options_dict = om.session_options + + if options_dict.get('headers', None) is not None: + self._headers = {key.lower(): options_dict['headers'][key] for key in options_dict['headers']} + + if options_dict.get('cookies', None) is not None: + self._headers = options_dict['cookies'] + + if options_dict.get('auth', None) is not None: + self._headers = options_dict['auth'] + + if options_dict.get('proxies', None) is not None: + self._headers = options_dict['proxies'] + + if options_dict.get('hooks', None) is not None: + self._headers = options_dict['hooks'] + + if options_dict.get('params', None) is not None: + self._headers = options_dict['params'] + + if options_dict.get('verify', None) is not None: + self._headers = options_dict['verify'] + + if options_dict.get('cert', None) is not None: + self._headers = options_dict['cert'] + + if options_dict.get('adapters', None) is not None: + self._headers = options_dict['adapters'] + + if options_dict.get('stream', None) is not None: + self._headers = options_dict['stream'] + + if options_dict.get('trust_env', None) is not None: + self._headers = options_dict['trust_env'] + + if options_dict.get('max_redirects', None) is not None: + self._headers = options_dict['max_redirects'] + + # self._cookies = options_dict.get('cookies', None) + # self._auth = options_dict.get('auth', None) + # self._proxies = options_dict.get('proxies', None) + # self._hooks = options_dict.get('hooks', None) + # self._params = options_dict.get('params', None) + # self._verify = options_dict.get('verify', True) + # self._cert = options_dict.get('cert', None) + # self._adapters = 
options_dict.get('adapters', None) + # self._stream = options_dict.get('stream', None) + # self._trust_env = options_dict.get('trust_env', True) + # self._max_redirects = options_dict.get('max_redirects', None) + + @property + def headers(self) -> dict: + return self._headers + + @property + def cookies(self) -> dict: + return self._cookies + + @property + def auth(self) -> tuple: + return self._auth + + @property + def proxies(self) -> dict: + return self._proxies + + @property + def hooks(self) -> dict: + return self._hooks + + @property + def params(self) -> dict: + return self._params + + @property + def verify(self) -> bool: + return self._verify + + @property + def cert(self) -> Union[str, tuple]: + return self._cert + + @property + def adapters(self): + return self._adapters + + @property + def stream(self) -> bool: + return self._stream + + @property + def trust_env(self) -> bool: + return self._trust_env + + @property + def max_redirects(self) -> int: + return self._max_redirects + + @headers.setter + def headers(self, headers: dict) -> None: + self._headers = {key.lower(): headers[key] for key in headers} + + @cookies.setter + def cookies(self, cookies: dict) -> None: + self._cookies = cookies + + @auth.setter + def auth(self, auth: tuple) -> None: + self._auth = auth + + @proxies.setter + def proxies(self, proxies: dict) -> None: + self._proxies = proxies + + @hooks.setter + def hooks(self, hooks: dict) -> None: + self._hooks = hooks + + @params.setter + def params(self, params: dict) -> None: + self._params = params + + @verify.setter + def verify(self, verify: bool) -> None: + self._verify = verify + + @cert.setter + def cert(self, cert: Union[str, tuple]) -> None: + self._cert = cert + + @adapters.setter + def adapters(self, adapters) -> None: + self._adapters = adapters + + @stream.setter + def stream(self, stream: bool) -> None: + self._stream = stream + + @trust_env.setter + def trust_env(self, trust_env: bool) -> None: + self._trust_env = 
trust_env + + @max_redirects.setter + def max_redirects(self, max_redirects: int) -> None: + self._max_redirects = max_redirects + + def set_header(self, attr: str, value: str): + if self._headers is None: + self._headers = {} + + self._headers[attr.lower()] = value + return self + + def remove_header(self, attr: str): + if self._headers is None: + return self + + attr = attr.lower() + if attr in self._headers: + self._headers.pop(attr) + + return self + + def save(self, path: str = None): + """保存设置到文件 \n + :param path: ini文件的路径,传入 'default' 保存到默认ini文件 + :return: 当前对象 + """ + if path == 'default': + path = (Path(__file__).parent / 'configs.ini').absolute() + elif path is None: + path = Path(self.ini_path).absolute() + else: + path = Path(path).absolute() + + path = path / 'config.ini' if path.is_dir() else path + + if path.exists(): + om = OptionsManager(path) + else: + om = OptionsManager(self.ini_path or str(Path(__file__).parent / 'configs.ini')) + + options = _session_options_to_dict(self) + + for i in options: + om.set_item('session_options', i, options[i]) + + om.save(str(path)) + + return self + + def as_dict(self) -> dict: + return _session_options_to_dict(self) + + class DriverOptions(Options): """chrome浏览器配置类,继承自selenium.webdriver.chrome.options的Options类, 增加了删除配置和保存到文件方法。 @@ -125,6 +362,7 @@ class DriverOptions(Options): self.ini_path = ini_path or str(Path(__file__).parent / 'configs.ini') om = OptionsManager(self.ini_path) options_dict = om.chrome_options + self._binary_location = options_dict.get('binary_location', '') self._arguments = options_dict.get('arguments', []) self._extensions = options_dict.get('extensions', []) @@ -145,12 +383,21 @@ class DriverOptions(Options): :param path: ini文件的路径,传入 'default' 保存到默认ini文件 :return: 当前对象 """ - om = OptionsManager() - options = _chrome_options_to_dict(self) - path = Path(__file__).parent / 'configs.ini' if path == 'default' else path - path = Path(path or self.ini_path) + if path == 'default': + path = 
(Path(__file__).parent / 'configs.ini').absolute() + elif path is None: + path = Path(self.ini_path).absolute() + else: + path = Path(path).absolute() + path = path / 'config.ini' if path.is_dir() else path - path = path.absolute() + + if path.exists(): + om = OptionsManager(path) + else: + om = OptionsManager(self.ini_path or str(Path(__file__).parent / 'configs.ini')) + + options = self.as_dict() for i in options: if i == 'driver_path': @@ -158,7 +405,7 @@ class DriverOptions(Options): else: om.set_item('chrome_options', i, options[i]) - om.save(path) + om.save(str(path)) return self @@ -296,6 +543,9 @@ class DriverOptions(Options): return self + def as_dict(self) -> dict: + return _chrome_options_to_dict(self) + def _dict_to_chrome_options(options: dict) -> Options: """从传入的字典获取浏览器设置,返回ChromeOptions对象 \n @@ -349,25 +599,41 @@ def _dict_to_chrome_options(options: dict) -> Options: return chrome_options -def _chrome_options_to_dict(options: Union[dict, DriverOptions, None]) -> Union[dict, None]: +def _chrome_options_to_dict(options: Union[dict, DriverOptions, Options, None]) -> Union[dict, None]: """把chrome配置对象转换为字典 \n :param options: chrome配置对象,字典或DriverOptions对象 :return: 配置字典 """ - if options is None or isinstance(options, dict): + if isinstance(options, (dict, type(None))): return options re_dict = dict() - re_dict['debugger_address'] = options.debugger_address - re_dict['binary_location'] = options.binary_location - re_dict['debugger_address'] = options.debugger_address - re_dict['arguments'] = options.arguments - re_dict['extensions'] = options.extensions - re_dict['experimental_options'] = options.experimental_options + attrs = ['debugger_address', 'binary_location', 'arguments', 'extensions', 'experimental_options', 'driver_path'] + + for attr in attrs: + re_dict[attr] = options.__getattribute__(f'_{attr}') + + return re_dict + + +def _session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Union[dict, None]: + """把session配置对象装换为字典 \n + :param 
options: session配置对象或字典 + :return: 配置字典 + """ + if isinstance(options, (dict, type(None))): + return options + + re_dict = dict() + attrs = ['headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify', + 'adapters', 'stream', 'trust_env', 'max_redirects'] + + for attr in attrs: + val = options.__getattribute__(f'_{attr}') + if val is not None: + re_dict[attr] = val + + # cert属性默认值为None,未免无法区分是否被设置,主动赋值 + re_dict['cert'] = options.__getattribute__('_cert') - try: - re_dict['driver_path'] = options.driver_path - except: - re_dict['driver_path'] = None - # re_dict['capabilities'] = options.capabilities return re_dict diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 81cdeec..6936ad9 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -15,15 +15,16 @@ from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.webdriver import WebDriver from tldextract import extract -from .config import OptionsManager, _dict_to_chrome_options, _chrome_options_to_dict +from .config import (OptionsManager, _dict_to_chrome_options, _session_options_to_dict, + SessionOptions, DriverOptions, _chrome_options_to_dict) class Drission(object): """Drission类用于管理WebDriver对象和Session对象,是驱动器的角色""" def __init__(self, - driver_or_options: Union[WebDriver, dict, Options] = None, - session_or_options: Union[Session, dict] = None, + driver_or_options: Union[WebDriver, dict, Options, DriverOptions] = None, + session_or_options: Union[Session, dict, SessionOptions] = None, ini_path: str = None, proxy: dict = None): """初始化,可接收现成的WebDriver和Session对象,或接收它们的配置信息生成对象 \n @@ -37,6 +38,7 @@ class Drission(object): self._driver_path = 'chromedriver' self._proxy = proxy + # ------------------处理session options---------------------- # 若接收到Session对象,直接记录 if isinstance(session_or_options, Session): self._session = session_or_options @@ -48,8 +50,9 @@ class Drission(object): if session_or_options is None: self._session_options = 
OptionsManager(ini_path).session_options else: - self._session_options = session_or_options + self._session_options = _session_options_to_dict(session_or_options) + # ------------------处理driver options---------------------- # 若接收到WebDriver对象,直接记录 if isinstance(driver_or_options, WebDriver): self._driver = driver_or_options @@ -80,7 +83,7 @@ class Drission(object): for i in attrs: if i in self._session_options: - exec(f'self._session.{i} = self._session_options["{i}"]') + self._session.__setattr__(i, self._session_options[i]) if self._proxy: self._session.proxies = self._proxy diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index da92419..e7ef113 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -10,7 +10,7 @@ from requests import Response, Session from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.remote.webelement import WebElement -from .config import DriverOptions +from .config import DriverOptions, SessionOptions from .drission import Drission from .driver_element import DriverElement from .driver_page import DriverPage @@ -38,7 +38,7 @@ class MixPage(Null, SessionPage, DriverPage): mode: str = 'd', timeout: float = 10, driver_options: Union[dict, DriverOptions] = None, - session_options: dict = None): + session_options: Union[dict, SessionOptions] = None): """初始化函数 \n :param drission: Drission对象,传入's'或'd'可自动创建Drission对象 :param mode: 'd' 或 's',即driver模式和session模式 From 408ede33913f8347592ed944c98ba755cadfe85d Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 25 Nov 2020 17:38:52 +0800 Subject: [PATCH 03/30] =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.en.md | 2 +- README.zh-cn.md | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/README.en.md b/README.en.md index deccfe5..3315e24 100644 --- a/README.en.md +++ b/README.en.md @@ -1074,7 +1074,7 @@ The Drission class is used to manage 
WebDriver objects and Session objects, and Parameter Description: -- driver_or_options: [WebDriver, dict, Options] - WebDriver object or chrome configuration parameters. +- driver_or_options: [WebDriver, dict, Options, DriverOptions] - WebDriver object or chrome configuration parameters. - session_or_options: [Session, dict] - Session object configuration parameters - ini_path: str - ini file path, the default is the ini file under the DrissionPage folder - proxy: dict - proxy settings diff --git a/README.zh-cn.md b/README.zh-cn.md index fe43c9c..3b9a646 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -1090,10 +1090,10 @@ Drission 类用于管理 WebDriver 对象和 Session 对象,是驱动器的角 参数说明: -- driver_or_options: [WebDriver, dict, Options] - WebDriver 对象或 chrome 配置参数。 -- session_or_options: [Session, dict] - Session 对象配置参数 -- ini_path: str - ini 文件路径,默认为 DrissionPage 文件夹下的ini文件 -- proxy: dict - 代理设置 +- driver_or_options: [WebDriver, dict, Options, DriverOptions] - WebDriver 对象或 chrome 配置参数。 +- session_or_options: [Session, dict] - Session 对象配置参数 +- ini_path: str - ini 文件路径,默认为 DrissionPage 文件夹下的ini文件 +- proxy: dict - 代理设置 From 9d67fe74a4d624b119717c5c3070882c9bb15988 Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 25 Nov 2020 22:08:42 +0800 Subject: [PATCH 04/30] =?UTF-8?q?=E4=BC=98=E5=8C=96=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 53 ++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 29 deletions(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 6936ad9..90e06a5 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -15,8 +15,8 @@ from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.webdriver import WebDriver from tldextract import extract -from .config import (OptionsManager, _dict_to_chrome_options, _session_options_to_dict, - SessionOptions, DriverOptions, 
_chrome_options_to_dict) +from .config import (_dict_to_chrome_options, _session_options_to_dict, + SessionOptions, DriverOptions, _chrome_options_to_dict, OptionsManager) class Drission(object): @@ -35,44 +35,37 @@ class Drission(object): """ self._session = None self._driver = None - self._driver_path = 'chromedriver' self._proxy = proxy + om = OptionsManager(ini_path) if session_or_options is None or driver_or_options is None else None + # ------------------处理session options---------------------- - # 若接收到Session对象,直接记录 - if isinstance(session_or_options, Session): - self._session = session_or_options + if session_or_options is None: + self._session_options = om.session_options - # 否则记录其配置信息 else: + # 若接收到Session对象,直接记录 + if isinstance(session_or_options, Session): + self._session = session_or_options - # 若接收到配置信息则记录,否则从ini文件读取 - if session_or_options is None: - self._session_options = OptionsManager(ini_path).session_options + # 否则记录其配置信息 else: self._session_options = _session_options_to_dict(session_or_options) # ------------------处理driver options---------------------- - # 若接收到WebDriver对象,直接记录 - if isinstance(driver_or_options, WebDriver): - self._driver = driver_or_options + if driver_or_options is None: + self._driver_options = om.chrome_options + self._driver_options['driver_path'] = om.paths.get('chromedriver_path', None) - # 否则记录其配置信息 else: + # 若接收到WebDriver对象,直接记录 + if isinstance(driver_or_options, WebDriver): + self._driver = driver_or_options - # 若接收到配置信息则记录,否则从ini文件读取 - if driver_or_options is None: - om = OptionsManager(ini_path) - self._driver_options = om.chrome_options - - if om.paths.get('chromedriver_path', None): - self._driver_path = om.paths['chromedriver_path'] + # 否则记录其配置信息 else: self._driver_options = _chrome_options_to_dict(driver_or_options) - if self._driver_options.get('driver_path', None): - self._driver_path = self._driver_options['driver_path'] - @property def session(self) -> Session: """返回Session对象,如未初始化则按配置信息创建""" @@ -102,8 +95,10 
@@ class Drission(object): if self._proxy: options.add_argument(f'--proxy-server={self._proxy["http"]}') + driver_path = self._driver_options.get('driver_path', None) or 'chromedriver' + try: - self._driver = webdriver.Chrome(self._driver_path, options=options) + self._driver = webdriver.Chrome(driver_path, options=options) except SessionNotCreatedException: print('Chrome版本与chromedriver版本不匹配,可执行easy_set.get_match_driver()自动下载匹配的版本。') exit(0) @@ -130,12 +125,12 @@ class Drission(object): return self._session_options @session_options.setter - def session_options(self, value: dict) -> None: - """设置session配置 - :param value: session配置字典 + def session_options(self, options: Union[dict, SessionOptions]) -> None: + """设置session配置 \n + :param options: session配置字典 :return: None """ - self._session_options = value + self._session_options = _session_options_to_dict(options) @property def proxy(self) -> Union[None, dict]: From 2f019fb4bac25caffc331848a87289de467436c5 Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 25 Nov 2020 22:35:29 +0800 Subject: [PATCH 05/30] =?UTF-8?q?global=5Ftmp=5Fpath=E6=94=B9=E5=90=8D?= =?UTF-8?q?=E4=B8=BAtmp=5Fpath?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/config.py | 101 ++++++++++++++++++++++++++++++--------- DrissionPage/configs.ini | 8 ++-- DrissionPage/drission.py | 2 +- DrissionPage/easy_set.py | 2 +- README.en.md | 6 +-- README.zh-cn.md | 20 ++++---- 6 files changed, 98 insertions(+), 41 deletions(-) diff --git a/DrissionPage/config.py b/DrissionPage/config.py index 84c2b19..2e07abf 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -1,6 +1,6 @@ # -*- coding:utf-8 -*- """ -配置文件 +管理配置的类 @Author : g1879 @Contact : g1879@qq.com @File : config.py @@ -28,10 +28,10 @@ class OptionsManager(object): self._chrome_options = None self._session_options = None - if 'global_tmp_path' not in self.paths or not self.get_value('paths', 'global_tmp_path'): - global_tmp_path = 
str((Path(__file__).parent / 'tmp').absolute()) - Path(global_tmp_path).mkdir(parents=True, exist_ok=True) - self.set_item('paths', 'global_tmp_path', global_tmp_path) + if 'tmp_path' not in self.paths or not self.get_value('paths', 'tmp_path'): + tmp_path = str((Path(__file__).parent / 'tmp').absolute()) + Path(tmp_path).mkdir(parents=True, exist_ok=True) + self.set_item('paths', 'tmp_path', tmp_path) self.save(self.ini_path) def __text__(self) -> str: @@ -128,9 +128,9 @@ class OptionsManager(object): class SessionOptions(object): def __init__(self, read_file: bool = True, ini_path: str = None): - """ - :param read_file: - :param ini_path: + """requests的Session对象配置类 \n + :param read_file: 是否从文件读取配置 + :param ini_path: ini文件路径 """ self.ini_path = None self._headers = None @@ -187,115 +187,168 @@ class SessionOptions(object): if options_dict.get('max_redirects', None) is not None: self._headers = options_dict['max_redirects'] - # self._cookies = options_dict.get('cookies', None) - # self._auth = options_dict.get('auth', None) - # self._proxies = options_dict.get('proxies', None) - # self._hooks = options_dict.get('hooks', None) - # self._params = options_dict.get('params', None) - # self._verify = options_dict.get('verify', True) - # self._cert = options_dict.get('cert', None) - # self._adapters = options_dict.get('adapters', None) - # self._stream = options_dict.get('stream', None) - # self._trust_env = options_dict.get('trust_env', True) - # self._max_redirects = options_dict.get('max_redirects', None) - @property def headers(self) -> dict: + """返回headers设置信息""" return self._headers @property def cookies(self) -> dict: + """返回cookies设置信息""" return self._cookies @property def auth(self) -> tuple: + """返回auth设置信息""" return self._auth @property def proxies(self) -> dict: + """返回proxies设置信息""" return self._proxies @property def hooks(self) -> dict: + """返回hooks设置信息""" return self._hooks @property def params(self) -> dict: + """返回params设置信息""" return self._params 
@property def verify(self) -> bool: + """返回verify设置信息""" return self._verify @property def cert(self) -> Union[str, tuple]: + """返回cert设置信息""" return self._cert @property def adapters(self): + """返回adapters设置信息""" return self._adapters @property def stream(self) -> bool: + """返回stream设置信息""" return self._stream @property def trust_env(self) -> bool: + """返回trust_env设置信息""" return self._trust_env @property def max_redirects(self) -> int: + """返回max_redirects设置信息""" return self._max_redirects @headers.setter def headers(self, headers: dict) -> None: + """设置headers参数 \n + :param headers: 参数值 + :return: None + """ self._headers = {key.lower(): headers[key] for key in headers} @cookies.setter def cookies(self, cookies: dict) -> None: + """设置cookies参数 \n + :param cookies: 参数值 + :return: None + """ self._cookies = cookies @auth.setter def auth(self, auth: tuple) -> None: + """设置auth参数 \n + :param auth: 参数值 + :return: None + """ self._auth = auth @proxies.setter def proxies(self, proxies: dict) -> None: + """设置proxies参数 \n + :param proxies: 参数值 + :return: None + """ self._proxies = proxies @hooks.setter def hooks(self, hooks: dict) -> None: + """设置hooks参数 \n + :param hooks: 参数值 + :return: None + """ self._hooks = hooks @params.setter def params(self, params: dict) -> None: + """设置params参数 \n + :param params: 参数值 + :return: None + """ self._params = params @verify.setter def verify(self, verify: bool) -> None: + """设置verify参数 \n + :param verify: 参数值 + :return: None + """ self._verify = verify @cert.setter def cert(self, cert: Union[str, tuple]) -> None: + """设置cert参数 \n + :param cert: 参数值 + :return: None + """ self._cert = cert @adapters.setter def adapters(self, adapters) -> None: + """设置 \n + :param adapters: 参数值 + :return: None + """ self._adapters = adapters @stream.setter def stream(self, stream: bool) -> None: + """设置stream参数 \n + :param stream: 参数值 + :return: None + """ self._stream = stream @trust_env.setter def trust_env(self, trust_env: bool) -> None: + 
"""设置trust_env参数 \n + :param trust_env: 参数值 + :return: None + """ self._trust_env = trust_env @max_redirects.setter def max_redirects(self, max_redirects: int) -> None: + """设置max_redirects参数 \n + :param max_redirects: 参数值 + :return: None + """ self._max_redirects = max_redirects def set_header(self, attr: str, value: str): + """设置header中一个项 \n + :param attr: 设置名称 + :param value: 设置值 + :return: 返回当前对象 + """ if self._headers is None: self._headers = {} @@ -303,6 +356,10 @@ class SessionOptions(object): return self def remove_header(self, attr: str): + """从headers中删除一个设置 \n + :param attr: 要删除的设置 + :return: 返回当前对象 + """ if self._headers is None: return self @@ -315,7 +372,7 @@ class SessionOptions(object): def save(self, path: str = None): """保存设置到文件 \n :param path: ini文件的路径,传入 'default' 保存到默认ini文件 - :return: 当前对象 + :return: 返回当前对象 """ if path == 'default': path = (Path(__file__).parent / 'configs.ini').absolute() @@ -633,7 +690,7 @@ def _session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Unio if val is not None: re_dict[attr] = val - # cert属性默认值为None,未免无法区分是否被设置,主动赋值 + # cert属性默认值为None,未免无法区分是否被设置,故主动赋值 re_dict['cert'] = options.__getattribute__('_cert') return re_dict diff --git a/DrissionPage/configs.ini b/DrissionPage/configs.ini index 9087ada..f97ca93 100644 --- a/DrissionPage/configs.ini +++ b/DrissionPage/configs.ini @@ -1,11 +1,11 @@ [paths] -chromedriver_path = -global_tmp_path = +chromedriver_path = +tmp_path = [chrome_options] debugger_address = -binary_location = -arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--headless', '--disable-infobars'] +binary_location = +arguments = ['--no-sandbox', '--disable-gpu', '--ignore-certificate-errors', '--disable-infobars'] extensions = [] experimental_options = {'prefs': {'profile.default_content_settings.popups': 0, 'profile.default_content_setting_values': {'notifications': 2}, 'plugins.plugins_list': [{'enabled': False, 'name': 'Chrome PDF Viewer'}]}, 
'useAutomationExtension': False, 'excludeSwitches': ['enable-automation']} diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 90e06a5..e3d6dd2 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -55,7 +55,7 @@ class Drission(object): # ------------------处理driver options---------------------- if driver_or_options is None: self._driver_options = om.chrome_options - self._driver_options['driver_path'] = om.paths.get('chromedriver_path', None) + self._driver_options['driver_path'] = om.get_value('paths', 'chromedriver_path') else: # 若接收到WebDriver对象,直接记录 diff --git a/DrissionPage/easy_set.py b/DrissionPage/easy_set.py index 593789e..59e5c52 100644 --- a/DrissionPage/easy_set.py +++ b/DrissionPage/easy_set.py @@ -65,7 +65,7 @@ def set_paths(driver_path: str = None, om.set_item('chrome_options', 'debugger_address', format_path(debugger_address)) if tmp_path is not None: - om.set_item('paths', 'global_tmp_path', format_path(tmp_path)) + om.set_item('paths', 'tmp_path', format_path(tmp_path)) if download_path is not None: experimental_options = om.get_value('chrome_options', 'experimental_options') diff --git a/README.en.md b/README.en.md index 3315e24..9219e50 100644 --- a/README.en.md +++ b/README.en.md @@ -405,7 +405,7 @@ In addition to the above two paths, this method can also set the following paths ```python debugger_address # Debug browser address, such as: 127.0.0.1:9222 download_path # Download file path -global_tmp_path # Temporary folder path +tmp_path # Temporary folder path user_data_path # User data path cache_path # cache path ``` @@ -869,7 +869,7 @@ The ini file has three parts by default: paths, chrome_options, and session_opti ; chromedriver.exe path chromedriver_path = ; Temporary folder path, used to save screenshots, file downloads, etc. 
-global_tmp_path = +tmp_path = [chrome_options] ; The address and port of the opened browser, such as 127.0.0.1:9222 @@ -2833,7 +2833,7 @@ Parameter Description: - download_path: str-download file path -- global_tmp_path: str-Temporary folder path +- tmp_path: str-Temporary folder path - user_data_path: str-user data path diff --git a/README.zh-cn.md b/README.zh-cn.md index 3b9a646..7c0c6d4 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -407,7 +407,7 @@ Message: session not created: Chrome version must be between 70 and 73 ```python debugger_address # 调试浏览器地址,如:127.0.0.1:9222 download_path # 下载文件路径 -global_tmp_path # 临时文件夹路径 +tmp_path # 临时文件夹路径 user_data_path # 用户数据路径 cache_path # 缓存路径 ``` @@ -887,7 +887,7 @@ ini 文件默认拥有三部分配置:paths、chrome_options、session_options ; chromedriver.exe路径 chromedriver_path = ; 临时文件夹路径,用于保存截图、文件下载等 -global_tmp_path = +tmp_path = [chrome_options] ; 已打开的浏览器地址和端口,如127.0.0.1:9222 @@ -2802,15 +2802,15 @@ chrome 配置太复杂,所以把常用的配置写成简单的方法,调用 参数说明: -- driver_path: str - chromedriver.exe 路径 -- chrome_path: str - chrome.exe 路径 +- driver_path: str - chromedriver.exe 路径 +- chrome_path: str - chrome.exe 路径 - debugger_address: str - 调试浏览器地址,例:127.0.0.1:9222 -- download_path: str - 下载文件路径 -- global_tmp_path: str - 临时文件夹路径 -- user_data_path: str - 用户数据路径 -- cache_path: str - 缓存路径 -- ini_path: str - ini 文件路径,为 None 则保存到默认 ini 文件 -- check_version: bool - 是否检查 chromedriver 和 chrome 是否匹配 +- download_path: str - 下载文件路径 +- tmp_path: str - 临时文件夹路径 +- user_data_path: str - 用户数据路径 +- cache_path: str - 缓存路径 +- ini_path: str - ini 文件路径,为 None 则保存到默认 ini 文件 +- check_version: bool - 是否检查 chromedriver 和 chrome 是否匹配 返回: None From 161bbb92ad2c65ac5c1025bdadb1998a3445588e Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 25 Nov 2020 23:13:15 +0800 Subject: [PATCH 06/30] =?UTF-8?q?=E5=AF=B9Drission=E5=AF=B9=E8=B1=A1?= =?UTF-8?q?=E8=AE=BE=E7=BD=AEsession=5Foptions=E5=B1=9E=E6=80=A7=E6=97=B6?= =?UTF-8?q?=E5=8F=AF=E5=8D=B3=E4=BD=BF=E6=94=B9=E5=8F=98=E5=86=85=E7=BD=AE?= 
=?UTF-8?q?=E7=9A=84Session?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index e3d6dd2..38110bb 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -70,13 +70,7 @@ class Drission(object): def session(self) -> Session: """返回Session对象,如未初始化则按配置信息创建""" if self._session is None: - self._session = Session() - attrs = ['headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify', - 'cert', 'adapters', 'stream', 'trust_env', 'max_redirects'] - - for i in attrs: - if i in self._session_options: - self._session.__setattr__(i, self._session_options[i]) + self._set_session(self._session_options) if self._proxy: self._session.proxies = self._proxy @@ -131,6 +125,7 @@ class Drission(object): :return: None """ self._session_options = _session_options_to_dict(options) + self._set_session(self._session_options) @property def proxy(self) -> Union[None, dict]: @@ -159,6 +154,17 @@ class Drission(object): for cookie in cookies: self._ensure_add_cookie(cookie) + def _set_session(self, data: dict) -> None: + if self._session is None: + self._session = Session() + + attrs = ['headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify', + 'cert', 'adapters', 'stream', 'trust_env', 'max_redirects'] + + for i in attrs: + if i in data: + self._session.__setattr__(i, data[i]) + def cookies_to_session(self, copy_user_agent: bool = False, driver: WebDriver = None, session: Session = None) -> None: From e31472613f8229724b094e937e99075ed4907a9e Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 25 Nov 2020 23:13:28 +0800 Subject: [PATCH 07/30] =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.en.md | 21 ++++++++++++--------- README.zh-cn.md | 23 +++++++++++++++++++++-- 2 
files changed, 33 insertions(+), 11 deletions(-) diff --git a/README.en.md b/README.en.md index 9219e50..b3f4e9b 100644 --- a/README.en.md +++ b/README.en.md @@ -955,17 +955,20 @@ drission = Drission(ini_path='D:\\settings.ini') # Use the specified ini file to ## easy_set method -The methods of frequently used settings can be quickly modified. Calling the easy_set method will modify the content of the default ini file. +Methods to quickly modify common settings. All for driver mode settings. Calling the easy_set method will modify the content of the default ini file. ```python -set_headless(True) # Turn on headless mode -set_no_imgs(True) # Turn on no image mode -set_no_js(True) # Disable JS -set_mute(True) # Turn on mute mode -set_user_agent('Mozilla/5.0 (Macintosh; Int......') # set user agent -set_proxy('127.0.0.1:8888') # set proxy -set_paths(paths) # See [Initialization] section -set_argument(arg, value) # Set the attribute. If the attribute has no value (such as'zh_CN.UTF- 8'), the value is bool, which means switch; otherwise, the value is str. When the value is'' or False, delete the attribute item +get_match_driver() # Identify the chrome version and automatically download the matching chromedriver.exe +show_settings() # Print all settings +set_headless(True) # Turn on headless mode +set_no_imgs(True) # Turn on no image mode +set_no_js(True) # Disable JS +set_mute(True) # Turn on mute mode +set_user_agent('Mozilla/5.0 (Macintosh; Int......') # set user agent +set_proxy('127.0.0.1:8888') # set proxy +set_paths(paths) # See [Initialization] section +set_argument(arg, value) # Set the attribute. If the attribute has no value (such as'zh_CN.UTF-8'), the value is bool to indicate the switch; otherwise, the value is str. 
When the value is'' or False, delete the attribute item +check_driver_version() # Check if chrome and chromedriver versions match ``` # POM mode diff --git a/README.zh-cn.md b/README.zh-cn.md index 7c0c6d4..be7a8ff 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -827,7 +827,7 @@ page.download(url, save_path, 'img', 'rename', show_msg=True) -## Chrome 快捷设置 +## Chrome 设置 chrome 的配置很繁琐,为简化使用,本库提供了常用配置的设置方法。 @@ -872,6 +872,22 @@ do.save('default') # 保存当前设置到默认 ini 文件 +## Session 设置 + + + +### SessionOptions 对象 + +SessionOptions 对象用于管理 Session 的配置信息。它创建时默认读取默认 ini 文件配置信息,也可手动设置所需信息。 + + + +### 使用方法 + + + + + ## 保存配置 因 chrome 和 headers 配置繁多,故设置一个 ini 文件专门用于保存常用配置,你可使用 OptionsManager 对象获取和保存配置,用 DriverOptions 对象修改 chrome 配置。你也可以保存多个 ini 文件,按不同项目须要调用。 @@ -971,9 +987,11 @@ drission = Drission(ini_path='D:\\settings.ini') # 使用指定 ini 文件创 ## easy_set 方法 -可快速地修改常用设置的方法,调用 easy_set 方法会修改默认 ini 文件相关内容。 +可快速地修改常用设置的方法。全部用于 driver 模式的设置。调用 easy_set 方法会修改默认 ini 文件相关内容。 ```python +get_match_driver() # 识别chrome版本并自动下载匹配的chromedriver.exe +show_settings() # 打印所有设置 set_headless(True) # 开启 headless 模式 set_no_imgs(True) # 开启无图模式 set_no_js(True) # 禁用 JS @@ -982,6 +1000,7 @@ set_user_agent('Mozilla/5.0 (Macintosh; Int......') # 设置 user agent set_proxy('127.0.0.1:8888') # 设置代理 set_paths(paths) # 见 [初始化] 一节 set_argument(arg, value) # 设置属性,若属性无值(如'zh_CN.UTF-8'),value 为 bool 表示开关;否则value为str,当 value为''或 False,删除该属性项 +check_driver_version() # 检查chrome和chromedriver版本是否匹配 ``` # POM 模式 From 480ecc101fda4b7e306082f46b502b430ac40d40 Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 26 Nov 2020 17:26:55 +0800 Subject: [PATCH 08/30] =?UTF-8?q?ele()=E5=8E=BB=E6=8E=89=E8=87=AA=E5=8A=A8?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0//=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_page.py | 4 ++-- DrissionPage/session_page.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/DrissionPage/driver_page.py
b/DrissionPage/driver_page.py index dd3b18c..2560dd3 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -149,8 +149,8 @@ class DriverPage(object): raise ValueError("Len of loc_or_ele must be 2 when it's a tuple.") loc_or_ele = translate_loc(loc_or_ele) - if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')): - loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}' + # if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')): + # loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}' # 接收到DriverElement对象直接返回 elif isinstance(loc_or_ele, DriverElement): diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 1779ee2..a0c3c5e 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -108,8 +108,8 @@ class SessionPage(object): loc_or_ele = translate_loc(loc_or_ele) - if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')): - loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}' + # if loc_or_ele[0] == 'xpath' and not loc_or_ele[1].startswith(('/', '(')): + # loc_or_ele = loc_or_ele[0], f'//{loc_or_ele[1]}' elif isinstance(loc_or_ele, SessionElement): return loc_or_ele From ee95a8b45c0edd4beeac75d707d9c88881168afc Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 26 Nov 2020 17:27:29 +0800 Subject: [PATCH 09/30] =?UTF-8?q?=E5=BE=AE=E8=B0=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/config.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/DrissionPage/config.py b/DrissionPage/config.py index 2e07abf..8e71e33 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -155,37 +155,37 @@ class SessionOptions(object): self._headers = {key.lower(): options_dict['headers'][key] for key in options_dict['headers']} if options_dict.get('cookies', None) is not None: - self._headers = options_dict['cookies'] + self._cookies = options_dict['cookies'] if 
options_dict.get('auth', None) is not None: - self._headers = options_dict['auth'] + self._auth = options_dict['auth'] if options_dict.get('proxies', None) is not None: - self._headers = options_dict['proxies'] + self._proxies = options_dict['proxies'] if options_dict.get('hooks', None) is not None: - self._headers = options_dict['hooks'] + self._hooks = options_dict['hooks'] if options_dict.get('params', None) is not None: - self._headers = options_dict['params'] + self._params = options_dict['params'] if options_dict.get('verify', None) is not None: - self._headers = options_dict['verify'] + self._verify = options_dict['verify'] if options_dict.get('cert', None) is not None: - self._headers = options_dict['cert'] + self._cert = options_dict['cert'] if options_dict.get('adapters', None) is not None: - self._headers = options_dict['adapters'] + self._adapters = options_dict['adapters'] if options_dict.get('stream', None) is not None: - self._headers = options_dict['stream'] + self._stream = options_dict['stream'] if options_dict.get('trust_env', None) is not None: - self._headers = options_dict['trust_env'] + self._trust_env = options_dict['trust_env'] if options_dict.get('max_redirects', None) is not None: - self._headers = options_dict['max_redirects'] + self._max_redirects = options_dict['max_redirects'] @property def headers(self) -> dict: From a91b22df49c7a427237e5d688f35a8722702815e Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 26 Nov 2020 17:29:30 +0800 Subject: [PATCH 10/30] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E5=88=9B=E5=BB=BAsessi?= =?UTF-8?q?on=E6=97=B6=E5=86=99=E5=85=A5cookies=E5=A4=B1=E8=B4=A5=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98=EF=BC=8C=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 38110bb..dd97554 100644 --- 
a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -9,6 +9,7 @@ from typing import Union from urllib.parse import urlparse from requests import Session +from requests.cookies import RequestsCookieJar from selenium import webdriver from selenium.common.exceptions import WebDriverException, SessionNotCreatedException from selenium.webdriver.chrome.options import Options @@ -158,9 +159,19 @@ class Drission(object): if self._session is None: self._session = Session() - attrs = ['headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify', + attrs = ['headers', 'auth', 'proxies', 'hooks', 'params', 'verify', 'cert', 'adapters', 'stream', 'trust_env', 'max_redirects'] + if 'cookies' in data: + if isinstance(data['cookies'], (list, tuple)): + pass + # for cookie in data['cookies']: + # kwargs = {x: cookie[x] for x in cookie if x not in ('name', 'value')} + # self._session.cookies.set(cookie['name'], cookie['value'], **kwargs) + elif isinstance(data['cookies'], RequestsCookieJar): + for cookie in data['cookies']: + self._session.cookies.set_cookie(cookie) + for i in attrs: if i in data: self._session.__setattr__(i, data[i]) From 2c6aa439fdf239a1a87082bd35488c0fb9d96b2d Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 27 Nov 2020 00:39:06 +0800 Subject: [PATCH 11/30] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E5=88=9D=E5=A7=8B?= =?UTF-8?q?=E5=8C=96session=E6=97=B6cookies=E7=9A=84=E8=AE=BE=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/config.py | 27 +++++++++++++++++++++++---- DrissionPage/drission.py | 18 +++++++----------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/DrissionPage/config.py b/DrissionPage/config.py index 8e71e33..84fff5a 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -6,9 +6,11 @@ @File : config.py """ from configparser import ConfigParser, NoSectionError, NoOptionError +from http.cookiejar import Cookie from pathlib import Path from typing 
import Any, Union +from requests.cookies import RequestsCookieJar from selenium import webdriver from selenium.webdriver.chrome.options import Options @@ -175,8 +177,8 @@ class SessionOptions(object): if options_dict.get('cert', None) is not None: self._cert = options_dict['cert'] - if options_dict.get('adapters', None) is not None: - self._adapters = options_dict['adapters'] + # if options_dict.get('adapters', None) is not None: + # self._adapters = options_dict['adapters'] if options_dict.get('stream', None) is not None: self._stream = options_dict['stream'] @@ -682,8 +684,15 @@ def _session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Unio return options re_dict = dict() - attrs = ['headers', 'cookies', 'auth', 'proxies', 'hooks', 'params', 'verify', - 'adapters', 'stream', 'trust_env', 'max_redirects'] + attrs = ['headers', 'auth', 'proxies', 'hooks', 'params', 'verify', + 'stream', 'trust_env', 'max_redirects'] # 'adapters', + + val = options.__getattribute__(f'_cookies') + if val is not None: + if isinstance(val, (list, tuple)): + re_dict['cookies'] = val + elif isinstance(val, RequestsCookieJar): + re_dict['cookies'] = [_cookie_to_dict(cookie) for cookie in val] for attr in attrs: val = options.__getattribute__(f'_{attr}') @@ -694,3 +703,13 @@ def _session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Unio re_dict['cert'] = options.__getattribute__('_cert') return re_dict + + +def _cookie_to_dict(cookie: Cookie) -> dict: + # TODO: 其它值? 
+ cookie_dict = {'name': cookie.name, 'value': str(cookie.value), 'path': cookie.path, 'domain': cookie.domain} + + if cookie.expires: + cookie_dict['expiry'] = cookie.expires + + return cookie_dict diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index dd97554..3ee6837 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -17,7 +17,7 @@ from selenium.webdriver.chrome.webdriver import WebDriver from tldextract import extract from .config import (_dict_to_chrome_options, _session_options_to_dict, - SessionOptions, DriverOptions, _chrome_options_to_dict, OptionsManager) + SessionOptions, DriverOptions, _chrome_options_to_dict, OptionsManager, _cookie_to_dict) class Drission(object): @@ -160,14 +160,14 @@ class Drission(object): self._session = Session() attrs = ['headers', 'auth', 'proxies', 'hooks', 'params', 'verify', - 'cert', 'adapters', 'stream', 'trust_env', 'max_redirects'] + 'cert', 'stream', 'trust_env', 'max_redirects'] # , 'adapters' if 'cookies' in data: if isinstance(data['cookies'], (list, tuple)): - pass - # for cookie in data['cookies']: - # kwargs = {x: cookie[x] for x in cookie if x not in ('name', 'value')} - # self._session.cookies.set(cookie['name'], cookie['value'], **kwargs) + for cookie in data['cookies']: + kwargs = {x: cookie[x] for x in cookie if x not in ('name', 'value')} + self._session.cookies.set(cookie['name'], cookie['value'], **kwargs) + elif isinstance(data['cookies'], RequestsCookieJar): for cookie in data['cookies']: self._session.cookies.set_cookie(cookie) @@ -212,11 +212,7 @@ class Drission(object): # 翻译cookies for i in [x for x in session.cookies if domain in x.domain]: - cookie_data = {'name': i.name, 'value': str(i.value), 'path': i.path, 'domain': i.domain} - - if i.expires: - cookie_data['expiry'] = i.expires - + cookie_data = _cookie_to_dict(i) self._ensure_add_cookie(cookie_data, driver=driver) def _ensure_add_cookie(self, cookie, override_domain=None, driver=None) -> None: From 
8fd13a70c052202413926a970c5e68bb166975a5 Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 27 Nov 2020 17:25:53 +0800 Subject: [PATCH 12/30] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E5=90=8D=E6=9C=AB=E4=B8=BA=E7=A9=BA=E6=A0=BC=E6=97=B6get=5Fava?= =?UTF-8?q?ilable=5Ffile=5Fname()=E5=87=BA=E9=94=99=E7=9A=84bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DrissionPage/common.py b/DrissionPage/common.py index 22c9e5d..c062606 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -256,7 +256,7 @@ def get_available_file_name(folder_path: str, file_name: str) -> str: base_name = file_Path.stem num = base_name.split(' ')[-1] - if num[0] == '(' and num[-1] == ')' and num[1:-1].isdigit(): + if num and num[0] == '(' and num[-1] == ')' and num[1:-1].isdigit(): num = int(num[1:-1]) file_name = f'{base_name.replace(f"({num})", "", -1)}({num + 1}){ext_name}' else: From 4e7e9e4dbfb9bda870899a17171da3865e8bcef3 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 29 Nov 2020 07:59:51 +0800 Subject: [PATCH 13/30] =?UTF-8?q?=E7=BB=A7=E7=BB=AD=E4=BF=AE=E6=94=B9Sessi?= =?UTF-8?q?onOptions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/config.py | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/DrissionPage/config.py b/DrissionPage/config.py index 84fff5a..e8f59ff 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -192,11 +192,15 @@ class SessionOptions(object): @property def headers(self) -> dict: """返回headers设置信息""" + if self._headers is None: + self._headers = {} return self._headers @property - def cookies(self) -> dict: + def cookies(self) -> list: """返回cookies设置信息""" + if self._cookies is None: + self._cookies = [] return self._cookies @property @@ -207,16 +211,22 @@ class 
SessionOptions(object): @property def proxies(self) -> dict: """返回proxies设置信息""" + if self._proxies is None: + self._proxies = {} return self._proxies @property def hooks(self) -> dict: """返回hooks设置信息""" + if self._hooks is None: + self._hooks = {} return self._hooks @property def params(self) -> dict: """返回params设置信息""" + if self._params is None: + self._params = {} return self._params @property @@ -258,7 +268,7 @@ class SessionOptions(object): self._headers = {key.lower(): headers[key] for key in headers} @cookies.setter - def cookies(self, cookies: dict) -> None: + def cookies(self, cookies: Union[list, tuple]) -> None: """设置cookies参数 \n :param cookies: 参数值 :return: None @@ -371,6 +381,16 @@ class SessionOptions(object): return self + def add_cookie(self, cookie): + pass + + def remove_cookie(self, name: str): + pass + + def clear_cookies(self): + """清空cookies""" + self.cookies = None + def save(self, path: str = None): """保存设置到文件 \n :param path: ini文件的路径,传入 'default' 保存到默认ini文件 @@ -684,8 +704,7 @@ def _session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Unio return options re_dict = dict() - attrs = ['headers', 'auth', 'proxies', 'hooks', 'params', 'verify', - 'stream', 'trust_env', 'max_redirects'] # 'adapters', + attrs = ['headers', 'proxies', 'hooks', 'params', 'verify', 'stream', 'trust_env', 'max_redirects'] # 'adapters', val = options.__getattribute__(f'_cookies') if val is not None: @@ -701,6 +720,7 @@ def _session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Unio # cert属性默认值为None,未免无法区分是否被设置,故主动赋值 re_dict['cert'] = options.__getattribute__('_cert') + re_dict['auth'] = options.__getattribute__('_auth') return re_dict From 68ecb000697b221c2a8b82467ba40a58f798a152 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 29 Nov 2020 08:00:03 +0800 Subject: [PATCH 14/30] =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.en.md | 72 
++++++++++++++++++++++++++++++++++++++++--------- README.zh-cn.md | 63 ++++++++++++++++++++++++++++++++----------- 2 files changed, 107 insertions(+), 28 deletions(-) diff --git a/README.en.md b/README.en.md index b3f4e9b..341380f 100644 --- a/README.en.md +++ b/README.en.md @@ -418,6 +418,12 @@ Tips: +### Other methods + +If you don't want to use the ini file (for example, when you want to package the project), you can write the above two paths in the system path, or fill in the program. See the next section for the use of the latter. + + + ## Create drive object Drission The creation step is not necessary. If you want to get started quickly, you can skip this section. The MixPage object will automatically create the object. @@ -451,16 +457,21 @@ do.set_paths(chrome_path ='D:\\chrome\\chrome.exe', # Settings for s mode session_options = {'headers': {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6)'}} +# Proxy settings, optional +proxy = {'http': '127.0.0.1:1080','https': '127.0.0.1:1080'} + # Incoming configuration, driver_options and session_options are optional, you need to use the corresponding mode to pass in -drission = Drission(driver_options, session_options) +drission = Drission(driver_options, session_options, proxy=proxy) ``` +The usage of DriverOptions and SessionOptions is detailed below. + ## Use page object MixPage The MixPage page object encapsulates common web page operations and realizes the switch between driver and session modes. -MixPage must receive a Drission object and use the driver or session in it. If it is not passed in, MixPage will create a Drission by itself (using the configuration of the default ini file). +MixPage must control a Drission object and use its driver or session. If it is not passed in, MixPage will create one by itself (using the incoming configuration information or reading from the default ini file). 
Tips: When multiple objects work together, you can pass the Drission object in one MixPage to another, so that multiple objects can share login information or operate the same page. @@ -485,8 +496,6 @@ page = MixPage(driver_options=DriverOption, session_options=SessionOption) # de ### visit website -If there is an error in the connection, the program will automatically retry twice. The number of retries and the waiting interval can be specified. - ```python # Default mode page.get(url) @@ -496,6 +505,8 @@ page.post(url, data, **kwargs) # Only session mode has post method page.get(url, retry=5, interval=0.5) ``` +Tips: If there is an error in the connection, the program will automatically retry twice. The number of retries and the waiting interval can be specified. + ### Switch mode @@ -506,6 +517,8 @@ Switch between s and d modes, the cookies and the URL you are visiting will be a page.change_mode(go=False) # If go is False, it means that the url is not redirected ``` +Tips: When using a method unique to a certain mode, it will automatically jump to that mode. + ### Page properties @@ -534,7 +547,7 @@ page.current_tab_handle # Return to the current tab page handle When calling a method that only belongs to d mode, it will automatically switch to d mode. See APIs for detailed usage. ```python -page.change_mode() # switch mode +page.change_mode() # Switch mode, it will automatically copy cookies page.cookies_to_session() # Copy cookies from WebDriver object to Session object page.cookies_to_driver() # Copy cookies from Session object to WebDriver object page.get(url, retry, interval, **kwargs) # Use get to access the web page, you can specify the number of retries and the interval @@ -577,11 +590,9 @@ page.eles() and element.eles() search and return a list of all elements that mee Description: -- The element search timeout is 10 seconds by default, you can also set it as needed. 
- -In the following search statement, the colon: indicates a fuzzy match, and the equal sign = indicates an exact match - -There are five types of query strings: @attribute name, tag, text, xpath, and css +- The element search timeout is 10 seconds by default, and it stops waiting when it times out or finds an element. You can also set it as needed. +- You can find elements with query string or selenium native loc tuple (s mode can also be used) +- The query string has 7 methods such as @attribute name, tag, text, xpath, css, ., #, etc. ```python # Find by attribute @@ -590,6 +601,12 @@ page.eles('@class') # Find all elements with class attribute page.eles('@class:class_name') # Find all elements that have ele_class in class page.eles('@class=class_name') # Find all elements whose class is equal to ele_class +# Find by class or id +page.ele('#ele_id') # equivalent to page.ele('@id=ele_id') +page.ele('#:ele_id') # equivalent to page.ele('@id:ele_id') +page.ele('.ele_class') # equivalent to page.ele('@class=ele_class') +page.ele('.:ele_class') # equivalent to page.ele('@class:ele_class') + # Find by tag name page.ele('tag:li') # Find the first li element page.eles('tag:li') # Find all li elements @@ -603,7 +620,7 @@ page.ele('tag:div@text()=search_text') # Find the div element whose text is equ # Find according to text content page.ele('search text') # find the element containing the incoming text -page.eles('text:search text') # If the text starts with @, tag:, css:, xpath:, text:, add text: in front to avoid conflicts +page.eles('text:search text') # If the text starts with @, tag:, css:, xpath:, text:, you should add text: in front to avoid conflicts page.eles('text=search text') # The text is equal to the element of search_text # Find according to xpath or css selector @@ -626,7 +643,7 @@ element.parent # parent element element.next # next sibling element element.prev # previous sibling element -# Get shadow- dom, only support open shadow- root +# Get the
shadow-root and treat it as an element. Only support open shadow-root ele1 = element.shadow_root.ele('tag:div') # Chain search @@ -780,6 +797,37 @@ response = session.get('https://www.baidu.com') +## requests function usage + +### Connection parameters + +In addition to passing in configuration information and connection parameters when creating, if necessary, you can also set connection parameters every time you visit the URL in the s mode. + +```python +headers = {'User-Agent':'...',} +cookies = {'name':'value',} +proxies = {'http': '127.0.0.1:1080','https': '127.0.0.1:1080'} +page.get(url, headers=headers, cookies=cookies, proxies=proxies) +``` + +Tips: + +-If the connection parameters are not specified, the s mode will automatically fill in the Host and Referer attributes according to the current domain name +-The Session configuration passed in when creating MixPage is globally effective + + + +### Response object + +The Response object obtained by requests is stored in page.response and can be used directly. Such as: + +```python +print(page.response.status_code) +print(page.response.headers) +``` + + + ## download file Selenium lacks effective management of browser download files, and it is difficult to detect download status, rename, and fail management. 
diff --git a/README.zh-cn.md b/README.zh-cn.md index be7a8ff..d1554b2 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -6,7 +6,7 @@ DrissionPage,即 driver 和 session 的合体。 是个基于 python 的 Web 自动化操作集成工具。 它实现了 selenium 和 requests 之间的无缝切换。 因此可以兼顾 selenium 的便利性和 requests 的高效率。 -它集成了页面常用功能,两种模式系统一致的 API,使用便捷。 +它集成了页面常用功能,两种模式系统一致的 API,使用便捷。 它用 POM 模式封装了页面元素常用的方法,很适合自动化操作功能扩展。 更棒的是,它的使用方式非常简洁和人性化,代码量少,对新手友好。 @@ -330,9 +330,9 @@ from DrissionPage import MixPage 配置路径有四种方法: - 使用 easy_set 工具的 get_match_driver() 方法(推荐) -- 将路径写入本库的ini文件 +- 将路径写入本库的 ini 文件 - 将两个路径写入系统变量 -- 使用时手动传入路径 +- 在代码中填写路径 ### 使用 get_match_driver() 方法 @@ -420,6 +420,12 @@ Tips: +### 其它方法 + +若你不想使用 ini 文件(如要打包项目时),可在系统路径写入以上两个路径,或在程序中填写。后者的使用方法见下一节。 + + + ## 创建驱动器对象 Drission 创建的步骤不是必须,若想快速上手,可跳过本节。MixPage 会自动创建该对象。 @@ -453,16 +459,21 @@ do.set_paths(chrome_path='D:\\chrome\\chrome.exe', # 用于 s 模式的设置 session_options = {'headers': {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6)'}} +# 代理设置,可选 +proxy = {'http': '127.0.0.1:1080', 'https': '127.0.0.1:1080'} + # 传入配置,driver_options 和 session_options 都是可选的,须要使用对应模式才须要传入 -drission = Drission(driver_options, session_options) +drission = Drission(driver_options, session_options, proxy=proxy) ``` +DriverOptions 和 SessionOptions 用法详见下文。 + ## 使用页面对象 MixPage MixPage 页面对象封装了常用的网页操作,并实现 driver 和 session 模式之间的切换。 -MixPage 须接收一个 Drission 对象并使用其中的 driver 或 session,如没有传入,MixPage 会自己创建一个(使用默认 ini 文件的配置)。 +MixPage 须控制一个 Drission 对象并使用其中的 driver 或 session,如没有传入,MixPage 会自己创建一个(使用传入的配置信息或从默认 ini 文件读取)。 Tips: 多对象协同工作时,可将一个 MixPage 中的 Drission 对象传递给另一个,使多个对象共享登录信息或操作同一个页面。 @@ -487,8 +498,6 @@ page = MixPage(driver_options=do, session_options=so) # 默认 d 模式 ### 访问网页 -若连接出错,程序会自动重试2次,可指定重试次数和等待间隔。 - ```python # 默认方式 page.get(url) @@ -498,6 +507,8 @@ page.post(url, data, **kwargs) # 只有 session 模式才有 post 方法 page.get(url, retry=5, interval=0.5) ``` +Tips:若连接出错,程序会自动重试2次,可指定重试次数和等待间隔。 + ### 切换模式 @@ -508,6 +519,8 @@ page.get(url, retry=5, interval=0.5) 
page.change_mode(go=False) # go 为 False 表示不跳转 url ``` +Tips:使用某种模式独有的方法时会自动跳转到该模式。 + ### 页面属性 @@ -536,7 +549,7 @@ page.current_tab_handle # 返回当前标签页 handle 调用只属于 d 模式的方法,会自动切换到 d 模式。详细用法见 APIs。 ```python -page.change_mode() # 切换模式 +page.change_mode() # 切换模式,会自动复制 cookies page.cookies_to_session() # 从 WebDriver 对象复制 cookies 到 Session 对象 page.cookies_to_driver() # 从 Session 对象复制 cookies 到 WebDriver 对象 page.get(url, retry, interval, **kwargs) # 用 get 方式访问网页,可指定重试次数及间隔时间 @@ -558,7 +571,7 @@ page.close_current_tab() # 关闭当前标签页 page.close_other_tabs(num) # 关闭其它标签页 page.to_iframe(iframe) # 切入 iframe page.screenshot(path) # 页面截图 -page.scrool_to_see(element) # 滚动直到某元素可见 +page.scroll_to_see(element) # 滚动直到某元素可见 page.scroll_to(mode, pixel) # 按参数指示方式滚动页面,可选滚动方向:'top', 'bottom', 'rightmost', 'leftmost', 'up', 'down', 'left', 'right' page.refresh() # 刷新当前页面 page.back() # 浏览器后退 @@ -579,9 +592,10 @@ page.eles() 和 element.eles() 查找返回符合条件的所有元素列表。 说明: -- 元素查找超时默认为10秒,你也可以按需要设置。 +- 元素查找超时默认为10秒,超时或找到元素时停止等待,你也可以按需要设置。 - 下面的查找语句中,冒号 : 表示模糊匹配,等号 = 表示精确匹配 -- 查询字符串有 @属性名、tag、text、xpath、css 五种 +- 可用查询字符串或 selenium 原生的 loc 元组(s 模式也能用)查找元素 +- 查询字符串有 @属性名、tag、text、xpath、css、.、# 等7种方法 ```python # 根据属性查找,@ 后面可跟任意属性 @@ -590,6 +604,12 @@ page.eles('@class') # 查找所有拥有 class 属性的元素 page.eles('@class:class_name') # 查找所有 class 含有 ele_class 的元素 page.eles('@class=class_name') # 查找所有 class 等于 ele_class 的元素 +# 根据 class 或 id 查找 +page.ele('#ele_id') # 等价于 page.ele('@id=ele_id') +page.ele('#:ele_id') # 等价于 page.ele('@id:ele_id') +page.ele('.ele_class') # 等价于 page.ele('@class=ele_class') +page.ele('.:ele_class') # 等价于 page.ele('@class:ele_class') + # 根据 tag name 查找 page.ele('tag:li') # 查找第一个 li 元素 page.eles('tag:li') # 查找所有 li 元素 @@ -603,7 +623,7 @@ page.ele('tag:div@text()=search_text') # 查找文本等于 search_text 的 div # 根据文本内容查找 page.ele('search text') # 查找包含传入文本的元素 -page.eles('text:search text') # 如文本以 @、tag:、css:、xpath:、text: 开头,则在前面加上 text: 避免冲突 +page.eles('text:search text') # 如文本以 @、tag:、css:、xpath:、text: 
开头,则应在前加上 text: 避免冲突 page.eles('text=search text') # 文本等于 search_text 的元素 # 根据 xpath 或 css selector 查找 @@ -626,7 +646,7 @@ element.parent # 父元素 element.next # 下一个兄弟元素 element.prev # 上一个兄弟元素 -# 获取 shadow-dom,只支持 open 的 shadow-root +# 获取 shadow-root,把它作为元素对待。只支持 open 的 shadow-root ele1 = element.shadow_root.ele('tag:div') # 串连查找 @@ -782,19 +802,30 @@ response = session.get('https://www.baidu.com') ### 连接参数 -除了在创建时传入配置信息及连接参数,如有特别要求,s 模式下也可在每次访问网址时设置连接参数。 +除了在创建时传入配置信息及连接参数,如有必要,s 模式下也可在每次访问网址时设置连接参数。 ```python - +headers = {'User-Agent': '......', } +cookies = {'name': 'value', } +proxies = {'http': '127.0.0.1:1080', 'https': '127.0.0.1:1080'} +page.get(url, headers=headers, cookies=cookies, proxies=proxies) ``` -Tips:如果连接参数内没有指定,s 模式会根据当前域名自动填写 Host 和 Referer 属性。 +Tips: + +- 如果连接参数内没有指定,s 模式会根据当前域名自动填写 Host 和 Referer 属性 +- 在创建 MixPage 时传入的 Session 配置是全局有效的 ### Response 对象 +requests 获取到的 Response 对象存放在 page.response,可直接使用。如: +```python +print(page.response.status_code) +print(page.response.headers) +``` From d10985030cce1056d955c5878e31a48f73a4b466 Mon Sep 17 00:00:00 2001 From: g1879 Date: Sun, 29 Nov 2020 11:56:41 +0800 Subject: [PATCH 15/30] =?UTF-8?q?=E6=B7=BB=E5=8A=A0get=5Fcookies()?= =?UTF-8?q?=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_page.py | 9 ++++++++- DrissionPage/mix_page.py | 7 +++++++ DrissionPage/session_page.py | 12 ++++++++++-- README.en.md | 16 +++++++++++++++- README.zh-cn.md | 18 +++++++++++++++++- 5 files changed, 57 insertions(+), 5 deletions(-) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index 2560dd3..f338672 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -53,13 +53,20 @@ class DriverPage(object): @property def cookies(self) -> list: """返回当前网站cookies""" - return self.driver.get_cookies() + return self.get_cookies(True) @property def title(self) -> str: """返回网页title""" return self.driver.title + 
def get_cookies(self, as_dict: bool = False) -> Union[list, dict]: + """返回当前网站cookies""" + if as_dict: + return {cookie['name']: cookie['value'] for cookie in self.driver.get_cookies()} + else: + return self.driver.get_cookies() + def _try_to_connect(self, to_url: str, times: int = 0, diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index e7ef113..4fe2b78 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -139,6 +139,13 @@ class MixPage(Null, SessionPage, DriverPage): elif self._mode == 'd': return super(SessionPage, self).title + def get_cookies(self, as_dict: bool = False) -> Union[dict, list]: + """返回cookies""" + if self._mode == 's': + return super().get_cookies(as_dict) + elif self._mode == 'd': + return super(SessionPage, self).get_cookies(as_dict) + def change_mode(self, mode: str = None, go: bool = True) -> None: """切换模式,接收's'或'd',除此以外的字符串会切换为d模式 \n 切换时会把当前模式的cookies复制到目标模式 \n diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index a0c3c5e..755911c 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -16,6 +16,7 @@ from urllib.parse import urlparse, quote, unquote from requests import Session, Response +from .config import _cookie_to_dict from .common import str_to_loc, translate_loc, get_available_file_name, format_html from .session_element import SessionElement, execute_session_find @@ -54,18 +55,25 @@ class SessionPage(object): @property def cookies(self) -> dict: """返回session的cookies""" - return self.session.cookies.get_dict() + return self.get_cookies(True) @property def title(self) -> str: """返回网页title""" - return self.ele(('css selector', 'title')).text + return self.ele('tag:title').text @property def html(self) -> str: """返回页面html文本""" return format_html(self.response.text) + def get_cookies(self, as_dict: bool = False) -> Union[dict, list]: + """返回session的cookies""" + if as_dict: + return self.session.cookies.get_dict() + else: + return [_cookie_to_dict(cookie) 
for cookie in self.session.cookies] + def ele(self, loc_or_ele: Union[Tuple[str, str], str, SessionElement], mode: str = None) -> Union[SessionElement, List[SessionElement or str], str, None]: diff --git a/README.en.md b/README.en.md index 341380f..395d6d0 100644 --- a/README.en.md +++ b/README.en.md @@ -547,6 +547,7 @@ page.current_tab_handle # Return to the current tab page handle When calling a method that only belongs to d mode, it will automatically switch to d mode. See APIs for detailed usage. ```python +page.get_cookies() # Get cookies, which can be returned by list or dict page.change_mode() # Switch mode, it will automatically copy cookies page.cookies_to_session() # Copy cookies from WebDriver object to Session object page.cookies_to_driver() # Copy cookies from Session object to WebDriver object @@ -887,7 +888,10 @@ options.set_paths(driver_path, chrome_path, debugger_address, download_path, use ### Instructions ```python -do = DriverOptions(read_file=False) # Create chrome configuration object, do not read from ini file +do = DriverOptions() # read the default ini file to create a DriverOptions object +do = DriverOptions('D:\\settings.ini') # read the specified ini file to create a DriverOptions object +do = DriverOptions(read_file=False) # Do not read the ini file, create an empty DriverOptions object + do.set_headless(False) # show the browser interface do.set_no_imgs(True) # Do not load pictures do.set_paths(driver_path='D:\\chromedriver.exe', chrome_path='D:\\chrome.exe') # set path @@ -1341,6 +1345,16 @@ Returns: bool +### get_cookies() + +Return cookies. + +Parameter Description: + +- as_dict: bool-Whether to return as dict, the default is to return complete cookies as list + + + ### change_mode() Switch mode,'d' or's'. When switching, the cookies of the current mode will be copied to the target mode. 
diff --git a/README.zh-cn.md b/README.zh-cn.md index d1554b2..7b4a71c 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -19,6 +19,8 @@ DrissionPage,即 driver 和 session 的合体。 **联系邮箱:** g1879@qq.com +**交流QQ群:**897838127 + # 理念及背景 *** @@ -549,6 +551,7 @@ page.current_tab_handle # 返回当前标签页 handle 调用只属于 d 模式的方法,会自动切换到 d 模式。详细用法见 APIs。 ```python +page.get_cookies() # 获取 cookies,可以 list 或 dict 方式返回 page.change_mode() # 切换模式,会自动复制 cookies page.cookies_to_session() # 从 WebDriver 对象复制 cookies 到 Session 对象 page.cookies_to_driver() # 从 Session 对象复制 cookies 到 WebDriver 对象 @@ -888,7 +891,10 @@ options.set_paths(driver_path, chrome_path, debugger_address, download_path, use ### 使用方法 ```python -do = DriverOptions(read_file=False) # 创建chrome配置对象,不从 ini 文件读取 +do = DriverOptions() # 读取默认 ini 文件创建 DriverOptions 对象 +do = DriverOptions('D:\\settings.ini') # 读取指定 ini 文件创建 DriverOptions 对象 +do = DriverOptions(read_file=False) # 不读取 ini 文件,创建空的 DriverOptions 对象 + do.set_headless(False) # 显示浏览器界面 do.set_no_imgs(True) # 不加载图片 do.set_paths(driver_path='D:\\chromedriver.exe', chrome_path='D:\\chrome.exe') # 设置路径 @@ -1356,6 +1362,16 @@ MixPage 封装了页面操作的常用功能,可在 driver 和 session 模式 +### get_cookies() + +返回 cookies。 + +参数说明: + +- as_dict: bool - 是否以 dict 方式返回,默认以 list 返回完整的 cookies + + + ### change_mode() 切换模式,'d' 或 's'。切换时会把当前模式的 cookies 复制到目标模式。 From 4c392aa7077f1cdb56ea65845d331b2ec199fbbf Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 30 Nov 2020 06:56:49 +0800 Subject: [PATCH 16/30] =?UTF-8?q?=E5=AE=8C=E5=96=84cookies=E6=96=B9?= =?UTF-8?q?=E9=9D=A2=E5=8A=9F=E8=83=BD=EF=BC=8C=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/config.py | 15 ++++++++++----- DrissionPage/drission.py | 24 ++++++++++++++++++++++++ 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/DrissionPage/config.py b/DrissionPage/config.py index e8f59ff..5ed4c47 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ 
-726,10 +726,15 @@ def _session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Unio def _cookie_to_dict(cookie: Cookie) -> dict: - # TODO: 其它值? - cookie_dict = {'name': cookie.name, 'value': str(cookie.value), 'path': cookie.path, 'domain': cookie.domain} + # cookie_dict = {'name': cookie.name, 'value': str(cookie.value), 'path': cookie.path, 'domain': cookie.domain} - if cookie.expires: - cookie_dict['expiry'] = cookie.expires + # if cookie.expires: + # cookie_dict['expiry'] = cookie.expires - return cookie_dict + # return cookie_dict + if isinstance(cookie, Cookie): + return cookie.__dict__ + elif isinstance(cookie, dict): + return cookie + else: + raise TypeError diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 3ee6837..4146312 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -155,6 +155,30 @@ class Drission(object): for cookie in cookies: self._ensure_add_cookie(cookie) + def set_cookies(self, + cookies: Union[RequestsCookieJar, list, tuple], + set_session: bool = False, + set_driver: bool = False): + if isinstance(cookies, (list, tuple, RequestsCookieJar)): + cookies = tuple(_cookie_to_dict(cookie) for cookie in cookies) + else: + raise TypeError + + if set_session: + pass + + if set_driver: + pass + + def add_cookie(self): + pass + + def remove_cookie(self): + pass + + def clear_cookies(self): + pass + def _set_session(self, data: dict) -> None: if self._session is None: self._session = Session() From c62b28d50e8da9d7b508e93c2f3ad42b53f1a914 Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 1 Dec 2020 00:53:27 +0800 Subject: [PATCH 17/30] =?UTF-8?q?=E5=AE=8C=E5=96=84cookies=E6=96=B9?= =?UTF-8?q?=E9=9D=A2=E5=8A=9F=E8=83=BD=EF=BC=8C=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/DrissionPage/drission.py 
b/DrissionPage/drission.py index 4146312..9d06d60 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -156,24 +156,30 @@ class Drission(object): self._ensure_add_cookie(cookie) def set_cookies(self, - cookies: Union[RequestsCookieJar, list, tuple], + cookies: Union[RequestsCookieJar, list, tuple, str], set_session: bool = False, - set_driver: bool = False): + set_driver: bool = False) -> None: if isinstance(cookies, (list, tuple, RequestsCookieJar)): cookies = tuple(_cookie_to_dict(cookie) for cookie in cookies) + elif isinstance(cookies, str): + cookies = tuple(dict([cookie.split("=", 1)]) for cookie in cookies.split("; ")) + elif isinstance(cookies, dict): + cookies = tuple({'name': cookie, 'value': cookies[cookie]} for cookie in cookies) else: raise TypeError - if set_session: - pass + for cookie in cookies: + if set_session: + kwargs = {x: cookie[x] for x in cookie if x not in ('name', 'value')} + self.session.cookies.set(cookie['name'], cookie['value'], **kwargs) - if set_driver: - pass + if set_driver: + self.driver.add_cookie(cookie) - def add_cookie(self): + def add_a_cookie(self): pass - def remove_cookie(self): + def remove_a_cookie(self): pass def clear_cookies(self): @@ -187,14 +193,7 @@ class Drission(object): 'cert', 'stream', 'trust_env', 'max_redirects'] # , 'adapters' if 'cookies' in data: - if isinstance(data['cookies'], (list, tuple)): - for cookie in data['cookies']: - kwargs = {x: cookie[x] for x in cookie if x not in ('name', 'value')} - self._session.cookies.set(cookie['name'], cookie['value'], **kwargs) - - elif isinstance(data['cookies'], RequestsCookieJar): - for cookie in data['cookies']: - self._session.cookies.set_cookie(cookie) + self.set_cookies(data['cookies'], set_driver=False) for i in attrs: if i in data: From 7a8558befc8cb0bd0015b4dc20e6ec7fd99699d1 Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 1 Dec 2020 17:54:15 +0800 Subject: [PATCH 18/30] =?UTF-8?q?=E7=BB=A7=E7=BB=ADcookies=E7=9B=B8?= 
=?UTF-8?q?=E5=85=B3=EF=BC=8C=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/config.py | 14 +++++++------- DrissionPage/drission.py | 12 +++++++++--- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/DrissionPage/config.py b/DrissionPage/config.py index 5ed4c47..e9de3e5 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -726,14 +726,14 @@ def _session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Unio def _cookie_to_dict(cookie: Cookie) -> dict: - # cookie_dict = {'name': cookie.name, 'value': str(cookie.value), 'path': cookie.path, 'domain': cookie.domain} - - # if cookie.expires: - # cookie_dict['expiry'] = cookie.expires - - # return cookie_dict + """把Cookie对象转为dict格式""" if isinstance(cookie, Cookie): - return cookie.__dict__ + # return cookie.__dict__ + cookie_dict = {'name': cookie.name, 'value': str(cookie.value), 'path': cookie.path, 'domain': cookie.domain} + + if cookie.expires: + cookie_dict['expiry'] = cookie.expires + elif isinstance(cookie, dict): return cookie else: diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 9d06d60..61cbdd3 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -162,7 +162,7 @@ class Drission(object): if isinstance(cookies, (list, tuple, RequestsCookieJar)): cookies = tuple(_cookie_to_dict(cookie) for cookie in cookies) elif isinstance(cookies, str): - cookies = tuple(dict([cookie.split("=", 1)]) for cookie in cookies.split("; ")) + cookies = tuple(dict([cookie.lstrip().split("=", 1)]) for cookie in cookies.split(";")) elif isinstance(cookies, dict): cookies = tuple({'name': cookie, 'value': cookies[cookie]} for cookie in cookies) else: @@ -176,10 +176,16 @@ class Drission(object): if set_driver: self.driver.add_cookie(cookie) - def add_a_cookie(self): + def add_a_cookie(self, + cookie: str, + set_session: bool = False, + set_driver: bool = False): pass - 
def remove_a_cookie(self): + def remove_a_cookie(self, + name: str, + set_session: bool = False, + set_driver: bool = False): pass def clear_cookies(self): From 3dc2207b0f997e7ce80fe7bbda2776eb2b97b828 Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 1 Dec 2020 17:54:33 +0800 Subject: [PATCH 19/30] =?UTF-8?q?=E5=BE=AE=E8=B0=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/session_page.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 755911c..5ace7cb 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -16,8 +16,8 @@ from urllib.parse import urlparse, quote, unquote from requests import Session, Response -from .config import _cookie_to_dict from .common import str_to_loc, translate_loc, get_available_file_name, format_html +from .config import _cookie_to_dict from .session_element import SessionElement, execute_session_find @@ -382,6 +382,7 @@ class SessionPage(object): # -------------------打印要下载的文件------------------- if show_msg: + print(file_url) print(full_name if file_name == full_name else f'{file_name} -> {full_name}') print(f'Downloading to: {goal_path}') @@ -436,7 +437,7 @@ class SessionPage(object): # -------------------显示并返回值------------------- if show_msg: - print(info) + print(info, '\n') info = f'{goal_path}\\{full_name}' if download_status else info return download_status, info @@ -455,6 +456,11 @@ class SessionPage(object): :param kwargs: 其它参数 :return: tuple,第一位为Response或None,第二位为出错信息或'Sussess' """ + if not str: + if show_errmsg: + raise ValueError('url is empty.') + return None, 'url is empty.' 
+ if mode not in ['get', 'post']: raise ValueError("Argument mode can only be 'get' or 'post'.") From c97e4bc37a3a75454d8966e92043505aad2e5b33 Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 2 Dec 2020 17:54:05 +0800 Subject: [PATCH 20/30] =?UTF-8?q?=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.en.md | 4 ---- README.zh-cn.md | 6 +----- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/README.en.md b/README.en.md index 395d6d0..54bc039 100644 --- a/README.en.md +++ b/README.en.md @@ -1191,8 +1191,6 @@ Copy the cookies of the driver object to the session object. Parameter Description: - copy_user_agent: bool - whether to copy user_agent to session -- driver: WebDriver- Copy the WebDriver object of cookies -- session: Session- Session object that receives cookies Returns: None @@ -1205,8 +1203,6 @@ Copy cookies from session to driver. Parameter Description: - url: str - the domain of cookies -- driver: WebDriver- WebDriver object that receives cookies -- session: Session- Copy the Session object of cookies Returns: None diff --git a/README.zh-cn.md b/README.zh-cn.md index 7b4a71c..0953bef 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -19,7 +19,7 @@ DrissionPage,即 driver 和 session 的合体。 **联系邮箱:** g1879@qq.com -**交流QQ群:**897838127 +**交流QQ群:** 897838127 # 理念及背景 @@ -1208,8 +1208,6 @@ Drission 类用于管理 WebDriver 对象和 Session 对象,是驱动器的角 参数说明: - copy_user_agent: bool - 是否复制 user_agent 到 session -- driver: WebDriver - 复制 cookies 的 WebDriver 对象 -- session: Session - 接收 cookies 的 Session 对象 返回: None @@ -1222,8 +1220,6 @@ Drission 类用于管理 WebDriver 对象和 Session 对象,是驱动器的角 参数说明: - url: str - cookies 的域 -- driver: WebDriver - 接收 cookies 的 WebDriver 对象 -- session: Session - 复制 cookies 的 Session 对象 返回: None From 237ed9607e542b624541ab4c9f55401f4c6454fd Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 2 Dec 2020 17:55:32 +0800 Subject: [PATCH 21/30] =?UTF-8?q?=E5=BE=AE=E8=B0=83?= MIME-Version: 1.0 
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/session_page.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 5ace7cb..4959bfd 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -387,7 +387,7 @@ class SessionPage(object): print(f'Downloading to: {goal_path}') if skip: - print('Skipped.') + print('Skipped.\n') # -------------------开始下载------------------- if skip: @@ -456,7 +456,7 @@ class SessionPage(object): :param kwargs: 其它参数 :return: tuple,第一位为Response或None,第二位为出错信息或'Sussess' """ - if not str: + if not url: if show_errmsg: raise ValueError('url is empty.') return None, 'url is empty.' From bfd967edf0631fde3e7b8836bd1eeabffcc5d885 Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 2 Dec 2020 17:55:56 +0800 Subject: [PATCH 22/30] =?UTF-8?q?=E7=BB=A7=E7=BB=AD=E4=BF=AE=E6=94=B9cooki?= =?UTF-8?q?es=E5=8A=9F=E8=83=BD=EF=BC=8C=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/config.py | 14 ++- DrissionPage/drission.py | 204 +++++++++++++++++++++++---------------- 2 files changed, 129 insertions(+), 89 deletions(-) diff --git a/DrissionPage/config.py b/DrissionPage/config.py index e9de3e5..a0fd752 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -727,12 +727,16 @@ def _session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Unio def _cookie_to_dict(cookie: Cookie) -> dict: """把Cookie对象转为dict格式""" + # print(cookie) if isinstance(cookie, Cookie): - # return cookie.__dict__ - cookie_dict = {'name': cookie.name, 'value': str(cookie.value), 'path': cookie.path, 'domain': cookie.domain} - - if cookie.expires: - cookie_dict['expiry'] = cookie.expires + cookie_dict = cookie.__dict__.copy() + cookie_dict.pop('rfc2109') + cookie_dict.pop('_rest') + return cookie_dict + # cookie_dict = {'name': cookie.name, 
'value': str(cookie.value), 'path': cookie.path, 'domain': cookie.domain} + # + # if cookie.expires: + # cookie_dict['expiry'] = cookie.expires elif isinstance(cookie, dict): return cookie diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 61cbdd3..19acd0f 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -6,12 +6,11 @@ """ from sys import exit from typing import Union -from urllib.parse import urlparse from requests import Session from requests.cookies import RequestsCookieJar from selenium import webdriver -from selenium.common.exceptions import WebDriverException, SessionNotCreatedException +from selenium.common.exceptions import SessionNotCreatedException, UnableToSetCookieException # , WebDriverException from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.webdriver import WebDriver from tldextract import extract @@ -20,6 +19,9 @@ from .config import (_dict_to_chrome_options, _session_options_to_dict, SessionOptions, DriverOptions, _chrome_options_to_dict, OptionsManager, _cookie_to_dict) +# from urllib.parse import urlparse + + class Drission(object): """Drission类用于管理WebDriver对象和Session对象,是驱动器的角色""" @@ -153,12 +155,19 @@ class Drission(object): self._driver.get(url) for cookie in cookies: - self._ensure_add_cookie(cookie) + # self._ensure_add_cookie(cookie) + self.set_cookies(cookie, set_driver=True) def set_cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str], set_session: bool = False, set_driver: bool = False) -> None: + """ + :param cookies: + :param set_session: + :param set_driver: + :return: + """ if isinstance(cookies, (list, tuple, RequestsCookieJar)): cookies = tuple(_cookie_to_dict(cookie) for cookie in cookies) elif isinstance(cookies, str): @@ -169,12 +178,34 @@ class Drission(object): raise TypeError for cookie in cookies: + if cookie['value'] is None: + cookie['value'] = '' + if set_session: - kwargs = {x: cookie[x] for x in cookie if x not in ('name', 
'value')} + kwargs = {x: cookie[x] for x in cookie if x not in ('name', 'value', 'httpOnly', 'expiry')} + + if 'expiry' in cookie: + kwargs['expires'] = cookie['expiry'] + self.session.cookies.set(cookie['name'], cookie['value'], **kwargs) if set_driver: - self.driver.add_cookie(cookie) + + try: + self.driver.add_cookie(cookie) + + except UnableToSetCookieException: + cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:] + + try: + browser_domain = extract(self.driver.current_url).fqdn + except AttributeError: + browser_domain = '' + + if cookie_domain not in browser_domain: + self.driver.get(f'http://{cookie_domain.lstrip("http://")}') + + self.driver.add_cookie(cookie) def add_a_cookie(self, cookie: str, @@ -199,103 +230,108 @@ class Drission(object): 'cert', 'stream', 'trust_env', 'max_redirects'] # , 'adapters' if 'cookies' in data: - self.set_cookies(data['cookies'], set_driver=False) + self.set_cookies(data['cookies'], set_session=True) for i in attrs: if i in data: self._session.__setattr__(i, data[i]) - def cookies_to_session(self, copy_user_agent: bool = False, - driver: WebDriver = None, - session: Session = None) -> None: + def cookies_to_session(self, copy_user_agent: bool = False) -> None: """把driver对象的cookies复制到session对象 \n :param copy_user_agent: 是否复制ua信息 - :param driver: 来源driver对象 - :param session: 目标session对象 :return: None """ - driver = driver or self.driver - session = session or self.session - if copy_user_agent: - self.user_agent_to_session(driver, session) + self.user_agent_to_session(self.driver, self.session) - for cookie in driver.get_cookies(): - session.cookies.set(cookie['name'], cookie['value'], domain=cookie['domain']) + self.set_cookies(self.driver.get_cookies(), set_session=True) + # for cookie in driver.get_cookies(): + # session.cookies.set(cookie['name'], cookie['value'], domain=cookie['domain']) - def cookies_to_driver(self, url: str, - driver: WebDriver = None, - session: Session = None) -> 
None: + def cookies_to_driver(self, url: str) -> None: """把session对象的cookies复制到driver对象 \n :param url: 作用域 - :param driver: 目标driver对象 - :param session: 来源session对象 :return: None """ - driver = driver or self.driver - session = session or self.session - domain = urlparse(url).netloc + url = extract(url) + domain = f'{url.domain}.{url.suffix}' + cookies = tuple(x for x in self.session.cookies if domain in x.domain) - if not domain: - raise Exception('Without specifying a domain') + self.set_cookies(cookies, set_driver=True) - # 翻译cookies - for i in [x for x in session.cookies if domain in x.domain]: - cookie_data = _cookie_to_dict(i) - self._ensure_add_cookie(cookie_data, driver=driver) - - def _ensure_add_cookie(self, cookie, override_domain=None, driver=None) -> None: - """添加cookie到driver \n - :param cookie: 要添加的cookie - :param override_domain: 覆盖作用域 - :param driver: 操作的driver对象 - :return: None - """ - driver = driver or self.driver - - if override_domain: - cookie['domain'] = override_domain - - cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' 
else cookie['domain'][1:] - - try: - browser_domain = extract(driver.current_url).fqdn - except AttributeError: - browser_domain = '' - - if cookie_domain not in browser_domain: - driver.get(f'http://{cookie_domain.lstrip("http://")}') - - if 'expiry' in cookie: - cookie['expiry'] = int(cookie['expiry']) - - driver.add_cookie(cookie) - - # 如果添加失败,尝试更宽的域名 - if not self._is_cookie_in_driver(cookie, driver): - cookie['domain'] = extract(cookie['domain']).registered_domain - driver.add_cookie(cookie) - - if not self._is_cookie_in_driver(cookie): - raise WebDriverException(f"Couldn't add the following cookie to the webdriver\n{cookie}\n") - - def _is_cookie_in_driver(self, cookie, driver=None) -> bool: - """检查cookie是否已经在driver里 \n - 只检查name、value、domain,检查domain时比较宽 \n - :param cookie: 要检查的cookie - :param driver: 被检查的driver - :return: 返回布尔值 - """ - driver = driver or self.driver - for driver_cookie in driver.get_cookies(): - - if (cookie['name'] == driver_cookie['name'] and - cookie['value'] == driver_cookie['value'] and - (cookie['domain'] == driver_cookie['domain'] or - f'.{cookie["domain"]}' == driver_cookie['domain'])): - return True - - return False + # def cookies_to_driver(self, url: str, + # driver: WebDriver = None, + # session: Session = None) -> None: + # """把session对象的cookies复制到driver对象 \n + # :param url: 作用域 + # :param driver: 目标driver对象 + # :param session: 来源session对象 + # :return: None + # """ + # driver = driver or self.driver + # session = session or self.session + # domain = urlparse(url).netloc + # + # if not domain: + # raise Exception('Without specifying a domain') + # + # # 翻译cookies + # for i in [x for x in session.cookies if domain in x.domain]: + # cookie_data = _cookie_to_dict(i) + # self._ensure_add_cookie(cookie_data, driver=driver) + # + # def _ensure_add_cookie(self, cookie, override_domain=None, driver=None) -> None: + # """添加cookie到driver \n + # :param cookie: 要添加的cookie + # :param override_domain: 覆盖作用域 + # :param driver: 操作的driver对象 + # 
:return: None + # """ + # driver = driver or self.driver + # + # if override_domain: + # cookie['domain'] = override_domain + # + # cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:] + # + # try: + # browser_domain = extract(driver.current_url).fqdn + # except AttributeError: + # browser_domain = '' + # + # if cookie_domain not in browser_domain: + # driver.get(f'http://{cookie_domain.lstrip("http://")}') + # + # if 'expiry' in cookie: + # cookie['expiry'] = int(cookie['expiry']) + # + # driver.add_cookie(cookie) + # + # # 如果添加失败,尝试更宽的域名 + # if not self._is_cookie_in_driver(cookie, driver): + # cookie['domain'] = extract(cookie['domain']).registered_domain + # driver.add_cookie(cookie) + # + # if not self._is_cookie_in_driver(cookie): + # raise WebDriverException(f"Couldn't add the following cookie to the webdriver\n{cookie}\n") + # + # def _is_cookie_in_driver(self, cookie, driver=None) -> bool: + # """检查cookie是否已经在driver里 \n + # 只检查name、value、domain,检查domain时比较宽 \n + # :param cookie: 要检查的cookie + # :param driver: 被检查的driver + # :return: 返回布尔值 + # """ + # driver = driver or self.driver + # for driver_cookie in driver.get_cookies(): + # + # if (cookie['name'] == driver_cookie['name'] and + # cookie['value'] == driver_cookie['value'] and + # (cookie['domain'] == driver_cookie['domain'] or + # f'.{cookie["domain"]}' == driver_cookie['domain'])): + # return True + # + # return False def user_agent_to_session(self, driver: WebDriver = None, session: Session = None) -> None: """把driver的user-agent复制到session \n From 63f80886ef0efeb4c661a432db9c80594f13ce43 Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 2 Dec 2020 22:34:54 +0800 Subject: [PATCH 23/30] =?UTF-8?q?=E5=9F=BA=E6=9C=AC=E5=AE=8C=E5=96=84cooki?= =?UTF-8?q?es=E6=96=B9=E9=9D=A2=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 114 ++++------------------------------- 
DrissionPage/mix_page.py | 25 +++++++- DrissionPage/session_page.py | 22 +++++-- README.en.md | 35 ++++++++++- README.zh-cn.md | 32 +++++++++- 5 files changed, 115 insertions(+), 113 deletions(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 19acd0f..2335225 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -10,7 +10,7 @@ from typing import Union from requests import Session from requests.cookies import RequestsCookieJar from selenium import webdriver -from selenium.common.exceptions import SessionNotCreatedException, UnableToSetCookieException # , WebDriverException +from selenium.common.exceptions import SessionNotCreatedException, UnableToSetCookieException from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.webdriver import WebDriver from tldextract import extract @@ -19,9 +19,6 @@ from .config import (_dict_to_chrome_options, _session_options_to_dict, SessionOptions, DriverOptions, _chrome_options_to_dict, OptionsManager, _cookie_to_dict) -# from urllib.parse import urlparse - - class Drission(object): """Drission类用于管理WebDriver对象和Session对象,是驱动器的角色""" @@ -155,18 +152,17 @@ class Drission(object): self._driver.get(url) for cookie in cookies: - # self._ensure_add_cookie(cookie) self.set_cookies(cookie, set_driver=True) def set_cookies(self, - cookies: Union[RequestsCookieJar, list, tuple, str], + cookies: Union[RequestsCookieJar, list, tuple, str, dict], set_session: bool = False, set_driver: bool = False) -> None: - """ - :param cookies: - :param set_session: - :param set_driver: - :return: + """设置cookies \n + :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict + :param set_session: 是否设置session的cookies + :param set_driver: 是否设置driver的cookies + :return: None """ if isinstance(cookies, (list, tuple, RequestsCookieJar)): cookies = tuple(_cookie_to_dict(cookie) for cookie in cookies) @@ -190,6 +186,8 @@ class Drission(object): self.session.cookies.set(cookie['name'], 
cookie['value'], **kwargs) if set_driver: + if 'expiry' in cookie: + cookie['expiry'] = int(cookie['expiry']) try: self.driver.add_cookie(cookie) @@ -203,25 +201,11 @@ class Drission(object): browser_domain = '' if cookie_domain not in browser_domain: - self.driver.get(f'http://{cookie_domain.lstrip("http://")}') + self.driver.get(cookie_domain if cookie_domain.startswith('http://') + else f'http://{cookie_domain}') self.driver.add_cookie(cookie) - def add_a_cookie(self, - cookie: str, - set_session: bool = False, - set_driver: bool = False): - pass - - def remove_a_cookie(self, - name: str, - set_session: bool = False, - set_driver: bool = False): - pass - - def clear_cookies(self): - pass - def _set_session(self, data: dict) -> None: if self._session is None: self._session = Session() @@ -245,8 +229,6 @@ class Drission(object): self.user_agent_to_session(self.driver, self.session) self.set_cookies(self.driver.get_cookies(), set_session=True) - # for cookie in driver.get_cookies(): - # session.cookies.set(cookie['name'], cookie['value'], domain=cookie['domain']) def cookies_to_driver(self, url: str) -> None: """把session对象的cookies复制到driver对象 \n @@ -259,80 +241,6 @@ class Drission(object): self.set_cookies(cookies, set_driver=True) - # def cookies_to_driver(self, url: str, - # driver: WebDriver = None, - # session: Session = None) -> None: - # """把session对象的cookies复制到driver对象 \n - # :param url: 作用域 - # :param driver: 目标driver对象 - # :param session: 来源session对象 - # :return: None - # """ - # driver = driver or self.driver - # session = session or self.session - # domain = urlparse(url).netloc - # - # if not domain: - # raise Exception('Without specifying a domain') - # - # # 翻译cookies - # for i in [x for x in session.cookies if domain in x.domain]: - # cookie_data = _cookie_to_dict(i) - # self._ensure_add_cookie(cookie_data, driver=driver) - # - # def _ensure_add_cookie(self, cookie, override_domain=None, driver=None) -> None: - # """添加cookie到driver \n - # :param 
cookie: 要添加的cookie - # :param override_domain: 覆盖作用域 - # :param driver: 操作的driver对象 - # :return: None - # """ - # driver = driver or self.driver - # - # if override_domain: - # cookie['domain'] = override_domain - # - # cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:] - # - # try: - # browser_domain = extract(driver.current_url).fqdn - # except AttributeError: - # browser_domain = '' - # - # if cookie_domain not in browser_domain: - # driver.get(f'http://{cookie_domain.lstrip("http://")}') - # - # if 'expiry' in cookie: - # cookie['expiry'] = int(cookie['expiry']) - # - # driver.add_cookie(cookie) - # - # # 如果添加失败,尝试更宽的域名 - # if not self._is_cookie_in_driver(cookie, driver): - # cookie['domain'] = extract(cookie['domain']).registered_domain - # driver.add_cookie(cookie) - # - # if not self._is_cookie_in_driver(cookie): - # raise WebDriverException(f"Couldn't add the following cookie to the webdriver\n{cookie}\n") - # - # def _is_cookie_in_driver(self, cookie, driver=None) -> bool: - # """检查cookie是否已经在driver里 \n - # 只检查name、value、domain,检查domain时比较宽 \n - # :param cookie: 要检查的cookie - # :param driver: 被检查的driver - # :return: 返回布尔值 - # """ - # driver = driver or self.driver - # for driver_cookie in driver.get_cookies(): - # - # if (cookie['name'] == driver_cookie['name'] and - # cookie['value'] == driver_cookie['value'] and - # (cookie['domain'] == driver_cookie['domain'] or - # f'.{cookie["domain"]}' == driver_cookie['domain'])): - # return True - # - # return False - def user_agent_to_session(self, driver: WebDriver = None, session: Session = None) -> None: """把driver的user-agent复制到session \n :param driver: 来源driver对象 diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index 4fe2b78..8ae6078 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -7,6 +7,7 @@ from typing import Union, List, Tuple from requests import Response, Session +from requests.cookies import RequestsCookieJar from 
selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.remote.webelement import WebElement @@ -139,10 +140,24 @@ class MixPage(Null, SessionPage, DriverPage): elif self._mode == 'd': return super(SessionPage, self).title - def get_cookies(self, as_dict: bool = False) -> Union[dict, list]: - """返回cookies""" + def set_cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: + """设置cookies \n + :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict + :return: None + """ if self._mode == 's': - return super().get_cookies(as_dict) + self.drission.set_cookies(cookies, set_session=True) + elif self._mode == 'd': + self.drission.set_cookies(cookies, set_driver=True) + + def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]: + """返回cookies \n + :param as_dict: 是否以字典方式返回 + :param all_domains: 是否返回所有域的cookies + :return: cookies信息 + """ + if self._mode == 's': + return super().get_cookies(as_dict, all_domains) elif self._mode == 'd': return super(SessionPage, self).get_cookies(as_dict) @@ -162,8 +177,10 @@ class MixPage(Null, SessionPage, DriverPage): if self._mode == 'd': self._driver = True self._url = None if not self._driver else self._drission.driver.current_url + if self._session_url: self.cookies_to_driver(self._session_url) + if go: self.get(self._session_url) @@ -171,8 +188,10 @@ class MixPage(Null, SessionPage, DriverPage): elif self._mode == 's': self._session = True self._url = self._session_url + if self._driver: self.cookies_to_session() + if go and self._drission.driver.current_url.startswith('http'): self.get(self._drission.driver.current_url) diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 4959bfd..e99d95f 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -15,6 +15,7 @@ from typing import Union, List, Tuple from urllib.parse import urlparse, quote, unquote from requests import Session, Response +from 
tldextract import extract from .common import str_to_loc, translate_loc, get_available_file_name, format_html from .config import _cookie_to_dict @@ -67,12 +68,23 @@ class SessionPage(object): """返回页面html文本""" return format_html(self.response.text) - def get_cookies(self, as_dict: bool = False) -> Union[dict, list]: - """返回session的cookies""" - if as_dict: - return self.session.cookies.get_dict() + def get_cookies(self, as_dict: bool = False, all_domains: bool = False) -> Union[dict, list]: + """返回cookies \n + :param as_dict: 是否以字典方式返回 + :param all_domains: 是否返回所有域的cookies + :return: cookies信息 + """ + if all_domains: + cookies = self.session.cookies else: - return [_cookie_to_dict(cookie) for cookie in self.session.cookies] + url = extract(self.url) + domain = f'{url.domain}.{url.suffix}' + cookies = tuple(x for x in self.session.cookies if domain in x.domain) + + if as_dict: + return {x.name: x.value for x in cookies} + else: + return [_cookie_to_dict(cookie) for cookie in cookies] def ele(self, loc_or_ele: Union[Tuple[str, str], str, SessionElement], diff --git a/README.en.md b/README.en.md index 54bc039..c27010e 100644 --- a/README.en.md +++ b/README.en.md @@ -547,6 +547,7 @@ page.current_tab_handle # Return to the current tab page handle When calling a method that only belongs to d mode, it will automatically switch to d mode. See APIs for detailed usage. ```python +page.set_cookies() # set cookies page.get_cookies() # Get cookies, which can be returned by list or dict page.change_mode() # Switch mode, it will automatically copy cookies page.cookies_to_session() # Copy cookies from WebDriver object to Session object @@ -1196,6 +1197,22 @@ Returns: None +### set_cookies() + +Set cookies. 
+ +Parameter Description: + +- cookies: Union[RequestsCookieJar, list, tuple, str, dict]-cookies information, can be CookieJar, list, tuple, str, dict + +- set_session: bool-whether to set session cookies + +- set_driver: bool-whether to set driver cookies + +Returns: None + + + ### cookies_to_driver() Copy cookies from session to driver. @@ -1341,13 +1358,29 @@ Returns: bool +### set_cookies() + +Set cookies. + +Parameter Description: + +- cookies: Union[RequestsCookieJar, list, tuple, str, dict] - cookies information, can be CookieJar, list, tuple, str, dict + +Returns: None + + + ### get_cookies() Return cookies. Parameter Description: -- as_dict: bool-Whether to return as dict, the default is to return complete cookies as list +- as_dict: bool - Whether to return as dict, the default is to return complete cookies as list + +- all_domains: bool - whether to return cookies of all domains, only valid in s mode + +Returns: a dictionary or list of cookies diff --git a/README.zh-cn.md b/README.zh-cn.md index 0953bef..af87651 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -551,6 +551,7 @@ page.current_tab_handle # 返回当前标签页 handle 调用只属于 d 模式的方法,会自动切换到 d 模式。详细用法见 APIs。 ```python +page.set_cookies() # 设置cookies page.get_cookies() # 获取 cookies,可以 list 或 dict 方式返回 page.change_mode() # 切换模式,会自动复制 cookies page.cookies_to_session() # 从 WebDriver 对象复制 cookies 到 Session 对象 @@ -1201,6 +1202,20 @@ Drission 类用于管理 WebDriver 对象和 Session 对象,是驱动器的角 +### set_cookies() + +设置 cookies。 + +参数说明: + +- cookies: Union[RequestsCookieJar, list, tuple, str, dict] - cookies 信息,可为CookieJar, list, tuple, str, dict +- set_session: bool - 是否设置 session 的 cookies +- set_driver: bool - 是否设置 driver 的 cookies + +返回: None + + + ### cookies_to_session() 把 driver 对象的 cookies 复制到 session 对象。 @@ -1358,13 +1373,28 @@ MixPage 封装了页面操作的常用功能,可在 driver 和 session 模式 +### set_cookies() + +设置 cookies。 + +参数说明: + +- cookies: Union[RequestsCookieJar, list, tuple, str, dict] - cookies 信息,可为CookieJar, list, tuple, 
str, dict + +返回: None + + + ### get_cookies() 返回 cookies。 参数说明: -- as_dict: bool - 是否以 dict 方式返回,默认以 list 返回完整的 cookies +- as_dict: bool - 是否以 dict 方式返回,默认以 list 返回完整的 cookies +- all_domains: bool - 是否返回所有域名的 cookies,只有 s 模式下生效 + +返回:cookies 字典或列表 From d50be35a0aff7b0866cdeadee3c19cad67bcfe79 Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 2 Dec 2020 23:19:23 +0800 Subject: [PATCH 24/30] =?UTF-8?q?=E5=AE=8C=E5=96=84SessionOptions=E5=8A=9F?= =?UTF-8?q?=E8=83=BD=EF=BC=8C=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/config.py | 37 +++++++++++++++---------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/DrissionPage/config.py b/DrissionPage/config.py index a0fd752..38c5cf3 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -201,6 +201,7 @@ class SessionOptions(object): """返回cookies设置信息""" if self._cookies is None: self._cookies = [] + return self._cookies @property @@ -213,6 +214,7 @@ class SessionOptions(object): """返回proxies设置信息""" if self._proxies is None: self._proxies = {} + return self._proxies @property @@ -220,6 +222,7 @@ class SessionOptions(object): """返回hooks设置信息""" if self._hooks is None: self._hooks = {} + return self._hooks @property @@ -268,7 +271,7 @@ class SessionOptions(object): self._headers = {key.lower(): headers[key] for key in headers} @cookies.setter - def cookies(self, cookies: Union[list, tuple]) -> None: + def cookies(self, cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> None: """设置cookies参数 \n :param cookies: 参数值 :return: None @@ -381,16 +384,6 @@ class SessionOptions(object): return self - def add_cookie(self, cookie): - pass - - def remove_cookie(self, name: str): - pass - - def clear_cookies(self): - """清空cookies""" - self.cookies = None - def save(self, path: str = None): """保存设置到文件 \n :param path: ini文件的路径,传入 'default' 保存到默认ini文件 @@ -420,6 +413,7 @@ class SessionOptions(object): return 
self def as_dict(self) -> dict: + """以字典形式返回本对象""" return _session_options_to_dict(self) @@ -696,7 +690,7 @@ def _chrome_options_to_dict(options: Union[dict, DriverOptions, Options, None]) def _session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Union[dict, None]: - """把session配置对象装换为字典 \n + """把session配置对象转换为字典 \n :param options: session配置对象或字典 :return: 配置字典 """ @@ -706,12 +700,13 @@ def _session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Unio re_dict = dict() attrs = ['headers', 'proxies', 'hooks', 'params', 'verify', 'stream', 'trust_env', 'max_redirects'] # 'adapters', - val = options.__getattribute__(f'_cookies') + val = options.__getattribute__('_cookies') + if val is not None: - if isinstance(val, (list, tuple)): - re_dict['cookies'] = val - elif isinstance(val, RequestsCookieJar): + if isinstance(val, RequestsCookieJar): re_dict['cookies'] = [_cookie_to_dict(cookie) for cookie in val] + else: + re_dict['cookies'] = val for attr in attrs: val = options.__getattribute__(f'_{attr}') @@ -726,17 +721,15 @@ def _session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Unio def _cookie_to_dict(cookie: Cookie) -> dict: - """把Cookie对象转为dict格式""" - # print(cookie) + """把Cookie对象转为dict格式 \n + :param cookie: Cookie对象 + :return: cookie字典 + """ if isinstance(cookie, Cookie): cookie_dict = cookie.__dict__.copy() cookie_dict.pop('rfc2109') cookie_dict.pop('_rest') return cookie_dict - # cookie_dict = {'name': cookie.name, 'value': str(cookie.value), 'path': cookie.path, 'domain': cookie.domain} - # - # if cookie.expires: - # cookie_dict['expiry'] = cookie.expires elif isinstance(cookie, dict): return cookie From 2116ff6c4b0db1ef99eca0b075722027f06779c1 Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 3 Dec 2020 17:12:05 +0800 Subject: [PATCH 25/30] =?UTF-8?q?=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.en.md | 197 
+++++++++++++++++++++++++++++++++++++++++++++++- README.zh-cn.md | 186 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 375 insertions(+), 8 deletions(-) diff --git a/README.en.md b/README.en.md index c27010e..31f7923 100644 --- a/README.en.md +++ b/README.en.md @@ -859,7 +859,7 @@ page.download(url, save_path,'img','rename', show_msg=True) -## Chrome Quick Settings +## Chrome Settings The configuration of chrome is very cumbersome. In order to simplify the use, this library provides setting methods for common configurations. @@ -899,14 +899,50 @@ do.set_paths(driver_path='D:\\chromedriver.exe', chrome_path='D:\\chrome.exe') # do.set_headless(False).set_no_imgs(True) # Support chain operation drission = Drission(driver_options=do) # Create Drission object with configuration object -page = MixPage(drission) # Create MixPage object with Drission object +page = MixPage(driver_options=do) # Create MixPage object with configuration object -do.save() # Save the currently opened ini file +do.save() # save the currently opened ini file +do.save('D:\\settings.ini') # save to the specified ini file do.save('default') # Save the current settings to the default ini file ``` +## Session Settings + +### SessionOPtions Object + +The SessionOptions object is used to manage the configuration information of the Session. It reads the default ini file configuration information by default when it is created, or you can manually set the required information. + +Configurable properties: + +headers, cookies, auth, proxies, hooks, params, verify, cert, adapters, stream, trust_env, max_redirects. + +**Tips:** cookies can receive information in dict, list, tuple, str, RequestsCookieJar and other formats. 
+ + + +### Instructions + +```python +so = SessionOptions() # read the default ini file to create a SessionOptions object +so = SessionOptions('D:\\settings.ini') # read the specified ini file to create a SessionOptions object +so = SessionOptions(read_file=False) # Do not read the ini file, create an empty SessionOptions object + +so.cookies = ['key1=val1; domain=xxxx','key2=val2; domain=xxxx'] # set cookies +so.headers = {'User-Agent':'xxxx','Accept-Charset':'xxxx'} +so.set_a_header('Connection','keep-alive') + +drission = Drission(session_options=so) # Create Drission object with configuration object +page = MixPage(session_options=so) # Create MixPage object with configuration object + +so.save() # Save the currently opened ini file +so.save('D:\\settings.ini') # save to the specified ini file +so.save('default') # Save the current settings to the default ini file +``` + + + ## Save configuration Because there are many configurations of chrome and headers, an ini file is set up specifically to save common configurations. You can use the OptionsManager object to get and save the configuration, and use the DriverOptions object to modify the chrome configuration. You can also save multiple ini files and call them according to different projects. @@ -2708,6 +2744,161 @@ Return: OptionsManager - return to yourself +## SessionOptions class + +### class SessionOptions() + +Session object configuration class. + +Parameter Description: + +- read_file: bool-whether to read configuration information from ini file when creating +- ini_path: str-the path of the ini file, if it is None, the default ini file will be read + + + +### headers + +headers configuration information. + +Returns: dict + + + +### cookies + +Cookies configuration information. + +Returns: list + + + +### auth + +auth configuration information. + +Returns: tuple + + + +### proxies + +proxies configuration information. + +Returns: dict + + + +### hooks + +hooks configuration information.
+ +Returns: dict + + + +### params + +params configuration information. + +Returns: dict + + + +### verify + +Verify configuration information. + +Returns: bool + + + +### cert + +cert configuration information. + +Returns: [str, tuple] + + + +### adapters + +Adapters configuration information. + +Returns: adapters + + + +### stream + +stream configuration information. + +Returns: bool + + + +### trust_env + +trust_env configuration information. + +Returns: bool + + + +### max_redirects + +max_redirects configuration information. + +Returns: int + + + +### set_a_header() + +Set an item in headers. + +Parameter Description: + +- attr: str-configuration item name + +- value: str-configured value + +Returns: the current object + + + +### remove_a_header() + +Remove a setting from headers. + +Parameter Description: + +- attr: str-the name of the configuration to be deleted + +Returns: current object + + + +### save() + +Save the settings to a file. + +Parameter Description: + +- path: str-the path of the ini file, pass in 'default' and save to the default ini file + +Returns: current object + + + +### as_dict() + +Return the current object as a dictionary.
+ +Returns: dict + + + ## DriverOptions class ### class DriverOptions() diff --git a/README.zh-cn.md b/README.zh-cn.md index af87651..dc1dabc 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -17,9 +17,9 @@ DrissionPage,即 driver 和 session 的合体。 **示例地址:** [使用DrissionPage的网页自动化及爬虫示例](https://gitee.com/g1879/DrissionPage-demos) -**联系邮箱:** g1879@qq.com +**联系邮箱:** g1879@qq.com -**交流QQ群:** 897838127 +**交流QQ群:** 897838127 # 理念及背景 @@ -902,9 +902,10 @@ do.set_paths(driver_path='D:\\chromedriver.exe', chrome_path='D:\\chrome.exe') do.set_headless(False).set_no_imgs(True) # 支持链式操作 drission = Drission(driver_options=do) # 用配置对象创建 Drission 对象 -page = MixPage(drission) # 用Drission对象创建 MixPage 对象 +page = MixPage(driver_options=do) # 用配置对象创建 MixPage 对象 do.save() # 保存当前打开的 ini 文件 +do.save('D:\\settings.ini') # 保存到指定的 ini 文件 do.save('default') # 保存当前设置到默认 ini 文件 ``` @@ -912,16 +913,37 @@ do.save('default') # 保存当前设置到默认 ini 文件 ## Session 设置 - - ### SessionOPtions 对象 SessionOptions 对象用于管理 Session 的配置信息。它创建时默认读取默认 ini 文件配置信息,也可手动设置所需信息。 +可配置的属性: + +headers、cookies、auth、proxies、hooks、params、verify、cert、adapters、stream、trust_env、max_redirects。 + +**Tips:** cookies 可接收 dict、list、tuple、str、RequestsCookieJar 等格式的信息。 + ### 使用方法 +```python +so = SessionOptions() # 读取默认 ini 文件创建 SessionOptions 对象 +so = SessionOptions('D:\\settings.ini') # 读取指定 ini 文件创建 SessionOptions 对象 +so = SessionOptions(read_file=False) # 不读取 ini 文件,创建空的 SessionOptions 对象 + +so.cookies = ['key1=val1; domain=xxxx', 'key2=val2; domain=xxxx'] # 设置 cookies +so.headers = {'User-Agent': 'xxxx', 'Accept-Charset': 'xxxx'} +so.set_a_header('Connection', 'keep-alive') + +drission = Drission(session_options=so) # 用配置对象创建 Drission 对象 +page = MixPage(session_options=so) # 用配置对象创建 MixPage 对象 + +so.save() # 保存当前打开的 ini 文件 +so.save('D:\\settings.ini') # 保存到指定的 ini 文件 +so.save('default') # 保存当前设置到默认 ini 文件 +``` + @@ -2684,6 +2706,160 @@ shadow-root 所依赖的父元素。 +## SessionOptions 类 + +### class SessionOptions() + +Session 对象配置类。 + +参数说明: + 
+- read_file: bool - 创建时是否从 ini 文件读取配置信息 +- ini_path: str - ini 文件路径,为None则读取默认 ini 文件 + + + +### headers + +headers 配置信息。 + +返回: dict + + + +### cookies + +cookies 配置信息。 + +返回: list + + + +### auth + +auth 配置信息。 + +返回: tuple + + + +### proxies + +proxies 配置信息。 + +返回: dict + + + +### hooks + +hooks 配置信息。 + +返回: dict + + + +### params + +params 配置信息。 + +返回: dict + + + +### verify + +verify 配置信息。 + +返回: bool + + + +### cert + +cert 配置信息。 + +返回: [str, tuple] + + + +### adapters + +adapters 配置信息。 + +返回: adapters + + + +### stream + +stream 配置信息。 + +返回: bool + + + +### trust_env + +srust_env 配置信息。 + +返回: bool + + + +### max_redirects + +max_redirect 配置信息。 + +返回: int + + + +### set_a_header() + +设置 headers 中一个项。 + +参数说明: + +- attr: str - 配置项名称 +- value: str - 配置的值 + +返回: 当前对象 + + + +### remove_a_header() + +从 headers 中删除一个设置。 + +参数说明: + +- attr: str - 要删除的配置名称 + +返回:当前对象 + + + +### save() + +保存设置到文件。 + +参数说明: + +- path: str - ini文件的路径,传入 'default' 保存到默认ini文件 + +返回:当前对象 + + + +### as_dict() + +以字典形式返回当前对象。 + +返回: dict + + + ## DriverOptions 类 ### class DriverOptions() From e5e7c57b32558c5e004a375ace248c7ac9187cff Mon Sep 17 00:00:00 2001 From: g1879 Date: Thu, 3 Dec 2020 17:13:02 +0800 Subject: [PATCH 26/30] =?UTF-8?q?=E5=9F=BA=E6=9C=AC=E5=AE=8C=E6=88=90cooki?= =?UTF-8?q?es=E7=9B=B8=E5=85=B3=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/config.py | 57 ++++++++++++++++++++++++++++++++-------- DrissionPage/drission.py | 37 ++++++++++---------------- 2 files changed, 60 insertions(+), 34 deletions(-) diff --git a/DrissionPage/config.py b/DrissionPage/config.py index 38c5cf3..3d331c1 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -358,8 +358,8 @@ class SessionOptions(object): """ self._max_redirects = max_redirects - def set_header(self, attr: str, value: str): - """设置header中一个项 \n + def set_a_header(self, attr: str, value: str): + """设置headers中一个项 \n :param attr: 设置名称 :param 
value: 设置值 :return: 返回当前对象 @@ -370,7 +370,7 @@ class SessionOptions(object): self._headers[attr.lower()] = value return self - def remove_header(self, attr: str): + def remove_a_header(self, attr: str): """从headers中删除一个设置 \n :param attr: 要删除的设置 :return: 返回当前对象 @@ -700,13 +700,10 @@ def _session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Unio re_dict = dict() attrs = ['headers', 'proxies', 'hooks', 'params', 'verify', 'stream', 'trust_env', 'max_redirects'] # 'adapters', - val = options.__getattribute__('_cookies') + cookies = options.__getattribute__('_cookies') - if val is not None: - if isinstance(val, RequestsCookieJar): - re_dict['cookies'] = [_cookie_to_dict(cookie) for cookie in val] - else: - re_dict['cookies'] = val + if cookies is not None: + re_dict['cookies'] = _cookies_to_tuple(cookies) for attr in attrs: val = options.__getattribute__(f'_{attr}') @@ -720,7 +717,7 @@ def _session_options_to_dict(options: Union[dict, SessionOptions, None]) -> Unio return re_dict -def _cookie_to_dict(cookie: Cookie) -> dict: +def _cookie_to_dict(cookie: Union[Cookie, str, dict]) -> dict: """把Cookie对象转为dict格式 \n :param cookie: Cookie对象 :return: cookie字典 @@ -732,6 +729,44 @@ def _cookie_to_dict(cookie: Cookie) -> dict: return cookie_dict elif isinstance(cookie, dict): - return cookie + cookie_dict = cookie + + elif isinstance(cookie, str): + cookie = cookie.split(';') + cookie_dict = {} + + for key, attr in enumerate(cookie): + attr_val = attr.lstrip().split('=') + + if key == 0: + cookie_dict['name'] = attr_val[0] + cookie_dict['value'] = attr_val[1] + else: + cookie_dict[attr_val[0]] = attr_val[1] + + return cookie_dict + else: raise TypeError + + return cookie_dict + + +def _cookies_to_tuple(cookies: Union[RequestsCookieJar, list, tuple, str, dict]) -> tuple: + """把cookies转为tuple格式 \n + :param cookies: cookies信息,可为CookieJar, list, tuple, str, dict + :return: 返回tuple形式的cookies + """ + if isinstance(cookies, (list, tuple, RequestsCookieJar)): + cookies 
= tuple(_cookie_to_dict(cookie) for cookie in cookies) + + elif isinstance(cookies, str): + cookies = tuple(dict([cookie.lstrip().split("=", 1)]) for cookie in cookies.split(";")) + + elif isinstance(cookies, dict): + cookies = tuple({'name': cookie, 'value': cookies[cookie]} for cookie in cookies) + + else: + raise TypeError + + return cookies diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 2335225..0b88c50 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -10,13 +10,13 @@ from typing import Union from requests import Session from requests.cookies import RequestsCookieJar from selenium import webdriver -from selenium.common.exceptions import SessionNotCreatedException, UnableToSetCookieException +from selenium.common.exceptions import SessionNotCreatedException from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.webdriver import WebDriver from tldextract import extract from .config import (_dict_to_chrome_options, _session_options_to_dict, - SessionOptions, DriverOptions, _chrome_options_to_dict, OptionsManager, _cookie_to_dict) + SessionOptions, DriverOptions, _chrome_options_to_dict, OptionsManager, _cookies_to_tuple) class Drission(object): @@ -164,19 +164,13 @@ class Drission(object): :param set_driver: 是否设置driver的cookies :return: None """ - if isinstance(cookies, (list, tuple, RequestsCookieJar)): - cookies = tuple(_cookie_to_dict(cookie) for cookie in cookies) - elif isinstance(cookies, str): - cookies = tuple(dict([cookie.lstrip().split("=", 1)]) for cookie in cookies.split(";")) - elif isinstance(cookies, dict): - cookies = tuple({'name': cookie, 'value': cookies[cookie]} for cookie in cookies) - else: - raise TypeError + cookies = _cookies_to_tuple(cookies) for cookie in cookies: if cookie['value'] is None: cookie['value'] = '' + # 添加cookie到session if set_session: kwargs = {x: cookie[x] for x in cookie if x not in ('name', 'value', 'httpOnly', 'expiry')} @@ -185,26 +179,23 @@ 
class Drission(object): self.session.cookies.set(cookie['name'], cookie['value'], **kwargs) + # 添加cookie到driver if set_driver: if 'expiry' in cookie: cookie['expiry'] = int(cookie['expiry']) + cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:] + try: - self.driver.add_cookie(cookie) + browser_domain = extract(self.driver.current_url).fqdn + except AttributeError: + browser_domain = '' - except UnableToSetCookieException: - cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:] + if cookie_domain not in browser_domain: + self.driver.get(cookie_domain if cookie_domain.startswith('http://') + else f'http://{cookie_domain}') - try: - browser_domain = extract(self.driver.current_url).fqdn - except AttributeError: - browser_domain = '' - - if cookie_domain not in browser_domain: - self.driver.get(cookie_domain if cookie_domain.startswith('http://') - else f'http://{cookie_domain}') - - self.driver.add_cookie(cookie) + self.driver.add_cookie(cookie) def _set_session(self, data: dict) -> None: if self._session is None: From 0a5315b93aea36cca50cf5eb7328d213a880915f Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 4 Dec 2020 17:14:55 +0800 Subject: [PATCH 27/30] =?UTF-8?q?close=5Fother=5Ftabs()=E7=8E=B0=E5=9C=A8?= =?UTF-8?q?=E5=8F=AF=E4=BB=A5=E4=BF=9D=E7=95=99=E5=A4=9A=E4=B8=AAtab?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_page.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index f338672..9ea3ad5 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -115,7 +115,7 @@ class DriverPage(object): def ele(self, loc_or_ele: Union[Tuple[str, str], str, DriverElement, WebElement], mode: str = None, - timeout: float = None) -> Union[DriverElement, List[DriverElement or str], str, None]: + timeout: float = None) 
-> Union[DriverElement, List[DriverElement], str, None]: """返回页面中符合条件的元素,默认返回第一个 \n 示例: \n - 接收到元素对象时: \n @@ -176,7 +176,7 @@ class DriverPage(object): def eles(self, loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> List[DriverElement or str]: + timeout: float = None) -> List[DriverElement]: """返回页面中所有符合条件的元素 \n 示例: \n - 用loc元组查找: \n @@ -335,33 +335,35 @@ class DriverPage(object): if self.tabs_count: self.to_tab(0) - def close_other_tabs(self, num_or_handle: Union[int, str] = None) -> None: - """关闭传入的标签页以外标签页,默认保留当前页 \n - :param num_or_handle: 要保留的标签页序号或handle,序号第一个为0,最后为-1 + def close_other_tabs(self, num_or_handles: Union[int, str, list, tuple] = None) -> None: + """关闭传入的标签页以外标签页,默认保留当前页。可传入列表或元组 \n + :param num_or_handles: 要保留的标签页序号或handle,可传入handle组成的列表或元组 :return: None """ try: - tab = int(num_or_handle) + tab = int(num_or_handles) except (ValueError, TypeError): - tab = num_or_handle + tab = num_or_handles tabs = self.driver.window_handles if tab is None: - page_handle = self.current_tab_handle + page_handle = (self.current_tab_handle,) elif isinstance(tab, int): - page_handle = tabs[tab] + page_handle = (tabs[tab],) elif isinstance(tab, str): + page_handle = (tab,) + elif isinstance(tab, (list, tuple)): page_handle = tab else: - raise TypeError('Argument num_or_handle can only be int or str.') + raise TypeError('Argument num_or_handle can only be int, str, list or tuple.') for i in tabs: # 遍历所有标签页,关闭非保留的 - if i != page_handle: + if i not in page_handle: self.driver.switch_to.window(i) self.driver.close() - self.driver.switch_to.window(page_handle) # 把权柄定位回保留的页面 + self.driver.switch_to.window(page_handle[0]) # 把权柄定位回保留的页面 def to_tab(self, num_or_handle: Union[int, str] = 0) -> None: """跳转到标签页 \n From b9365a9361e3342bf4e1c76cf57a2224a98049ad Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 4 Dec 2020 17:16:18 +0800 Subject: [PATCH 28/30] =?UTF-8?q?=E8=B0=83=E6=95=B4=E6=B3=A8=E9=87=8A?= MIME-Version: 1.0 Content-Type: text/plain; 
charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_element.py | 4 ++-- DrissionPage/mix_page.py | 5 +++-- DrissionPage/session_element.py | 4 ++-- DrissionPage/session_page.py | 4 ++-- README.en.md | 6 +++--- README.zh-cn.md | 6 +++--- 6 files changed, 15 insertions(+), 14 deletions(-) diff --git a/DrissionPage/driver_element.py b/DrissionPage/driver_element.py index 681a7a2..4ad440a 100644 --- a/DrissionPage/driver_element.py +++ b/DrissionPage/driver_element.py @@ -140,7 +140,7 @@ class DriverElement(DrissionElement): return self.get_style_property('content', 'after') # -----------------共有函数------------------- - def texts(self, text_node_only: bool = False) -> List[str]: + def texts(self, text_node_only: bool = False) -> list: """返回元素内所有直接子节点的文本,包括元素和文本节点 \n :param text_node_only: 是否只返回文本节点 :return: 文本列表 @@ -567,7 +567,7 @@ class DriverElement(DrissionElement): def execute_driver_find(page_or_ele, loc: Tuple[str, str], mode: str = 'single', - timeout: float = 10) -> Union[DriverElement, List[DriverElement or str], str, None]: + timeout: float = 10) -> Union[DriverElement, List[DriverElement], str, None]: """执行driver模式元素的查找 \n 页面查找元素及元素查找下级元素皆使用此方法 \n :param page_or_ele: DriverPage对象或DriverElement对象 diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index 8ae6078..3ff1eae 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -336,7 +336,8 @@ class MixPage(Null, SessionPage, DriverPage): def ele(self, loc_or_ele: Union[Tuple[str, str], str, DriverElement, SessionElement, WebElement], mode: str = None, - timeout: float = None) -> Union[DriverElement, SessionElement, str]: + timeout: float = None) -> Union[ + DriverElement, SessionElement, str, List[SessionElement], List[DriverElement]]: """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n 示例: \n - 接收到元素对象时: \n @@ -376,7 +377,7 @@ class MixPage(Null, SessionPage, DriverPage): def eles(self, loc_or_str: Union[Tuple[str, str], str], - timeout: float = None) -> Union[List[DriverElement 
or str], List[SessionElement or str]]: + timeout: float = None) -> Union[List[DriverElement], List[SessionElement]]: """返回页面中所有符合条件的元素、属性或节点文本 \n 示例: \n - 用loc元组查找: \n diff --git a/DrissionPage/session_element.py b/DrissionPage/session_element.py index a7d5b1c..793a3cd 100644 --- a/DrissionPage/session_element.py +++ b/DrissionPage/session_element.py @@ -92,7 +92,7 @@ class SessionElement(DrissionElement): """返回前一个兄弟元素""" return self._get_brother(1, 'ele', 'prev') - def texts(self, text_node_only: bool = False) -> List[str]: + def texts(self, text_node_only: bool = False) -> list: """返回元素内所有直接子节点的文本,包括元素和文本节点 \n :param text_node_only: 是否只返回文本节点 :return: 文本列表 @@ -340,7 +340,7 @@ class SessionElement(DrissionElement): def execute_session_find(page_or_ele, loc: Tuple[str, str], - mode: str = 'single', ) -> Union[SessionElement, List[SessionElement or str], str, None]: + mode: str = 'single', ) -> Union[SessionElement, List[SessionElement], str, None]: """执行session模式元素的查找 \n 页面查找元素及元素查找下级元素皆使用此方法 \n :param page_or_ele: SessionPage对象或SessionElement对象 diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index e99d95f..ca7ca0c 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -88,7 +88,7 @@ class SessionPage(object): def ele(self, loc_or_ele: Union[Tuple[str, str], str, SessionElement], - mode: str = None) -> Union[SessionElement, List[SessionElement or str], str, None]: + mode: str = None) -> Union[SessionElement, List[SessionElement], str, None]: """返回页面中符合条件的元素、属性或节点文本,默认返回第一个 \n 示例: \n - 接收到元素对象时: \n @@ -140,7 +140,7 @@ class SessionPage(object): return execute_session_find(self, loc_or_ele, mode) def eles(self, - loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement or str]: + loc_or_str: Union[Tuple[str, str], str]) -> List[SessionElement]: """返回页面中所有符合条件的元素、属性或节点文本 \n 示例: \n - 用loc元组查找: \n diff --git a/README.en.md b/README.en.md index 31f7923..d124a59 100644 --- a/README.en.md +++ b/README.en.md @@ -568,7 
+568,7 @@ page.run_script(js, *args) # Run js statement page.create_tab(url) # Create and locate a tab page, which is at the end page.to_tab(num_or_handle) # Jump to tab page page.close_current_tab() # Close the current tab page -page.close_other_tabs(num) # Close other tabs +page.close_other_tabs(num_or_handles) # Close other tabs page.to_iframe(iframe) # cut into iframe page.screenshot(path) # Page screenshot page.scrool_to_see(element) # Scroll until an element is visible @@ -1669,11 +1669,11 @@ Returns: None ### close_other_tabs() -Close tab pages other than the incoming tab page, and keep the current page by default. +Close tab pages other than the incoming tab page, and keep the current page by default. You can pass in a list or tuple. Parameter Description: -- num_or_handle:[int, str] - The serial number or handle of the tab to keep, the first serial number is 0, and the last is - 1 +- num_or_handles:[int, str]-The serial number or handle of the tab to keep, you can pass in a list or tuple of handles Returns: None diff --git a/README.zh-cn.md b/README.zh-cn.md index dc1dabc..195f1d0 100644 --- a/README.zh-cn.md +++ b/README.zh-cn.md @@ -572,7 +572,7 @@ page.run_script(js, *args) # 运行 js 语句 page.create_tab(url) # 新建并定位到一个标签页,该标签页在最后面 page.to_tab(num_or_handle) # 跳转到标签页 page.close_current_tab() # 关闭当前标签页 -page.close_other_tabs(num) # 关闭其它标签页 +page.close_other_tabs(num_or_handles) # 关闭其它标签页 page.to_iframe(iframe) # 切入 iframe page.screenshot(path) # 页面截图 page.scroll_to_see(element) # 滚动直到某元素可见 @@ -1669,11 +1669,11 @@ d 模式时检查网页是否符合预期。默认由 response 状态检查, ### close_other_tabs() -关闭传入的标签页以外标签页,默认保留当前页。 +关闭传入的标签页以外标签页,默认保留当前页。可传入列表或元组。 参数说明: -- num_or_handle:[int, str] - 要保留的标签页序号或 handle,序号第一个为0,最后为-1 +- num_or_handles:[int, str] - 要保留的标签页序号或 handle,可传入 handle 组成的列表或元组 返回: None From 99fc7d650a9ac5ba259c86817ca1099cee25e363 Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 4 Dec 2020 18:03:34 +0800 Subject: [PATCH 29/30] =?UTF-8?q?=E4=BC=98=E5=8C=96cookies=E7=9B=B8?= 
=?UTF-8?q?=E5=85=B3=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 0b88c50..8be5cc6 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -184,13 +184,23 @@ class Drission(object): if 'expiry' in cookie: cookie['expiry'] = int(cookie['expiry']) - cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:] - try: browser_domain = extract(self.driver.current_url).fqdn except AttributeError: browser_domain = '' + if not cookie.get('domain', None): + if browser_domain: + url = extract(browser_domain) + cookie_domain = f'{url.domain}.{url.suffix}' + else: + raise ValueError('There is no domain name in the cookie or the browser has not visited a URL.') + + cookie['domain'] = cookie_domain + + else: + cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' 
else cookie['domain'][1:] + if cookie_domain not in browser_domain: self.driver.get(cookie_domain if cookie_domain.startswith('http://') else f'http://{cookie_domain}') From 95beaaa7e629f1bfecb36a9da2ff2d0842b714be Mon Sep 17 00:00:00 2001 From: g1879 Date: Fri, 4 Dec 2020 18:04:15 +0800 Subject: [PATCH 30/30] =?UTF-8?q?=E7=94=A8RawConfigParser=E4=BB=A3?= =?UTF-8?q?=E6=9B=BFConfigParser=EF=BC=8C=E9=81=BF=E5=85=8D=E9=85=8D?= =?UTF-8?q?=E7=BD=AE=E7=9A=84cookies=E4=B8=AD=E7=9A=84=E7=89=B9=E6=AE=8A?= =?UTF-8?q?=E5=AD=97=E7=AC=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/DrissionPage/config.py b/DrissionPage/config.py index 3d331c1..b7ea249 100644 --- a/DrissionPage/config.py +++ b/DrissionPage/config.py @@ -5,7 +5,7 @@ @Contact : g1879@qq.com @File : config.py """ -from configparser import ConfigParser, NoSectionError, NoOptionError +from configparser import RawConfigParser, NoSectionError, NoOptionError from http.cookiejar import Cookie from pathlib import Path from typing import Any, Union @@ -23,7 +23,7 @@ class OptionsManager(object): :param path: ini文件的路径,默认读取模块文件夹下的 """ self.ini_path = path or str(Path(__file__).parent / 'configs.ini') - self._conf = ConfigParser() + self._conf = RawConfigParser() self._conf.read(self.ini_path, encoding='utf-8') self._paths = None