From b00c06ecdee8531e83472ac6a12b6ee4d8139202 Mon Sep 17 00:00:00 2001 From: g1879 Date: Mon, 15 Jun 2020 16:42:15 +0800 Subject: [PATCH 1/4] =?UTF-8?q?to=5Fiframe()=E5=88=A4=E6=96=AD=E6=8E=A7?= =?UTF-8?q?=E5=88=B6=E5=AD=97=E7=AC=A6=E4=B8=B2=E6=97=B6=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=3D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/driver_page.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index 1a93972..dd0dd28 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -5,7 +5,6 @@ @File : driver_page.py """ from glob import glob -from time import sleep from typing import Union, List, Any from urllib.parse import quote @@ -13,8 +12,7 @@ from selenium.common.exceptions import NoAlertPresentException from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.remote.webelement import WebElement -from .common import get_loc_from_str, clean_folder, avoid_duplicate_name -from .config import OptionsManager +from .common import get_loc_from_str, avoid_duplicate_name from .driver_element import DriverElement, execute_driver_find @@ -147,7 +145,7 @@ class DriverPage(object): self.driver.switch_to.default_content() elif loc_or_ele == 'parent': self.driver.switch_to.parent_frame() - elif ':' not in loc_or_ele: + elif ':' not in loc_or_ele and '=' not in loc_or_ele: # 传入id或name self.driver.switch_to.frame(loc_or_ele) else: From 4497ee9fecd90bded3a1226866d8949239476c8d Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 16 Jun 2020 18:06:45 +0800 Subject: [PATCH 2/4] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E4=BB=A3=E7=90=86?= =?UTF-8?q?=E8=AE=BE=E7=BD=AE=EF=BC=8C=E6=9C=AA=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/mix_page.py | 11 +++-------- DrissionPage/session_page.py | 17 ++++++++++++++--- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index c06cec7..fafb624 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -5,8 +5,6 @@ @File : mix_page.py """ from typing import Union, List -from urllib import parse -from urllib.parse import quote from requests import Response from requests_html import HTMLSession @@ -47,6 +45,7 @@ class MixPage(Null, SessionPage, DriverPage): self._driver = None self._url = None self._response = None + self._proxies = None self.timeout = timeout self._url_available = None self._mode = mode @@ -155,11 +154,10 @@ class MixPage(Null, SessionPage, DriverPage): # ----------------重写SessionPage的函数----------------------- - def post(self, url: str, params: dict = None, data: dict = None, go_anyway: bool = False, **kwargs) \ - -> Union[bool, None]: + def post(self, url: str, data: dict = None, go_anyway: bool = False, **kwargs) -> Union[bool, None]: """post前先转换模式,但不跳转""" self.change_mode('s', go=False) - return super().post(url, params, data, go_anyway, **kwargs) + return super().post(url, data, go_anyway, **kwargs) # ----------------重写DriverPage的函数----------------------- @@ -182,9 +180,6 @@ class MixPage(Null, SessionPage, DriverPage): def get(self, url: str, go_anyway=False, **kwargs) -> Union[bool, None]: """跳转到一个url,跳转前先同步cookies,跳转后判断目标url是否可用""" - # to_url = quote(url, safe='/:&?=%;#@') - # if not url or (not go_anyway and self.url == to_url): - # return if self._mode == 'd': if super(SessionPage, self).get(url=url, go_anyway=go_anyway) is None: return diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 5de4976..c90e63d 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -29,6 +29,7 @@ class SessionPage(object): self._url = None self._url_available = None self._response = None + self._proxies = None @property def session(self) -> HTMLSession: @@ -53,6 +54,14 @@ class SessionPage(object): """当前session的cookies""" return self.session.cookies.get_dict() + @property + def proxies(self) -> dict: + return self._proxies + + @proxies.setter + def proxies(self, value: dict): + self._proxies = value + @property def title(self) -> str: """获取网页title""" @@ -85,7 +94,7 @@ class SessionPage(object): """查找符合条件的所有元素""" return self.ele(loc, mode='all', show_errmsg=True) - def get(self, url: str, go_anyway: bool = False, **kwargs) -> Union[bool, None]: + def get(self, url: str, go_anyway: bool = False, **kwargs) -> Union[bool, None]: """用get方式跳转到url,调用_make_response()函数生成response对象""" to_url = quote(url, safe='/:&?=%;#@') if not url or (not go_anyway and self.url == to_url): @@ -97,8 +106,7 @@ class SessionPage(object): self._url_available = True if self._response and self._response.ok else False return self._url_available - def post(self, url: str, data: dict = None, go_anyway: bool = False, **kwargs) \ - -> Union[bool, None]: + def post(self, url: str, data: dict = None, go_anyway: bool = False, **kwargs) -> Union[bool, None]: """用post方式跳转到url,调用_make_response()函数生成response对象""" to_url = quote(url, safe='/:&?=%;#@') if not url or (not go_anyway and self._url == to_url): @@ -209,6 +217,9 @@ class SessionPage(object): if self._url: kwargs['headers']['Referer'] = self._url + if 'proxies' not in kwargs_set and self._proxies: + kwargs['proxies'] = self.proxies + if 'timeout' not in kwargs_set: kwargs['timeout'] = self.timeout From de51562194412a9a03abb707d3ae3906e1763186 Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 16 Jun 2020 18:08:52 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=E5=BE=AE=E8=B0=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/common.py | 2 +- DrissionPage/driver_page.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/DrissionPage/common.py b/DrissionPage/common.py index 809ce4d..e4c2d0d 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -197,4 +197,4 @@ def clean_folder(folder_path: str, ignore: list = None): if f.is_file(): f.unlink() elif f.is_dir(): - shutil.rmtree(f, True) \ No newline at end of file + shutil.rmtree(f, True) diff --git a/DrissionPage/driver_page.py b/DrissionPage/driver_page.py index dd0dd28..a692fdb 100644 --- a/DrissionPage/driver_page.py +++ b/DrissionPage/driver_page.py @@ -5,6 +5,7 @@ @File : driver_page.py """ from glob import glob +# from time import sleep from typing import Union, List, Any from urllib.parse import quote @@ -12,7 +13,8 @@ from selenium.common.exceptions import NoAlertPresentException from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.remote.webelement import WebElement -from .common import get_loc_from_str, avoid_duplicate_name +from .common import get_loc_from_str, avoid_duplicate_name # , clean_folder +# from .config import OptionsManager from .driver_element import DriverElement, execute_driver_find From 4ffa174e5ab661fcd2748dc6a54e0fc13c321dae Mon Sep 17 00:00:00 2001 From: g1879 Date: Wed, 17 Jun 2020 17:40:00 +0800 Subject: [PATCH 4/4] =?UTF-8?q?proxy=E8=AE=BE=E7=BD=AE=E7=A7=BB=E5=88=B0dr?= =?UTF-8?q?ission?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/drission.py | 31 ++++++++++++++++++++++++++++--- DrissionPage/mix_page.py | 2 +- DrissionPage/session_page.py | 13 ------------- 3 files changed, 29 insertions(+), 17 deletions(-) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index 13ab9c5..4fc3106 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -22,18 +22,20 @@ class Drission(object): """Drission类整合了WebDriver对象和HTLSession对象,可按要求创建、关闭及同步cookies""" def __init__(self, driver_options: Union[dict, Options] = None, session_options: dict = None, - driver_path: str = None, ini_path: str = None): + driver_path: str = None, ini_path: str = None, proxy: dict = None): """初始化配置信息,但不生成session和driver实例 :param driver_options: chrome设置,Options类或设置字典 :param session_options: session设置 :param driver_path: chromedriver路径,如为空,则为'chromedriver' :param ini_path: ini文件路径' + :param proxy: 代理设置 """ self._session = None self._driver = None om = OptionsManager(ini_path) self._session_options = session_options or om.get_option('session_options') self._driver_options = _chrome_options_to_dict(driver_options) or om.get_option('chrome_options') + self._proxy = proxy if driver_path: self._driver_path = driver_path @@ -43,7 +45,7 @@ class Drission(object): self._driver_path = 'chromedriver' @property - def session(self): + def session(self) -> HTMLSession: """获取HTMLSession对象""" if self._session is None: self._session = HTMLSession() @@ -52,17 +54,21 @@ class Drission(object): for i in attrs: if i in self._session_options: exec(f'self._session.{i} = self._session_options["{i}"]') + if self._proxy: + self._session.proxies = self._proxy return self._session @property - def driver(self): + def driver(self) -> WebDriver: """获取WebDriver对象,按传入配置信息初始化""" if self._driver is None: if isinstance(self._driver_options, dict): options = _dict_to_chrome_options(self._driver_options) else: raise KeyError('Driver options invalid') + if self._proxy: + options.add_argument(f'--proxy-server={self._proxy["http"]}') self._driver = webdriver.Chrome(self._driver_path, options=options) @@ -89,6 +95,25 @@ class Drission(object): def session_options(self, value: dict): self._session_options = value + @property + def proxy(self) -> Union[None, dict]: + return self._proxy + + @proxy.setter + def proxy(self, proxies: dict = None): + self._proxy = proxies + if self._session: + self._session.proxies = proxies + if self._driver: + cookies = self._driver.get_cookies() + url = self._driver.current_url + self._driver.quit() + self._driver = None + self._driver = self.driver + self._driver.get(url) + for cookie in cookies: + self._driver.add_cookie(cookie) + def cookies_to_session(self, copy_user_agent: bool = False, driver: WebDriver = None, session: Session = None) \ -> None: """把driver的cookies复制到session""" diff --git a/DrissionPage/mix_page.py b/DrissionPage/mix_page.py index fafb624..25a4348 100644 --- a/DrissionPage/mix_page.py +++ b/DrissionPage/mix_page.py @@ -45,7 +45,6 @@ class MixPage(Null, SessionPage, DriverPage): self._driver = None self._url = None self._response = None - self._proxies = None self.timeout = timeout self._url_available = None self._mode = mode @@ -111,6 +110,7 @@ class MixPage(Null, SessionPage, DriverPage): 每次访问时切换到d模式,主要用于独有函数及外部调用 :return:selenium的WebDriver对象 """ + # TODO: 改成每次获取drission的driver if self._driver is None: self._driver = self._drission.driver self.change_mode('d') diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index c90e63d..e24e46c 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -29,7 +29,6 @@ class SessionPage(object): self._url = None self._url_available = None self._response = None - self._proxies = None @property def session(self) -> HTMLSession: @@ -54,14 +53,6 @@ class SessionPage(object): """当前session的cookies""" return self.session.cookies.get_dict() - @property - def proxies(self) -> dict: - return self._proxies - - @proxies.setter - def proxies(self, value: dict): - self._proxies = value - @property def title(self) -> str: """获取网页title""" @@ -70,7 +61,6 @@ class SessionPage(object): @property def html(self) -> str: """获取元素innerHTML,如未指定元素则获取所有源代码""" - # return unescape(self.response.html.raw_html.replace(b'\x08', b'').decode()).replace('\xa0', ' ') return self.response.html.html def ele(self, loc_or_ele: Union[tuple, str, SessionElement], mode: str = None, show_errmsg: bool = False) \ @@ -217,9 +207,6 @@ class SessionPage(object): if self._url: kwargs['headers']['Referer'] = self._url - if 'proxies' not in kwargs_set and self._proxies: - kwargs['proxies'] = self.proxies - if 'timeout' not in kwargs_set: kwargs['timeout'] = self.timeout