From 651a00e6661e82f58d17abbbcffe3bccf78c6a4b Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 7 Mar 2023 18:15:09 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B5=8F=E8=A7=88=E5=99=A8=E9=A1=B5=E9=9D=A2?= =?UTF-8?q?=E5=AF=B9=E8=B1=A1=E5=A2=9E=E5=8A=A0is=5Falive=E5=B1=9E?= =?UTF-8?q?=E6=80=A7=EF=BC=9B=E4=B8=8B=E6=8B=89=E5=88=97=E8=A1=A8=E5=A2=9E?= =?UTF-8?q?=E5=8A=A0by=5Floc=E9=80=89=E6=8B=A9=E6=96=B9=E5=BC=8F=EF=BC=9B?= =?UTF-8?q?=E5=8E=BB=E9=99=A4=E5=AF=B9tldextract=E4=BE=9D=E8=B5=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/chromium_tab.py | 10 +++++----- DrissionPage/mixpage/drission.py | 20 ++++++++++---------- DrissionPage/mixpage/session_page.py | 6 +++--- DrissionPage/session_page.py | 6 +++--- DrissionPage/web_page.py | 9 +++++---- requirements.txt | 1 - setup.py | 3 +-- 7 files changed, 27 insertions(+), 28 deletions(-) diff --git a/DrissionPage/chromium_tab.py b/DrissionPage/chromium_tab.py index 6aaefda..fbcb09a 100644 --- a/DrissionPage/chromium_tab.py +++ b/DrissionPage/chromium_tab.py @@ -4,8 +4,7 @@ @Contact : g1879@qq.com """ from copy import copy - -from tldextract import extract +from urllib.parse import urlparse from .chromium_base import ChromiumBase, ChromiumBaseSetter from .commons.web import set_session_cookies @@ -287,11 +286,12 @@ class WebPageTab(SessionPage, ChromiumTab): def cookies_to_browser(self): """把session对象的cookies复制到浏览器""" - ex_url = extract(self._session_url) - domain = f'{ex_url.domain}.{ex_url.suffix}' + netloc = urlparse(self.url).netloc + u = netloc.split('.') + domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else netloc cookies = [] for cookie in super().get_cookies(): - if cookie.get('domain', '') == '': + if not cookie.get('domain', None): cookie['domain'] = domain if domain in cookie['domain']: diff --git a/DrissionPage/mixpage/drission.py b/DrissionPage/mixpage/drission.py index e745220..aa75e3b 100644 --- a/DrissionPage/mixpage/drission.py +++ b/DrissionPage/mixpage/drission.py @@ -5,6 +5,7 @@ """ from platform import system from sys import exit +from urllib.parse import urlparse from requests import Session from requests.structures import CaseInsensitiveDict @@ -12,7 +13,6 @@ from selenium import webdriver from selenium.common.exceptions import SessionNotCreatedException, WebDriverException from selenium.webdriver.chrome.options import Options from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver -from tldextract import extract from DrissionPage.commons.tools import get_pid_from_port, get_exe_from_port from DrissionPage.commons.browser import connect_browser @@ -262,14 +262,14 @@ class Drission(object): cookie['expiry'] = int(cookie['expiry']) try: - browser_domain = extract(self.driver.current_url).fqdn + browser_domain = urlparse(self.driver.current_url).netloc except AttributeError: browser_domain = '' if not cookie.get('domain', None): if browser_domain: - url = extract(browser_domain) - cookie_domain = f'{url.domain}.{url.suffix}' + u = browser_domain.split('.') + cookie_domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else browser_domain else: raise ValueError('cookie中没有域名或浏览器未访问过URL。') @@ -279,8 +279,7 @@ class Drission(object): cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:] if cookie_domain not in browser_domain: - self.driver.get(cookie_domain if cookie_domain.startswith('http://') - else f'http://{cookie_domain}') + self.driver.get(cookie_domain if cookie_domain.startswith('http://') else f'http://{cookie_domain}') # 避免selenium自动添加.后无法正确覆盖已有cookie if cookie['domain'][0] != '.': @@ -324,13 +323,14 @@ class Drission(object): :param url: 作用域 :return: None """ - browser_domain = extract(self.driver.current_url).fqdn - ex_url = extract(url) + browser_domain = urlparse(self.driver.current_url).netloc + ex_url = urlparse(url).netloc - if ex_url.fqdn not in browser_domain: + if ex_url not in browser_domain: self.driver.get(url) - domain = f'{ex_url.domain}.{ex_url.suffix}' + u = ex_url.split('.') + domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else ex_url cookies = [] for cookie in self.session.cookies: diff --git a/DrissionPage/mixpage/session_page.py b/DrissionPage/mixpage/session_page.py index 345b51d..b8397ce 100644 --- a/DrissionPage/mixpage/session_page.py +++ b/DrissionPage/mixpage/session_page.py @@ -11,7 +11,6 @@ from warnings import warn from DownloadKit import DownloadKit from requests import Session, Response from requests.structures import CaseInsensitiveDict -from tldextract import extract from .base import BasePage from DrissionPage.configs.session_options import SessionOptions @@ -211,8 +210,9 @@ class SessionPage(BasePage): cookies = self.session.cookies else: if self.url: - url = extract(self.url) - domain = f'{url.domain}.{url.suffix}' + netloc = urlparse(self.url).netloc + u = netloc.split('.') + domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else netloc cookies = tuple(x for x in self.session.cookies if domain in x.domain or x.domain == '') else: cookies = tuple(x for x in self.session.cookies) diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 2447e78..fd76734 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -11,7 +11,6 @@ from warnings import warn from DownloadKit import DownloadKit from requests import Session, Response from requests.structures import CaseInsensitiveDict -from tldextract import extract from .base import BasePage from .commons.web import cookie_to_dict, set_session_cookies @@ -190,8 +189,9 @@ class SessionPage(BasePage): cookies = self.session.cookies else: if self.url: - url = extract(self.url) - domain = f'{url.domain}.{url.suffix}' + netloc = urlparse(self.url).netloc + u = netloc.split('.') + domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else netloc cookies = tuple(x for x in self.session.cookies if domain in x.domain or x.domain == '') else: cookies = tuple(x for x in self.session.cookies) diff --git a/DrissionPage/web_page.py b/DrissionPage/web_page.py index 321e698..071a715 100644 --- a/DrissionPage/web_page.py +++ b/DrissionPage/web_page.py @@ -4,10 +4,10 @@ @Contact : g1879@qq.com """ from pathlib import Path +from urllib.parse import urlparse from warnings import warn from requests import Session -from tldextract import extract from .base import BasePage from .chromium_base import ChromiumBase, Timeout @@ -360,11 +360,12 @@ class WebPage(SessionPage, ChromiumPage, BasePage): def cookies_to_browser(self): """把session对象的cookies复制到浏览器""" - ex_url = extract(self._session_url) - domain = f'{ex_url.domain}.{ex_url.suffix}' + netloc = urlparse(self.url).netloc + u = netloc.split('.') + domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else netloc cookies = [] for cookie in super().get_cookies(): - if cookie.get('domain', '') == '': + if not cookie.get('domain', None): cookie['domain'] = domain if domain in cookie['domain']: diff --git a/requirements.txt b/requirements.txt index a73daab..1e86265 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ requests -tldextract lxml cssselect DownloadKit>=0.5.3 diff --git a/setup.py b/setup.py index fbdb367..84bb355 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh: setup( name="DrissionPage", - version="3.2.12", + version="3.2.13", author="g1879", author_email="g1879@qq.com", description="Python based web automation tool. It can control the browser and send and receive data packets.", @@ -20,7 +20,6 @@ setup( zip_safe=False, install_requires=[ "lxml", - "tldextract", "requests", "DownloadKit>=0.5.3", "FlowViewer",