浏览器页面对象增加is_alive属性;下拉列表增加by_loc选择方式;去除对tldextract依赖

This commit is contained in:
g1879 2023-03-07 18:15:09 +08:00
parent aaab200c5e
commit 651a00e666
7 changed files with 27 additions and 28 deletions

View File

@ -4,8 +4,7 @@
@Contact : g1879@qq.com @Contact : g1879@qq.com
""" """
from copy import copy from copy import copy
from urllib.parse import urlparse
from tldextract import extract
from .chromium_base import ChromiumBase, ChromiumBaseSetter from .chromium_base import ChromiumBase, ChromiumBaseSetter
from .commons.web import set_session_cookies from .commons.web import set_session_cookies
@ -287,11 +286,12 @@ class WebPageTab(SessionPage, ChromiumTab):
def cookies_to_browser(self): def cookies_to_browser(self):
"""把session对象的cookies复制到浏览器""" """把session对象的cookies复制到浏览器"""
ex_url = extract(self._session_url) netloc = urlparse(self.url).netloc
domain = f'{ex_url.domain}.{ex_url.suffix}' u = netloc.split('.')
domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else netloc
cookies = [] cookies = []
for cookie in super().get_cookies(): for cookie in super().get_cookies():
if cookie.get('domain', '') == '': if not cookie.get('domain', None):
cookie['domain'] = domain cookie['domain'] = domain
if domain in cookie['domain']: if domain in cookie['domain']:

View File

@ -5,6 +5,7 @@
""" """
from platform import system from platform import system
from sys import exit from sys import exit
from urllib.parse import urlparse
from requests import Session from requests import Session
from requests.structures import CaseInsensitiveDict from requests.structures import CaseInsensitiveDict
@ -12,7 +13,6 @@ from selenium import webdriver
from selenium.common.exceptions import SessionNotCreatedException, WebDriverException from selenium.common.exceptions import SessionNotCreatedException, WebDriverException
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
from tldextract import extract
from DrissionPage.commons.tools import get_pid_from_port, get_exe_from_port from DrissionPage.commons.tools import get_pid_from_port, get_exe_from_port
from DrissionPage.commons.browser import connect_browser from DrissionPage.commons.browser import connect_browser
@ -262,14 +262,14 @@ class Drission(object):
cookie['expiry'] = int(cookie['expiry']) cookie['expiry'] = int(cookie['expiry'])
try: try:
browser_domain = extract(self.driver.current_url).fqdn browser_domain = urlparse(self.driver.current_url).netloc
except AttributeError: except AttributeError:
browser_domain = '' browser_domain = ''
if not cookie.get('domain', None): if not cookie.get('domain', None):
if browser_domain: if browser_domain:
url = extract(browser_domain) u = browser_domain.split('.')
cookie_domain = f'{url.domain}.{url.suffix}' cookie_domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else browser_domain
else: else:
raise ValueError('cookie中没有域名或浏览器未访问过URL。') raise ValueError('cookie中没有域名或浏览器未访问过URL。')
@ -279,8 +279,7 @@ class Drission(object):
cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:] cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:]
if cookie_domain not in browser_domain: if cookie_domain not in browser_domain:
self.driver.get(cookie_domain if cookie_domain.startswith('http://') self.driver.get(cookie_domain if cookie_domain.startswith('http://') else f'http://{cookie_domain}')
else f'http://{cookie_domain}')
# 避免selenium自动添加.后无法正确覆盖已有cookie # 避免selenium自动添加.后无法正确覆盖已有cookie
if cookie['domain'][0] != '.': if cookie['domain'][0] != '.':
@ -324,13 +323,14 @@ class Drission(object):
:param url: 作用域 :param url: 作用域
:return: None :return: None
""" """
browser_domain = extract(self.driver.current_url).fqdn browser_domain = urlparse(self.driver.current_url).netloc
ex_url = extract(url) ex_url = urlparse(url).netloc
if ex_url.fqdn not in browser_domain: if ex_url not in browser_domain:
self.driver.get(url) self.driver.get(url)
domain = f'{ex_url.domain}.{ex_url.suffix}' u = ex_url.split('.')
domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else ex_url
cookies = [] cookies = []
for cookie in self.session.cookies: for cookie in self.session.cookies:

View File

@ -11,7 +11,6 @@ from warnings import warn
from DownloadKit import DownloadKit from DownloadKit import DownloadKit
from requests import Session, Response from requests import Session, Response
from requests.structures import CaseInsensitiveDict from requests.structures import CaseInsensitiveDict
from tldextract import extract
from .base import BasePage from .base import BasePage
from DrissionPage.configs.session_options import SessionOptions from DrissionPage.configs.session_options import SessionOptions
@ -211,8 +210,9 @@ class SessionPage(BasePage):
cookies = self.session.cookies cookies = self.session.cookies
else: else:
if self.url: if self.url:
url = extract(self.url) netloc = urlparse(self.url).netloc
domain = f'{url.domain}.{url.suffix}' u = netloc.split('.')
domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else netloc
cookies = tuple(x for x in self.session.cookies if domain in x.domain or x.domain == '') cookies = tuple(x for x in self.session.cookies if domain in x.domain or x.domain == '')
else: else:
cookies = tuple(x for x in self.session.cookies) cookies = tuple(x for x in self.session.cookies)

View File

@ -11,7 +11,6 @@ from warnings import warn
from DownloadKit import DownloadKit from DownloadKit import DownloadKit
from requests import Session, Response from requests import Session, Response
from requests.structures import CaseInsensitiveDict from requests.structures import CaseInsensitiveDict
from tldextract import extract
from .base import BasePage from .base import BasePage
from .commons.web import cookie_to_dict, set_session_cookies from .commons.web import cookie_to_dict, set_session_cookies
@ -190,8 +189,9 @@ class SessionPage(BasePage):
cookies = self.session.cookies cookies = self.session.cookies
else: else:
if self.url: if self.url:
url = extract(self.url) netloc = urlparse(self.url).netloc
domain = f'{url.domain}.{url.suffix}' u = netloc.split('.')
domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else netloc
cookies = tuple(x for x in self.session.cookies if domain in x.domain or x.domain == '') cookies = tuple(x for x in self.session.cookies if domain in x.domain or x.domain == '')
else: else:
cookies = tuple(x for x in self.session.cookies) cookies = tuple(x for x in self.session.cookies)

View File

@ -4,10 +4,10 @@
@Contact : g1879@qq.com @Contact : g1879@qq.com
""" """
from pathlib import Path from pathlib import Path
from urllib.parse import urlparse
from warnings import warn from warnings import warn
from requests import Session from requests import Session
from tldextract import extract
from .base import BasePage from .base import BasePage
from .chromium_base import ChromiumBase, Timeout from .chromium_base import ChromiumBase, Timeout
@ -360,11 +360,12 @@ class WebPage(SessionPage, ChromiumPage, BasePage):
def cookies_to_browser(self): def cookies_to_browser(self):
"""把session对象的cookies复制到浏览器""" """把session对象的cookies复制到浏览器"""
ex_url = extract(self._session_url) netloc = urlparse(self.url).netloc
domain = f'{ex_url.domain}.{ex_url.suffix}' u = netloc.split('.')
domain = f'{u[-2]}.{u[-1]}' if len(u) > 1 else netloc
cookies = [] cookies = []
for cookie in super().get_cookies(): for cookie in super().get_cookies():
if cookie.get('domain', '') == '': if not cookie.get('domain', None):
cookie['domain'] = domain cookie['domain'] = domain
if domain in cookie['domain']: if domain in cookie['domain']:

View File

@ -1,5 +1,4 @@
requests requests
tldextract
lxml lxml
cssselect cssselect
DownloadKit>=0.5.3 DownloadKit>=0.5.3

View File

@ -6,7 +6,7 @@ with open("README.md", "r", encoding='utf-8') as fh:
setup( setup(
name="DrissionPage", name="DrissionPage",
version="3.2.12", version="3.2.13",
author="g1879", author="g1879",
author_email="g1879@qq.com", author_email="g1879@qq.com",
description="Python based web automation tool. It can control the browser and send and receive data packets.", description="Python based web automation tool. It can control the browser and send and receive data packets.",
@ -20,7 +20,6 @@ setup(
zip_safe=False, zip_safe=False,
install_requires=[ install_requires=[
"lxml", "lxml",
"tldextract",
"requests", "requests",
"DownloadKit>=0.5.3", "DownloadKit>=0.5.3",
"FlowViewer", "FlowViewer",