mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
微调import
This commit is contained in:
parent
4888614df5
commit
7a5c3b6f76
@ -4,10 +4,10 @@
|
|||||||
@Contact : g1879@qq.com
|
@Contact : g1879@qq.com
|
||||||
@File : common.py
|
@File : common.py
|
||||||
"""
|
"""
|
||||||
import re
|
|
||||||
import shutil
|
|
||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from re import split as re_SPLIT
|
||||||
|
from shutil import rmtree
|
||||||
from typing import Union
|
from typing import Union
|
||||||
|
|
||||||
from requests_html import Element
|
from requests_html import Element
|
||||||
@ -93,7 +93,7 @@ def get_loc_from_str(loc: str) -> tuple:
|
|||||||
"""
|
"""
|
||||||
loc_by = 'xpath'
|
loc_by = 'xpath'
|
||||||
if loc.startswith('@'): # 根据属性查找
|
if loc.startswith('@'): # 根据属性查找
|
||||||
r = re.split(r'([:=])', loc[1:], maxsplit=1)
|
r = re_SPLIT(r'([:=])', loc[1:], maxsplit=1)
|
||||||
if len(r) == 3:
|
if len(r) == 3:
|
||||||
mode = 'exact' if r[1] == '=' else 'fuzzy'
|
mode = 'exact' if r[1] == '=' else 'fuzzy'
|
||||||
loc_str = _make_xpath_str('*', f'@{r[0]}', r[2], mode)
|
loc_str = _make_xpath_str('*', f'@{r[0]}', r[2], mode)
|
||||||
@ -104,7 +104,7 @@ def get_loc_from_str(loc: str) -> tuple:
|
|||||||
loc_str = f'//*[name()="{loc[4:]}"]'
|
loc_str = f'//*[name()="{loc[4:]}"]'
|
||||||
else:
|
else:
|
||||||
at_lst = loc[4:].split('@', maxsplit=1)
|
at_lst = loc[4:].split('@', maxsplit=1)
|
||||||
r = re.split(r'([:=])', at_lst[1], maxsplit=1)
|
r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1)
|
||||||
if len(r) == 3:
|
if len(r) == 3:
|
||||||
mode = 'exact' if r[1] == '=' else 'fuzzy'
|
mode = 'exact' if r[1] == '=' else 'fuzzy'
|
||||||
arg_str = r[0] if r[0] == 'text()' else f'@{r[0]}'
|
arg_str = r[0] if r[0] == 'text()' else f'@{r[0]}'
|
||||||
@ -223,4 +223,4 @@ def clean_folder(folder_path: str, ignore: list = None) -> None:
|
|||||||
if f.is_file():
|
if f.is_file():
|
||||||
f.unlink()
|
f.unlink()
|
||||||
elif f.is_dir():
|
elif f.is_dir():
|
||||||
shutil.rmtree(f, True)
|
rmtree(f, True)
|
||||||
|
@ -7,13 +7,13 @@
|
|||||||
from typing import Union
|
from typing import Union
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
import tldextract
|
|
||||||
from requests import Session
|
from requests import Session
|
||||||
from requests_html import HTMLSession
|
from requests_html import HTMLSession
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from selenium.common.exceptions import WebDriverException
|
from selenium.common.exceptions import WebDriverException
|
||||||
from selenium.webdriver.chrome.options import Options
|
from selenium.webdriver.chrome.options import Options
|
||||||
from selenium.webdriver.chrome.webdriver import WebDriver
|
from selenium.webdriver.chrome.webdriver import WebDriver
|
||||||
|
from tldextract import extract
|
||||||
|
|
||||||
from .config import _dict_to_chrome_options, OptionsManager, _chrome_options_to_dict
|
from .config import _dict_to_chrome_options, OptionsManager, _chrome_options_to_dict
|
||||||
|
|
||||||
@ -182,7 +182,7 @@ class Drission(object):
|
|||||||
|
|
||||||
cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:]
|
cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:]
|
||||||
try:
|
try:
|
||||||
browser_domain = tldextract.extract(driver.current_url).fqdn
|
browser_domain = extract(driver.current_url).fqdn
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
browser_domain = ''
|
browser_domain = ''
|
||||||
if cookie_domain not in browser_domain:
|
if cookie_domain not in browser_domain:
|
||||||
@ -194,7 +194,7 @@ class Drission(object):
|
|||||||
|
|
||||||
# 如果添加失败,尝试更宽的域名
|
# 如果添加失败,尝试更宽的域名
|
||||||
if not self._is_cookie_in_driver(cookie, driver):
|
if not self._is_cookie_in_driver(cookie, driver):
|
||||||
cookie['domain'] = tldextract.extract(cookie['domain']).registered_domain
|
cookie['domain'] = extract(cookie['domain']).registered_domain
|
||||||
driver.add_cookie(cookie)
|
driver.add_cookie(cookie)
|
||||||
if not self._is_cookie_in_driver(cookie):
|
if not self._is_cookie_in_driver(cookie):
|
||||||
raise WebDriverException(f"Couldn't add the following cookie to the webdriver\n{cookie}\n")
|
raise WebDriverException(f"Couldn't add the following cookie to the webdriver\n{cookie}\n")
|
||||||
|
@ -4,10 +4,11 @@
|
|||||||
@Contact : g1879@qq.com
|
@Contact : g1879@qq.com
|
||||||
@File : session_page.py
|
@File : session_page.py
|
||||||
"""
|
"""
|
||||||
import os
|
from os import path as os_PATH
|
||||||
import re
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from random import randint
|
from random import randint
|
||||||
|
from re import search as re_SEARCH
|
||||||
|
from re import sub as re_SUB
|
||||||
from time import time
|
from time import time
|
||||||
from typing import Union, List
|
from typing import Union, List
|
||||||
from urllib.parse import urlparse, quote
|
from urllib.parse import urlparse, quote
|
||||||
@ -209,14 +210,14 @@ class SessionPage(object):
|
|||||||
# header里有文件名,则使用它,否则在url里截取,但不能保证url包含文件名
|
# header里有文件名,则使用它,否则在url里截取,但不能保证url包含文件名
|
||||||
if 'Content-disposition' in r.headers:
|
if 'Content-disposition' in r.headers:
|
||||||
file_name = r.headers['Content-disposition'].split('"')[1].encode('ISO-8859-1').decode('utf-8')
|
file_name = r.headers['Content-disposition'].split('"')[1].encode('ISO-8859-1').decode('utf-8')
|
||||||
elif os.path.basename(file_url):
|
elif os_PATH.basename(file_url):
|
||||||
file_name = os.path.basename(file_url).split("?")[0]
|
file_name = os_PATH.basename(file_url).split("?")[0]
|
||||||
else:
|
else:
|
||||||
file_name = f'untitled_{time()}_{randint(0, 100)}'
|
file_name = f'untitled_{time()}_{randint(0, 100)}'
|
||||||
|
|
||||||
file_name = re.sub(r'[\\/*:|<>?"]', '', file_name).strip()
|
file_name = re_SUB(r'[\\/*:|<>?"]', '', file_name).strip()
|
||||||
if rename: # 重命名文件,不改变扩展名
|
if rename: # 重命名文件,不改变扩展名
|
||||||
rename = re.sub(r'[\\/*:|<>?"]', '', rename).strip()
|
rename = re_SUB(r'[\\/*:|<>?"]', '', rename).strip()
|
||||||
ext_name = file_name.split('.')[-1]
|
ext_name = file_name.split('.')[-1]
|
||||||
if rename.lower().endswith(f'.{ext_name}'.lower()) or ext_name == file_name:
|
if rename.lower().endswith(f'.{ext_name}'.lower()) or ext_name == file_name:
|
||||||
full_name = rename
|
full_name = rename
|
||||||
@ -228,7 +229,7 @@ class SessionPage(object):
|
|||||||
goal_Path = Path(goal_path)
|
goal_Path = Path(goal_path)
|
||||||
goal_path = ''
|
goal_path = ''
|
||||||
for key, i in enumerate(goal_Path.parts): # 去除路径中的非法字符
|
for key, i in enumerate(goal_Path.parts): # 去除路径中的非法字符
|
||||||
goal_path += goal_Path.drive if key == 0 and goal_Path.drive else re.sub(r'[*:|<>?"]', '', i).strip()
|
goal_path += goal_Path.drive if key == 0 and goal_Path.drive else re_SUB(r'[*:|<>?"]', '', i).strip()
|
||||||
goal_path += '\\' if i != '\\' and key < len(goal_Path.parts) - 1 else ''
|
goal_path += '\\' if i != '\\' and key < len(goal_Path.parts) - 1 else ''
|
||||||
|
|
||||||
goal_Path = Path(goal_path)
|
goal_Path = Path(goal_path)
|
||||||
@ -319,7 +320,7 @@ class SessionPage(object):
|
|||||||
else:
|
else:
|
||||||
headers = dict(r.headers)
|
headers = dict(r.headers)
|
||||||
if 'Content-Type' not in headers or 'charset' not in headers['Content-Type']:
|
if 'Content-Type' not in headers or 'charset' not in headers['Content-Type']:
|
||||||
re_result = re.search(r'<meta.*?charset=[ \'"]*([^"\' />]+).*?>', r.text)
|
re_result = re_SEARCH(r'<meta.*?charset=[ \'"]*([^"\' />]+).*?>', r.text)
|
||||||
try:
|
try:
|
||||||
charset = re_result.group(1)
|
charset = re_result.group(1)
|
||||||
except:
|
except:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user