diff --git a/DrissionPage/common.py b/DrissionPage/common.py index 4a5de9b..3a7f594 100644 --- a/DrissionPage/common.py +++ b/DrissionPage/common.py @@ -4,10 +4,10 @@ @Contact : g1879@qq.com @File : common.py """ -import re -import shutil from abc import abstractmethod from pathlib import Path +from re import split as re_SPLIT +from shutil import rmtree from typing import Union from requests_html import Element @@ -93,7 +93,7 @@ def get_loc_from_str(loc: str) -> tuple: """ loc_by = 'xpath' if loc.startswith('@'): # 根据属性查找 - r = re.split(r'([:=])', loc[1:], maxsplit=1) + r = re_SPLIT(r'([:=])', loc[1:], maxsplit=1) if len(r) == 3: mode = 'exact' if r[1] == '=' else 'fuzzy' loc_str = _make_xpath_str('*', f'@{r[0]}', r[2], mode) @@ -104,7 +104,7 @@ def get_loc_from_str(loc: str) -> tuple: loc_str = f'//*[name()="{loc[4:]}"]' else: at_lst = loc[4:].split('@', maxsplit=1) - r = re.split(r'([:=])', at_lst[1], maxsplit=1) + r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1) if len(r) == 3: mode = 'exact' if r[1] == '=' else 'fuzzy' arg_str = r[0] if r[0] == 'text()' else f'@{r[0]}' @@ -223,4 +223,4 @@ def clean_folder(folder_path: str, ignore: list = None) -> None: if f.is_file(): f.unlink() elif f.is_dir(): - shutil.rmtree(f, True) + rmtree(f, True) diff --git a/DrissionPage/drission.py b/DrissionPage/drission.py index b4631f5..931c291 100644 --- a/DrissionPage/drission.py +++ b/DrissionPage/drission.py @@ -7,13 +7,13 @@ from typing import Union from urllib.parse import urlparse -import tldextract from requests import Session from requests_html import HTMLSession from selenium import webdriver from selenium.common.exceptions import WebDriverException from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.webdriver import WebDriver +from tldextract import extract from .config import _dict_to_chrome_options, OptionsManager, _chrome_options_to_dict @@ -182,7 +182,7 @@ class Drission(object): cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:] try: - browser_domain = tldextract.extract(driver.current_url).fqdn + browser_domain = extract(driver.current_url).fqdn except AttributeError: browser_domain = '' if cookie_domain not in browser_domain: @@ -194,7 +194,7 @@ class Drission(object): # 如果添加失败,尝试更宽的域名 if not self._is_cookie_in_driver(cookie, driver): - cookie['domain'] = tldextract.extract(cookie['domain']).registered_domain + cookie['domain'] = extract(cookie['domain']).registered_domain driver.add_cookie(cookie) if not self._is_cookie_in_driver(cookie): raise WebDriverException(f"Couldn't add the following cookie to the webdriver\n{cookie}\n") diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 5a28bb0..9170cce 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -4,10 +4,11 @@ @Contact : g1879@qq.com @File : session_page.py """ -import os -import re +from os import path as os_PATH from pathlib import Path from random import randint +from re import search as re_SEARCH +from re import sub as re_SUB from time import time from typing import Union, List from urllib.parse import urlparse, quote @@ -209,14 +210,14 @@ class SessionPage(object): # header里有文件名,则使用它,否则在url里截取,但不能保证url包含文件名 if 'Content-disposition' in r.headers: file_name = r.headers['Content-disposition'].split('"')[1].encode('ISO-8859-1').decode('utf-8') - elif os.path.basename(file_url): - file_name = os.path.basename(file_url).split("?")[0] + elif os_PATH.basename(file_url): + file_name = os_PATH.basename(file_url).split("?")[0] else: file_name = f'untitled_{time()}_{randint(0, 100)}' - file_name = re.sub(r'[\\/*:|<>?"]', '', file_name).strip() + file_name = re_SUB(r'[\\/*:|<>?"]', '', file_name).strip() if rename: # 重命名文件,不改变扩展名 - rename = re.sub(r'[\\/*:|<>?"]', '', rename).strip() + rename = re_SUB(r'[\\/*:|<>?"]', '', rename).strip() ext_name = file_name.split('.')[-1] if rename.lower().endswith(f'.{ext_name}'.lower()) or ext_name == file_name: full_name = rename @@ -228,7 +229,7 @@ class SessionPage(object): goal_Path = Path(goal_path) goal_path = '' for key, i in enumerate(goal_Path.parts): # 去除路径中的非法字符 - goal_path += goal_Path.drive if key == 0 and goal_Path.drive else re.sub(r'[*:|<>?"]', '', i).strip() + goal_path += goal_Path.drive if key == 0 and goal_Path.drive else re_SUB(r'[*:|<>?"]', '', i).strip() goal_path += '\\' if i != '\\' and key < len(goal_Path.parts) - 1 else '' goal_Path = Path(goal_path) @@ -319,7 +320,7 @@ class SessionPage(object): else: headers = dict(r.headers) if 'Content-Type' not in headers or 'charset' not in headers['Content-Type']: - re_result = re.search(r']+).*?>', r.text) + re_result = re_SEARCH(r']+).*?>', r.text) try: charset = re_result.group(1) except: