微调import

This commit is contained in:
g1879 2020-08-13 14:16:26 +08:00
parent 4888614df5
commit 7a5c3b6f76
3 changed files with 17 additions and 16 deletions

View File

@@ -4,10 +4,10 @@
@Contact : g1879@qq.com @Contact : g1879@qq.com
@File : common.py @File : common.py
""" """
import re
import shutil
from abc import abstractmethod from abc import abstractmethod
from pathlib import Path from pathlib import Path
from re import split as re_SPLIT
from shutil import rmtree
from typing import Union from typing import Union
from requests_html import Element from requests_html import Element
@@ -93,7 +93,7 @@ def get_loc_from_str(loc: str) -> tuple:
""" """
loc_by = 'xpath' loc_by = 'xpath'
if loc.startswith('@'): # 根据属性查找 if loc.startswith('@'): # 根据属性查找
r = re.split(r'([:=])', loc[1:], maxsplit=1) r = re_SPLIT(r'([:=])', loc[1:], maxsplit=1)
if len(r) == 3: if len(r) == 3:
mode = 'exact' if r[1] == '=' else 'fuzzy' mode = 'exact' if r[1] == '=' else 'fuzzy'
loc_str = _make_xpath_str('*', f'@{r[0]}', r[2], mode) loc_str = _make_xpath_str('*', f'@{r[0]}', r[2], mode)
@@ -104,7 +104,7 @@ def get_loc_from_str(loc: str) -> tuple:
loc_str = f'//*[name()="{loc[4:]}"]' loc_str = f'//*[name()="{loc[4:]}"]'
else: else:
at_lst = loc[4:].split('@', maxsplit=1) at_lst = loc[4:].split('@', maxsplit=1)
r = re.split(r'([:=])', at_lst[1], maxsplit=1) r = re_SPLIT(r'([:=])', at_lst[1], maxsplit=1)
if len(r) == 3: if len(r) == 3:
mode = 'exact' if r[1] == '=' else 'fuzzy' mode = 'exact' if r[1] == '=' else 'fuzzy'
arg_str = r[0] if r[0] == 'text()' else f'@{r[0]}' arg_str = r[0] if r[0] == 'text()' else f'@{r[0]}'
@@ -223,4 +223,4 @@ def clean_folder(folder_path: str, ignore: list = None) -> None:
if f.is_file(): if f.is_file():
f.unlink() f.unlink()
elif f.is_dir(): elif f.is_dir():
shutil.rmtree(f, True) rmtree(f, True)

View File

@@ -7,13 +7,13 @@
from typing import Union from typing import Union
from urllib.parse import urlparse from urllib.parse import urlparse
import tldextract
from requests import Session from requests import Session
from requests_html import HTMLSession from requests_html import HTMLSession
from selenium import webdriver from selenium import webdriver
from selenium.common.exceptions import WebDriverException from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.chrome.webdriver import WebDriver
from tldextract import extract
from .config import _dict_to_chrome_options, OptionsManager, _chrome_options_to_dict from .config import _dict_to_chrome_options, OptionsManager, _chrome_options_to_dict
@@ -182,7 +182,7 @@ class Drission(object):
cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:] cookie_domain = cookie['domain'] if cookie['domain'][0] != '.' else cookie['domain'][1:]
try: try:
browser_domain = tldextract.extract(driver.current_url).fqdn browser_domain = extract(driver.current_url).fqdn
except AttributeError: except AttributeError:
browser_domain = '' browser_domain = ''
if cookie_domain not in browser_domain: if cookie_domain not in browser_domain:
@@ -194,7 +194,7 @@ class Drission(object):
# 如果添加失败,尝试更宽的域名 # 如果添加失败,尝试更宽的域名
if not self._is_cookie_in_driver(cookie, driver): if not self._is_cookie_in_driver(cookie, driver):
cookie['domain'] = tldextract.extract(cookie['domain']).registered_domain cookie['domain'] = extract(cookie['domain']).registered_domain
driver.add_cookie(cookie) driver.add_cookie(cookie)
if not self._is_cookie_in_driver(cookie): if not self._is_cookie_in_driver(cookie):
raise WebDriverException(f"Couldn't add the following cookie to the webdriver\n{cookie}\n") raise WebDriverException(f"Couldn't add the following cookie to the webdriver\n{cookie}\n")

View File

@@ -4,10 +4,11 @@
@Contact : g1879@qq.com @Contact : g1879@qq.com
@File : session_page.py @File : session_page.py
""" """
import os from os import path as os_PATH
import re
from pathlib import Path from pathlib import Path
from random import randint from random import randint
from re import search as re_SEARCH
from re import sub as re_SUB
from time import time from time import time
from typing import Union, List from typing import Union, List
from urllib.parse import urlparse, quote from urllib.parse import urlparse, quote
@@ -209,14 +210,14 @@ class SessionPage(object):
# header里有文件名则使用它否则在url里截取但不能保证url包含文件名 # header里有文件名则使用它否则在url里截取但不能保证url包含文件名
if 'Content-disposition' in r.headers: if 'Content-disposition' in r.headers:
file_name = r.headers['Content-disposition'].split('"')[1].encode('ISO-8859-1').decode('utf-8') file_name = r.headers['Content-disposition'].split('"')[1].encode('ISO-8859-1').decode('utf-8')
elif os.path.basename(file_url): elif os_PATH.basename(file_url):
file_name = os.path.basename(file_url).split("?")[0] file_name = os_PATH.basename(file_url).split("?")[0]
else: else:
file_name = f'untitled_{time()}_{randint(0, 100)}' file_name = f'untitled_{time()}_{randint(0, 100)}'
file_name = re.sub(r'[\\/*:|<>?"]', '', file_name).strip() file_name = re_SUB(r'[\\/*:|<>?"]', '', file_name).strip()
if rename: # 重命名文件,不改变扩展名 if rename: # 重命名文件,不改变扩展名
rename = re.sub(r'[\\/*:|<>?"]', '', rename).strip() rename = re_SUB(r'[\\/*:|<>?"]', '', rename).strip()
ext_name = file_name.split('.')[-1] ext_name = file_name.split('.')[-1]
if rename.lower().endswith(f'.{ext_name}'.lower()) or ext_name == file_name: if rename.lower().endswith(f'.{ext_name}'.lower()) or ext_name == file_name:
full_name = rename full_name = rename
@@ -228,7 +229,7 @@ class SessionPage(object):
goal_Path = Path(goal_path) goal_Path = Path(goal_path)
goal_path = '' goal_path = ''
for key, i in enumerate(goal_Path.parts): # 去除路径中的非法字符 for key, i in enumerate(goal_Path.parts): # 去除路径中的非法字符
goal_path += goal_Path.drive if key == 0 and goal_Path.drive else re.sub(r'[*:|<>?"]', '', i).strip() goal_path += goal_Path.drive if key == 0 and goal_Path.drive else re_SUB(r'[*:|<>?"]', '', i).strip()
goal_path += '\\' if i != '\\' and key < len(goal_Path.parts) - 1 else '' goal_path += '\\' if i != '\\' and key < len(goal_Path.parts) - 1 else ''
goal_Path = Path(goal_path) goal_Path = Path(goal_path)
@@ -319,7 +320,7 @@ class SessionPage(object):
else: else:
headers = dict(r.headers) headers = dict(r.headers)
if 'Content-Type' not in headers or 'charset' not in headers['Content-Type']: if 'Content-Type' not in headers or 'charset' not in headers['Content-Type']:
re_result = re.search(r'<meta.*?charset=[ \'"]*([^"\' />]+).*?>', r.text) re_result = re_SEARCH(r'<meta.*?charset=[ \'"]*([^"\' />]+).*?>', r.text)
try: try:
charset = re_result.group(1) charset = re_result.group(1)
except: except: