download函数优化获取文件名逻辑

This commit is contained in:
g1879 2020-09-15 13:43:02 +08:00
parent 39a96c852e
commit 265c997efc

View File

@ -4,6 +4,7 @@
@Contact : g1879@qq.com @Contact : g1879@qq.com
@File : session_page.py @File : session_page.py
""" """
import re
from os import path as os_PATH from os import path as os_PATH
from pathlib import Path from pathlib import Path
from random import randint from random import randint
@ -11,6 +12,7 @@ from re import search as re_SEARCH
from re import sub as re_SUB from re import sub as re_SUB
from time import time, sleep from time import time, sleep
from typing import Union, List from typing import Union, List
from urllib import parse
from urllib.parse import urlparse, quote from urllib.parse import urlparse, quote
from requests_html import HTMLSession, HTMLResponse, Element from requests_html import HTMLSession, HTMLResponse, Element
@ -264,14 +266,24 @@ class SessionPage(object):
if show_errmsg: if show_errmsg:
raise ConnectionError(f'Status code: {r.status_code}.') raise ConnectionError(f'Status code: {r.status_code}.')
return False, f'Status code: {r.status_code}.' return False, f'Status code: {r.status_code}.'
# -------------------获取文件名------------------- # -------------------获取文件名-------------------
if 'Content-disposition' in r.headers: # header里有文件名则使用它 file_name = ''
file_name = r.headers['Content-disposition'].split('"')[1].encode('ISO-8859-1').decode('utf-8') content_disposition = tuple(x for x in r.headers if x.lower() == 'content-disposition')
elif os_PATH.basename(file_url): # 在url里获取文件名 if content_disposition: # header里有文件名则使用它
file_name = r.headers[content_disposition[0]].encode('ISO-8859-1').decode('utf-8')
file_name = re.search(r'filename *= *"?([^";]+)', file_name)
if file_name:
file_name = file_name.group(1)
if file_name[0] == file_name[-1] == "'":
file_name = file_name.strip("'")
if not file_name and os_PATH.basename(file_url): # 在url里获取文件名
file_name = os_PATH.basename(file_url).split("?")[0] file_name = os_PATH.basename(file_url).split("?")[0]
else: # 找不到则用时间和随机数生成文件名 if not file_name: # 找不到则用时间和随机数生成文件名
file_name = f'untitled_{time()}_{randint(0, 100)}' file_name = f'untitled_{time()}_{randint(0, 100)}'
file_name = re_SUB(r'[\\/*:|<>?"]', '', file_name).strip() # 去除非法字符 file_name = re_SUB(r'[\\/*:|<>?"]', '', file_name).strip() # 去除非法字符
file_name = parse.unquote(file_name)
# -------------------重命名文件名------------------- # -------------------重命名文件名-------------------
if rename: # 重命名文件,不改变扩展名 if rename: # 重命名文件,不改变扩展名
rename = re_SUB(r'[\\/*:|<>?"]', '', rename).strip() rename = re_SUB(r'[\\/*:|<>?"]', '', rename).strip()
@ -282,6 +294,7 @@ class SessionPage(object):
full_name = f'{rename}.{ext_name}' full_name = f'{rename}.{ext_name}'
else: else:
full_name = file_name full_name = file_name
# -------------------生成路径------------------- # -------------------生成路径-------------------
goal_Path = Path(goal_path) goal_Path = Path(goal_path)
goal_path = '' goal_path = ''
@ -303,6 +316,7 @@ class SessionPage(object):
full_path = Path(f'{goal_path}\\{full_name}') full_path = Path(f'{goal_path}\\{full_name}')
else: else:
raise ValueError("Argument file_exists can only be 'skip', 'overwrite', 'rename'.") raise ValueError("Argument file_exists can only be 'skip', 'overwrite', 'rename'.")
# -------------------打印要下载的文件------------------- # -------------------打印要下载的文件-------------------
if show_msg: if show_msg:
print(full_name if file_name == full_name else f'{file_name} -> {full_name}') print(full_name if file_name == full_name else f'{file_name} -> {full_name}')
@ -336,6 +350,7 @@ class SessionPage(object):
if not download_status and full_path.exists(): if not download_status and full_path.exists():
full_path.unlink() # 删除下载出错文件 full_path.unlink() # 删除下载出错文件
r.close() r.close()
# -------------------显示并返回值------------------- # -------------------显示并返回值-------------------
if show_msg: if show_msg:
print(info) print(info)