From 265c997efcda1ba7439c1d9c7cfe9b7d1c00df84 Mon Sep 17 00:00:00 2001 From: g1879 Date: Tue, 15 Sep 2020 13:43:02 +0800 Subject: [PATCH] =?UTF-8?q?download=E5=87=BD=E6=95=B0=E4=BC=98=E5=8C=96?= =?UTF-8?q?=E8=8E=B7=E5=8F=96=E6=96=87=E4=BB=B6=E5=90=8D=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DrissionPage/session_page.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/DrissionPage/session_page.py b/DrissionPage/session_page.py index 539df39..1bfe5ad 100644 --- a/DrissionPage/session_page.py +++ b/DrissionPage/session_page.py @@ -4,6 +4,7 @@ @Contact : g1879@qq.com @File : session_page.py """ +import re from os import path as os_PATH from pathlib import Path from random import randint @@ -11,6 +12,7 @@ from re import search as re_SEARCH from re import sub as re_SUB from time import time, sleep from typing import Union, List +from urllib import parse from urllib.parse import urlparse, quote from requests_html import HTMLSession, HTMLResponse, Element @@ -264,14 +266,24 @@ class SessionPage(object): if show_errmsg: raise ConnectionError(f'Status code: {r.status_code}.') return False, f'Status code: {r.status_code}.' + # -------------------获取文件名------------------- - if 'Content-disposition' in r.headers: # header里有文件名,则使用它 - file_name = r.headers['Content-disposition'].split('"')[1].encode('ISO-8859-1').decode('utf-8') - elif os_PATH.basename(file_url): # 在url里获取文件名 + file_name = '' + content_disposition = tuple(x for x in r.headers if x.lower() == 'content-disposition') + if content_disposition: # header里有文件名,则使用它 + file_name = r.headers[content_disposition[0]].encode('ISO-8859-1').decode('utf-8') + file_name = re.search(r'filename *= *"?([^";]+)', file_name) + if file_name: + file_name = file_name.group(1) + if file_name[0] == file_name[-1] == "'": + file_name = file_name.strip("'") + if not file_name and os_PATH.basename(file_url): # 在url里获取文件名 file_name = os_PATH.basename(file_url).split("?")[0] - else: # 找不到则用时间和随机数生成文件名 + if not file_name: # 找不到则用时间和随机数生成文件名 file_name = f'untitled_{time()}_{randint(0, 100)}' file_name = re_SUB(r'[\\/*:|<>?"]', '', file_name).strip() # 去除非法字符 + file_name = parse.unquote(file_name) + # -------------------重命名文件名------------------- if rename: # 重命名文件,不改变扩展名 rename = re_SUB(r'[\\/*:|<>?"]', '', rename).strip() @@ -282,6 +294,7 @@ class SessionPage(object): full_name = f'{rename}.{ext_name}' else: full_name = file_name + # -------------------生成路径------------------- goal_Path = Path(goal_path) goal_path = '' @@ -303,6 +316,7 @@ class SessionPage(object): full_path = Path(f'{goal_path}\\{full_name}') else: raise ValueError("Argument file_exists can only be 'skip', 'overwrite', 'rename'.") + # -------------------打印要下载的文件------------------- if show_msg: print(full_name if file_name == full_name else f'{file_name} -> {full_name}') @@ -336,6 +350,7 @@ class SessionPage(object): if not download_status and full_path.exists(): full_path.unlink() # 删除下载出错文件 r.close() + # -------------------显示并返回值------------------- if show_msg: print(info)