mirror of
https://gitee.com/g1879/DrissionPage.git
synced 2024-12-10 04:00:23 +08:00
download()增加retry,未完成
This commit is contained in:
parent
1cab918598
commit
7fcde1a67d
@ -310,6 +310,8 @@ class SessionPage(object):
|
|||||||
post_data: dict = None,
|
post_data: dict = None,
|
||||||
show_msg: bool = False,
|
show_msg: bool = False,
|
||||||
show_errmsg: bool = False,
|
show_errmsg: bool = False,
|
||||||
|
retry: int = None,
|
||||||
|
interval: float = None,
|
||||||
**kwargs) -> tuple:
|
**kwargs) -> tuple:
|
||||||
"""下载一个文件 \n
|
"""下载一个文件 \n
|
||||||
:param file_url: 文件url
|
:param file_url: 文件url
|
||||||
@ -319,163 +321,189 @@ class SessionPage(object):
|
|||||||
:param post_data: post方式的数据
|
:param post_data: post方式的数据
|
||||||
:param show_msg: 是否显示下载信息
|
:param show_msg: 是否显示下载信息
|
||||||
:param show_errmsg: 是否抛出和显示异常
|
:param show_errmsg: 是否抛出和显示异常
|
||||||
|
:param retry: 重试次数
|
||||||
|
:param interval: 重试间隔时间
|
||||||
:param kwargs: 连接参数
|
:param kwargs: 连接参数
|
||||||
:return: 下载是否成功(bool)和状态信息(成功时信息为文件路径)的元组
|
:return: 下载是否成功(bool)和状态信息(成功时信息为文件路径)的元组
|
||||||
"""
|
"""
|
||||||
# 生成的response不写入self._response,是临时的
|
|
||||||
if file_exists == 'skip' and Path(f'{goal_path}\\{rename}').exists():
|
if file_exists == 'skip' and Path(f'{goal_path}\\{rename}').exists():
|
||||||
if show_msg:
|
if show_msg:
|
||||||
print(f'{file_url}\n{goal_path}\\{rename}\nSkipped.\n')
|
print(f'{file_url}\n{goal_path}\\{rename}\nSkipped.\n')
|
||||||
|
|
||||||
return False, 'Skipped because a file with the same name already exists.'
|
return False, 'Skipped because a file with the same name already exists.'
|
||||||
|
|
||||||
kwargs['stream'] = True
|
def do(url: str,
|
||||||
|
goal: str,
|
||||||
|
new_name: str = None,
|
||||||
|
exists: str = 'rename',
|
||||||
|
data: dict = None,
|
||||||
|
msg: bool = False,
|
||||||
|
errmsg: bool = False,
|
||||||
|
**args) -> tuple:
|
||||||
|
args['stream'] = True
|
||||||
|
|
||||||
if 'timeout' not in kwargs:
|
if 'timeout' not in args:
|
||||||
kwargs['timeout'] = 20
|
args['timeout'] = 20
|
||||||
|
|
||||||
mode = 'post' if post_data else 'get'
|
mode = 'post' if data else 'get'
|
||||||
r, info = self._make_response(file_url, mode=mode, data=post_data, show_errmsg=show_errmsg, **kwargs)
|
# 生成的response不写入self._response,是临时的
|
||||||
|
r, info = self._make_response(url, mode=mode, data=data, show_errmsg=errmsg, **args)
|
||||||
|
|
||||||
if r is None:
|
if r is None:
|
||||||
if show_msg:
|
if msg:
|
||||||
print(info)
|
print(info)
|
||||||
|
|
||||||
return False, info
|
return False, info
|
||||||
|
|
||||||
if not r.ok:
|
if not r.ok:
|
||||||
if show_errmsg:
|
if errmsg:
|
||||||
raise ConnectionError(f'Status code: {r.status_code}.')
|
raise ConnectionError(f'Status code: {r.status_code}.')
|
||||||
|
|
||||||
return False, f'Status code: {r.status_code}.'
|
return False, f'Status code: {r.status_code}.'
|
||||||
|
|
||||||
# -------------------获取文件名-------------------
|
# -------------------获取文件名-------------------
|
||||||
file_name = ''
|
file_name = ''
|
||||||
content_disposition = r.headers.get('content-disposition')
|
content_disposition = r.headers.get('content-disposition')
|
||||||
|
|
||||||
# 使用header里的文件名
|
# 使用header里的文件名
|
||||||
if content_disposition:
|
if content_disposition:
|
||||||
file_name = r.headers[content_disposition[0]].encode('ISO-8859-1').decode('utf-8')
|
file_name = r.headers[content_disposition[0]].encode('ISO-8859-1').decode('utf-8')
|
||||||
file_name = re.search(r'filename *= *"?([^";]+)', file_name)
|
file_name = re.search(r'filename *= *"?([^";]+)', file_name)
|
||||||
if file_name:
|
|
||||||
file_name = file_name.group(1)
|
|
||||||
|
|
||||||
if file_name[0] == file_name[-1] == "'":
|
if file_name:
|
||||||
file_name = file_name[1:-1]
|
file_name = file_name.group(1)
|
||||||
|
|
||||||
# 在url里获取文件名
|
if file_name[0] == file_name[-1] == "'":
|
||||||
if not file_name and os_PATH.basename(file_url):
|
file_name = file_name[1:-1]
|
||||||
file_name = os_PATH.basename(file_url).split("?")[0]
|
|
||||||
|
|
||||||
# 找不到则用时间和随机数生成文件名
|
# 在url里获取文件名
|
||||||
if not file_name:
|
if not file_name and os_PATH.basename(url):
|
||||||
file_name = f'untitled_{time()}_{randint(0, 100)}'
|
file_name = os_PATH.basename(url).split("?")[0]
|
||||||
|
|
||||||
# 去除非法字符
|
# 找不到则用时间和随机数生成文件名
|
||||||
file_name = re_SUB(r'[\\/*:|<>?"]', '', file_name).strip()
|
if not file_name:
|
||||||
file_name = unquote(file_name)
|
file_name = f'untitled_{time()}_{randint(0, 100)}'
|
||||||
|
|
||||||
# -------------------重命名,不改变扩展名-------------------
|
# 去除非法字符
|
||||||
if rename:
|
file_name = re_SUB(r'[\\/*:|<>?"]', '', file_name).strip()
|
||||||
rename = re_SUB(r'[\\/*:|<>?"]', '', rename).strip()
|
file_name = unquote(file_name)
|
||||||
ext_name = file_name.split('.')[-1]
|
|
||||||
|
|
||||||
if '.' in rename or ext_name == file_name:
|
# -------------------重命名,不改变扩展名-------------------
|
||||||
full_name = rename
|
if new_name:
|
||||||
else:
|
new_name = re_SUB(r'[\\/*:|<>?"]', '', new_name).strip()
|
||||||
full_name = f'{rename}.{ext_name}'
|
ext_name = file_name.split('.')[-1]
|
||||||
|
|
||||||
else:
|
if '.' in new_name or ext_name == file_name:
|
||||||
full_name = file_name
|
full_name = new_name
|
||||||
|
else:
|
||||||
# -------------------生成路径-------------------
|
full_name = f'{new_name}.{ext_name}'
|
||||||
goal_Path = Path(goal_path)
|
|
||||||
goal_path = ''
|
|
||||||
skip = False
|
|
||||||
|
|
||||||
for key, i in enumerate(goal_Path.parts): # 去除路径中的非法字符
|
|
||||||
goal_path += goal_Path.drive if key == 0 and goal_Path.drive else re_SUB(r'[*:|<>?"]', '', i).strip()
|
|
||||||
goal_path += '\\' if i != '\\' and key < len(goal_Path.parts) - 1 else ''
|
|
||||||
|
|
||||||
goal_Path = Path(goal_path).absolute()
|
|
||||||
goal_Path.mkdir(parents=True, exist_ok=True)
|
|
||||||
full_path = Path(f'{goal_path}\\{full_name}')
|
|
||||||
|
|
||||||
if full_path.exists():
|
|
||||||
if file_exists == 'rename':
|
|
||||||
full_name = get_available_file_name(goal_path, full_name)
|
|
||||||
full_path = Path(f'{goal_path}\\{full_name}')
|
|
||||||
|
|
||||||
elif file_exists == 'skip':
|
|
||||||
skip = True
|
|
||||||
|
|
||||||
elif file_exists == 'overwrite':
|
|
||||||
pass
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise ValueError("Argument file_exists can only be 'skip', 'overwrite', 'rename'.")
|
full_name = file_name
|
||||||
|
|
||||||
# -------------------打印要下载的文件-------------------
|
# -------------------生成路径-------------------
|
||||||
if show_msg:
|
goal_Path = Path(goal)
|
||||||
print(file_url)
|
goal = ''
|
||||||
print(full_name if file_name == full_name else f'{file_name} -> {full_name}')
|
skip = False
|
||||||
print(f'Downloading to: {goal_path}')
|
|
||||||
|
|
||||||
|
for key, i in enumerate(goal_Path.parts): # 去除路径中的非法字符
|
||||||
|
goal += goal_Path.drive if key == 0 and goal_Path.drive else re_SUB(r'[*:|<>?"]', '', i).strip()
|
||||||
|
goal += '\\' if i != '\\' and key < len(goal_Path.parts) - 1 else ''
|
||||||
|
|
||||||
|
goal_Path = Path(goal).absolute()
|
||||||
|
goal_Path.mkdir(parents=True, exist_ok=True)
|
||||||
|
full_path = Path(f'{goal}\\{full_name}')
|
||||||
|
|
||||||
|
if full_path.exists():
|
||||||
|
if file_exists == 'rename':
|
||||||
|
full_name = get_available_file_name(goal, full_name)
|
||||||
|
full_path = Path(f'{goal}\\{full_name}')
|
||||||
|
|
||||||
|
elif exists == 'skip':
|
||||||
|
skip = True
|
||||||
|
|
||||||
|
elif exists == 'overwrite':
|
||||||
|
pass
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise ValueError("Argument file_exists can only be 'skip', 'overwrite', 'rename'.")
|
||||||
|
|
||||||
|
# -------------------打印要下载的文件-------------------
|
||||||
|
if msg:
|
||||||
|
print(file_url)
|
||||||
|
print(full_name if file_name == full_name else f'{file_name} -> {full_name}')
|
||||||
|
print(f'Downloading to: {goal}')
|
||||||
|
|
||||||
|
if skip:
|
||||||
|
print('Skipped.\n')
|
||||||
|
|
||||||
|
# -------------------开始下载-------------------
|
||||||
if skip:
|
if skip:
|
||||||
print('Skipped.\n')
|
return False, 'Skipped because a file with the same name already exists.'
|
||||||
|
|
||||||
# -------------------开始下载-------------------
|
# 获取远程文件大小
|
||||||
if skip:
|
content_length = r.headers.get('content-length')
|
||||||
return False, 'Skipped because a file with the same name already exists.'
|
file_size = int(content_length) if content_length else None
|
||||||
|
|
||||||
# 获取远程文件大小
|
# 已下载文件大小和下载状态
|
||||||
content_length = r.headers.get('content-length')
|
downloaded_size, download_status = 0, False
|
||||||
file_size = int(content_length) if content_length else None
|
|
||||||
|
|
||||||
# 已下载文件大小和下载状态
|
try:
|
||||||
downloaded_size, download_status = 0, False
|
with open(str(full_path), 'wb') as tmpFile:
|
||||||
|
for chunk in r.iter_content(chunk_size=1024):
|
||||||
|
if chunk:
|
||||||
|
tmpFile.write(chunk)
|
||||||
|
|
||||||
try:
|
# 如表头有返回文件大小,显示进度
|
||||||
with open(str(full_path), 'wb') as tmpFile:
|
if msg and file_size:
|
||||||
for chunk in r.iter_content(chunk_size=1024):
|
downloaded_size += 1024
|
||||||
if chunk:
|
rate = downloaded_size / file_size if downloaded_size < file_size else 1
|
||||||
tmpFile.write(chunk)
|
print('\r {:.0%} '.format(rate), end="")
|
||||||
|
|
||||||
# 如表头有返回文件大小,显示进度
|
except Exception as e:
|
||||||
if show_msg and file_size:
|
if errmsg:
|
||||||
downloaded_size += 1024
|
raise ConnectionError(e)
|
||||||
rate = downloaded_size / file_size if downloaded_size < file_size else 1
|
|
||||||
print('\r {:.0%} '.format(rate), end="")
|
|
||||||
|
|
||||||
except Exception as e:
|
download_status, info = False, f'Download failed.\n{e}'
|
||||||
if show_errmsg:
|
|
||||||
raise ConnectionError(e)
|
|
||||||
|
|
||||||
download_status, info = False, f'Download failed.\n{e}'
|
|
||||||
|
|
||||||
else:
|
|
||||||
if full_path.stat().st_size == 0:
|
|
||||||
if show_errmsg:
|
|
||||||
raise ValueError('File size is 0.')
|
|
||||||
|
|
||||||
download_status, info = False, 'File size is 0.'
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
download_status, info = True, str(full_path)
|
if full_path.stat().st_size == 0:
|
||||||
|
if errmsg:
|
||||||
|
raise ValueError('File size is 0.')
|
||||||
|
|
||||||
finally:
|
download_status, info = False, 'File size is 0.'
|
||||||
# 删除下载出错文件
|
|
||||||
if not download_status and full_path.exists():
|
|
||||||
full_path.unlink()
|
|
||||||
|
|
||||||
r.close()
|
else:
|
||||||
|
download_status, info = True, str(full_path)
|
||||||
|
|
||||||
# -------------------显示并返回值-------------------
|
finally:
|
||||||
if show_msg:
|
# 删除下载出错文件
|
||||||
print(info, '\n')
|
if not download_status and full_path.exists():
|
||||||
|
full_path.unlink()
|
||||||
|
|
||||||
info = f'{goal_path}\\{full_name}' if download_status else info
|
r.close()
|
||||||
return download_status, info
|
|
||||||
|
# -------------------显示并返回值-------------------
|
||||||
|
if msg:
|
||||||
|
print(info, '\n')
|
||||||
|
|
||||||
|
info = f'{goal}\\{full_name}' if download_status else info
|
||||||
|
return download_status, info
|
||||||
|
|
||||||
|
retry_times = retry or self.retry_times
|
||||||
|
retry_interval = interval or self.retry_interval
|
||||||
|
result = do(file_url, goal_path, rename, file_exists, post_data, show_msg, show_errmsg, **kwargs)
|
||||||
|
|
||||||
|
if not result[0] and not str(result[1]).startswith('Skipped'):
|
||||||
|
for i in range(retry_times):
|
||||||
|
sleep(retry_interval)
|
||||||
|
|
||||||
|
print(f'重试 {file_url}')
|
||||||
|
result = do(file_url, goal_path, rename, file_exists, post_data, show_msg, show_errmsg, **kwargs)
|
||||||
|
if result[0]:
|
||||||
|
break
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
def _make_response(self,
|
def _make_response(self,
|
||||||
url: str,
|
url: str,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user