download()增加retry,未完成

This commit is contained in:
g1879 2021-02-04 18:35:57 +08:00
parent 1cab918598
commit 7fcde1a67d

View File

@ -310,6 +310,8 @@ class SessionPage(object):
post_data: dict = None, post_data: dict = None,
show_msg: bool = False, show_msg: bool = False,
show_errmsg: bool = False, show_errmsg: bool = False,
retry: int = None,
interval: float = None,
**kwargs) -> tuple: **kwargs) -> tuple:
"""下载一个文件 \n """下载一个文件 \n
:param file_url: 文件url :param file_url: 文件url
@ -319,163 +321,189 @@ class SessionPage(object):
:param post_data: post方式的数据 :param post_data: post方式的数据
:param show_msg: 是否显示下载信息 :param show_msg: 是否显示下载信息
:param show_errmsg: 是否抛出和显示异常 :param show_errmsg: 是否抛出和显示异常
:param retry: 重试次数
:param interval: 重试间隔时间
:param kwargs: 连接参数 :param kwargs: 连接参数
:return: 下载是否成功bool和状态信息成功时信息为文件路径的元组 :return: 下载是否成功bool和状态信息成功时信息为文件路径的元组
""" """
# 生成的response不写入self._response是临时的
if file_exists == 'skip' and Path(f'{goal_path}\\{rename}').exists(): if file_exists == 'skip' and Path(f'{goal_path}\\{rename}').exists():
if show_msg: if show_msg:
print(f'{file_url}\n{goal_path}\\{rename}\nSkipped.\n') print(f'{file_url}\n{goal_path}\\{rename}\nSkipped.\n')
return False, 'Skipped because a file with the same name already exists.' return False, 'Skipped because a file with the same name already exists.'
kwargs['stream'] = True def do(url: str,
goal: str,
new_name: str = None,
exists: str = 'rename',
data: dict = None,
msg: bool = False,
errmsg: bool = False,
**args) -> tuple:
args['stream'] = True
if 'timeout' not in kwargs: if 'timeout' not in args:
kwargs['timeout'] = 20 args['timeout'] = 20
mode = 'post' if post_data else 'get' mode = 'post' if data else 'get'
r, info = self._make_response(file_url, mode=mode, data=post_data, show_errmsg=show_errmsg, **kwargs) # 生成的response不写入self._response是临时的
r, info = self._make_response(url, mode=mode, data=data, show_errmsg=errmsg, **args)
if r is None: if r is None:
if show_msg: if msg:
print(info) print(info)
return False, info return False, info
if not r.ok: if not r.ok:
if show_errmsg: if errmsg:
raise ConnectionError(f'Status code: {r.status_code}.') raise ConnectionError(f'Status code: {r.status_code}.')
return False, f'Status code: {r.status_code}.' return False, f'Status code: {r.status_code}.'
# -------------------获取文件名------------------- # -------------------获取文件名-------------------
file_name = '' file_name = ''
content_disposition = r.headers.get('content-disposition') content_disposition = r.headers.get('content-disposition')
# 使用header里的文件名 # 使用header里的文件名
if content_disposition: if content_disposition:
file_name = r.headers[content_disposition[0]].encode('ISO-8859-1').decode('utf-8') file_name = r.headers[content_disposition[0]].encode('ISO-8859-1').decode('utf-8')
file_name = re.search(r'filename *= *"?([^";]+)', file_name) file_name = re.search(r'filename *= *"?([^";]+)', file_name)
if file_name:
file_name = file_name.group(1)
if file_name[0] == file_name[-1] == "'": if file_name:
file_name = file_name[1:-1] file_name = file_name.group(1)
# 在url里获取文件名 if file_name[0] == file_name[-1] == "'":
if not file_name and os_PATH.basename(file_url): file_name = file_name[1:-1]
file_name = os_PATH.basename(file_url).split("?")[0]
# 找不到则用时间和随机数生成文件名 # 在url里获取文件名
if not file_name: if not file_name and os_PATH.basename(url):
file_name = f'untitled_{time()}_{randint(0, 100)}' file_name = os_PATH.basename(url).split("?")[0]
# 去除非法字符 # 找不到则用时间和随机数生成文件名
file_name = re_SUB(r'[\\/*:|<>?"]', '', file_name).strip() if not file_name:
file_name = unquote(file_name) file_name = f'untitled_{time()}_{randint(0, 100)}'
# -------------------重命名,不改变扩展名------------------- # 去除非法字符
if rename: file_name = re_SUB(r'[\\/*:|<>?"]', '', file_name).strip()
rename = re_SUB(r'[\\/*:|<>?"]', '', rename).strip() file_name = unquote(file_name)
ext_name = file_name.split('.')[-1]
if '.' in rename or ext_name == file_name: # -------------------重命名,不改变扩展名-------------------
full_name = rename if new_name:
else: new_name = re_SUB(r'[\\/*:|<>?"]', '', new_name).strip()
full_name = f'{rename}.{ext_name}' ext_name = file_name.split('.')[-1]
else: if '.' in new_name or ext_name == file_name:
full_name = file_name full_name = new_name
else:
# -------------------生成路径------------------- full_name = f'{new_name}.{ext_name}'
goal_Path = Path(goal_path)
goal_path = ''
skip = False
for key, i in enumerate(goal_Path.parts): # 去除路径中的非法字符
goal_path += goal_Path.drive if key == 0 and goal_Path.drive else re_SUB(r'[*:|<>?"]', '', i).strip()
goal_path += '\\' if i != '\\' and key < len(goal_Path.parts) - 1 else ''
goal_Path = Path(goal_path).absolute()
goal_Path.mkdir(parents=True, exist_ok=True)
full_path = Path(f'{goal_path}\\{full_name}')
if full_path.exists():
if file_exists == 'rename':
full_name = get_available_file_name(goal_path, full_name)
full_path = Path(f'{goal_path}\\{full_name}')
elif file_exists == 'skip':
skip = True
elif file_exists == 'overwrite':
pass
else: else:
raise ValueError("Argument file_exists can only be 'skip', 'overwrite', 'rename'.") full_name = file_name
# -------------------打印要下载的文件------------------- # -------------------生成路径-------------------
if show_msg: goal_Path = Path(goal)
print(file_url) goal = ''
print(full_name if file_name == full_name else f'{file_name} -> {full_name}') skip = False
print(f'Downloading to: {goal_path}')
for key, i in enumerate(goal_Path.parts): # 去除路径中的非法字符
goal += goal_Path.drive if key == 0 and goal_Path.drive else re_SUB(r'[*:|<>?"]', '', i).strip()
goal += '\\' if i != '\\' and key < len(goal_Path.parts) - 1 else ''
goal_Path = Path(goal).absolute()
goal_Path.mkdir(parents=True, exist_ok=True)
full_path = Path(f'{goal}\\{full_name}')
if full_path.exists():
if file_exists == 'rename':
full_name = get_available_file_name(goal, full_name)
full_path = Path(f'{goal}\\{full_name}')
elif exists == 'skip':
skip = True
elif exists == 'overwrite':
pass
else:
raise ValueError("Argument file_exists can only be 'skip', 'overwrite', 'rename'.")
# -------------------打印要下载的文件-------------------
if msg:
print(file_url)
print(full_name if file_name == full_name else f'{file_name} -> {full_name}')
print(f'Downloading to: {goal}')
if skip:
print('Skipped.\n')
# -------------------开始下载-------------------
if skip: if skip:
print('Skipped.\n') return False, 'Skipped because a file with the same name already exists.'
# -------------------开始下载------------------- # 获取远程文件大小
if skip: content_length = r.headers.get('content-length')
return False, 'Skipped because a file with the same name already exists.' file_size = int(content_length) if content_length else None
# 获取远程文件大小 # 已下载文件大小和下载状态
content_length = r.headers.get('content-length') downloaded_size, download_status = 0, False
file_size = int(content_length) if content_length else None
# 已下载文件大小和下载状态 try:
downloaded_size, download_status = 0, False with open(str(full_path), 'wb') as tmpFile:
for chunk in r.iter_content(chunk_size=1024):
if chunk:
tmpFile.write(chunk)
try: # 如表头有返回文件大小,显示进度
with open(str(full_path), 'wb') as tmpFile: if msg and file_size:
for chunk in r.iter_content(chunk_size=1024): downloaded_size += 1024
if chunk: rate = downloaded_size / file_size if downloaded_size < file_size else 1
tmpFile.write(chunk) print('\r {:.0%} '.format(rate), end="")
# 如表头有返回文件大小,显示进度 except Exception as e:
if show_msg and file_size: if errmsg:
downloaded_size += 1024 raise ConnectionError(e)
rate = downloaded_size / file_size if downloaded_size < file_size else 1
print('\r {:.0%} '.format(rate), end="")
except Exception as e: download_status, info = False, f'Download failed.\n{e}'
if show_errmsg:
raise ConnectionError(e)
download_status, info = False, f'Download failed.\n{e}'
else:
if full_path.stat().st_size == 0:
if show_errmsg:
raise ValueError('File size is 0.')
download_status, info = False, 'File size is 0.'
else: else:
download_status, info = True, str(full_path) if full_path.stat().st_size == 0:
if errmsg:
raise ValueError('File size is 0.')
finally: download_status, info = False, 'File size is 0.'
# 删除下载出错文件
if not download_status and full_path.exists():
full_path.unlink()
r.close() else:
download_status, info = True, str(full_path)
# -------------------显示并返回值------------------- finally:
if show_msg: # 删除下载出错文件
print(info, '\n') if not download_status and full_path.exists():
full_path.unlink()
info = f'{goal_path}\\{full_name}' if download_status else info r.close()
return download_status, info
# -------------------显示并返回值-------------------
if msg:
print(info, '\n')
info = f'{goal}\\{full_name}' if download_status else info
return download_status, info
retry_times = retry or self.retry_times
retry_interval = interval or self.retry_interval
result = do(file_url, goal_path, rename, file_exists, post_data, show_msg, show_errmsg, **kwargs)
if not result[0] and not str(result[1]).startswith('Skipped'):
for i in range(retry_times):
sleep(retry_interval)
print(f'重试 {file_url}')
result = do(file_url, goal_path, rename, file_exists, post_data, show_msg, show_errmsg, **kwargs)
if result[0]:
break
return result
def _make_response(self, def _make_response(self,
url: str, url: str,