feat(tiktok): 增加合集下载功能

增加单个合集批量下载功能, 增加主页下所有合集批量下载功能
This commit is contained in:
imgyh 2023-02-21 22:16:11 +08:00
parent d338e6bafc
commit b95d718828
3 changed files with 137 additions and 10 deletions

114
TikTok.py
View File

@ -34,7 +34,7 @@ class TikTok(object):
self.headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
'referer': 'https://www.douyin.com/',
'Cookie': '__ac_signature=_02B4Z6wo00f01CEKaogAAIDBqkHxaCCYIyghKm4AAGu9c3; s_v_web_id=verify_ledo1j1t_0NwhDQFJ_nLca_42o5_8tAA_T8CWm5E2M6LF; msToken=%s;odin_tt=324fb4ea4a89c0c05827e18a1ed9cf9bf8a17f7705fcc793fec935b637867e2a5a9b8168c885554d029919117a18ba69;' % self.utils.generate_random_str(107)
'Cookie': 'ttwid=1|sGp2L-Krm46cXHcK7BsKghavVeVQIIOYtQInA1LV0-w|1676899557|3e483426230c481bd34f4d6529d6252372c154b75be7d4a2baec8edbfd0a742c; __ac_signature=_02B4Z6wo00f01CEKaogAAIDBqkHxaCCYIyghKm4AAGu9c3; s_v_web_id=verify_ledo1j1t_0NwhDQFJ_nLca_42o5_8tAA_T8CWm5E2M6LF; msToken=%s;odin_tt=324fb4ea4a89c0c05827e18a1ed9cf9bf8a17f7705fcc793fec935b637867e2a5a9b8168c885554d029919117a18ba69;' % self.utils.generate_random_str(107)
}
@ -60,6 +60,8 @@ class TikTok(object):
# https://www.iesdouyin.com/share/video/7037827546599263488/?region=CN&mid=6939809470193126152&u_code=j8a5173b&did=MS4wLjABAAAA1DICF9-A9M_CiGqAJZdsnig5TInVeIyPdc2QQdGrq58xUgD2w6BqCHovtqdIDs2i&iid=MS4wLjABAAAAomGWi4n2T0H9Ab9x96cUZoJXaILk4qXOJlJMZFiK6b_aJbuHkjN_f0mBzfy91DX1&with_sec_did=1&titleType=title&schema_type=37&from_ssr=1&utm_source=copy&utm_campaign=client_share&utm_medium=android&app=aweme
# 用户 第一步解析出来的链接是share/user/{sec_uid}
# https://www.iesdouyin.com/share/user/MS4wLjABAAAA06y3Ctu8QmuefqvUSU7vr0c_ZQnCqB0eaglgkelLTek?did=MS4wLjABAAAA1DICF9-A9M_CiGqAJZdsnig5TInVeIyPdc2QQdGrq58xUgD2w6BqCHovtqdIDs2i&iid=MS4wLjABAAAAomGWi4n2T0H9Ab9x96cUZoJXaILk4qXOJlJMZFiK6b_aJbuHkjN_f0mBzfy91DX1&with_sec_did=1&sec_uid=MS4wLjABAAAA06y3Ctu8QmuefqvUSU7vr0c_ZQnCqB0eaglgkelLTek&from_ssr=1&u_code=j8a5173b&timestamp=1674540164&ecom_share_track_params=%7B%22is_ec_shopping%22%3A%221%22%2C%22secuid%22%3A%22MS4wLjABAAAA-jD2lukp--I21BF8VQsmYUqJDbj3FmU-kGQTHl2y1Cw%22%2C%22enter_from%22%3A%22others_homepage%22%2C%22share_previous_page%22%3A%22others_homepage%22%7D&utm_source=copy&utm_campaign=client_share&utm_medium=android&app=aweme
# 合集
# https://www.douyin.com/collection/7093490319085307918
urlstr = str(r.request.path_url)
if "/share/user/" in urlstr:
@ -75,6 +77,10 @@ class TikTok(object):
# 获取作品 aweme_id
key = re.findall('video/(\d+)?', urlstr)[0]
key_type = "aweme"
elif "/collection/" in urlstr:
# 获取合集 mix_id
key = re.findall('collection/(\d+)?', urlstr)[0]
key_type = "mix"
elif "live.douyin.com" in r.url:
key = r.url.replace('https://live.douyin.com/', '')
key_type = "live"
@ -82,13 +88,13 @@ class TikTok(object):
if key is None or key_type is None:
print('[ 错误 ]:输入链接有误!无法获取 id\r')
return key_type, key
print('[ 提示 ]:作品或者用户的 id = %s\r' % key)
return key_type, key
# 传入 aweme_id
# 返回 数据 字典
def getAwemeInfo(self, aweme_id):
print('[ 提示 ]:正在请求的作品 id = %s\r\n' % aweme_id)
if aweme_id is None:
return None
@ -126,11 +132,12 @@ class TikTok(object):
# 传入 url 支持 https://www.iesdouyin.com 与 https://v.douyin.com
# mode : post | like 模式选择 like为用户点赞 post为用户发布
def getUserInfo(self, sec_uid, mode="post", count=35):
print('[ 提示 ]:正在请求的用户 id = %s\r\n' % sec_uid)
if sec_uid is None:
return None
max_cursor = 0
self.awemeList = []
awemeList = []
print("[ 提示 ]:正在获取所有作品数据请稍后...\r")
print("[ 提示 ]:会进行多次请求,等待时间较长...\r\n")
@ -145,7 +152,7 @@ class TikTok(object):
url = self.urls.USER_FAVORITE_A + self.utils.getXbogus(
url=f'sec_user_id={sec_uid}&count={count}&max_cursor={max_cursor}&aid=1128&version_name=23.5.0&device_platform=android&os_version=2333')
else:
print("[ 错误 ]:模式选择错误, 仅支持post和like, 请检查后重新运行!\r")
print("[ 错误 ]:模式选择错误, 仅支持post、like、mix, 请检查后重新运行!\r")
return None
while True:
@ -164,7 +171,7 @@ class TikTok(object):
aweme_id = aweme["aweme_id"]
# 深拷贝 dict 不然list里面全是同样的数据
datanew, dataraw = self.getAwemeInfo(aweme_id)
self.awemeList.append(copy.deepcopy(datanew))
awemeList.append(copy.deepcopy(datanew))
# 更新 max_cursor
max_cursor = datadict["max_cursor"]
@ -174,11 +181,12 @@ class TikTok(object):
print("[ 提示 ]:所有作品数据获取完成...\r\n")
break
else:
print("[ 提示 ]:第 " + str(times) + " 次请求成功...\r")
print("[ 提示 ]:第 " + str(times) + " 次请求成功...\r\n")
return self.awemeList
return awemeList
def getLiveInfo(self, web_rid: str):
print('[ 提示 ]:正在请求的直播间 id = %s\r\n' % web_rid)
# web_rid = live_url.replace('https://live.douyin.com/', '')
@ -256,6 +264,98 @@ class TikTok(object):
print('[ 📺 ]:复制链接使用下载工具下载')
return self.result.liveDict
def getMixInfo(self, mix_id: str, count=35, max_retries=10):
    """Collect the detail data of every work in one collection ("mix").

    Pages through the collection endpoint using the ``cursor`` /
    ``has_more`` fields of each response. For every entry of
    ``aweme_list`` it calls ``self.getAwemeInfo`` and stores a deep copy
    of the first element of the returned pair.

    Args:
        mix_id: Collection id (the digits of a ``/collection/<id>`` link).
        count: Page size sent to the endpoint with every request.
        max_retries: Maximum number of attempts for one page before the
            method gives up. Previously the retry loop was unbounded.

    Returns:
        ``None`` when ``mix_id`` is ``None``; otherwise the list of work
        data gathered so far. The list is partial when a page could not
        be fetched within ``max_retries`` attempts.
    """
    request_timeout = 10  # seconds; keeps a stalled connection from hanging forever
    retry_delay = 1       # seconds; avoids hammering the endpoint between retries

    print('[ 提示 ]:正在请求的合集 id = %s\r\n' % mix_id)
    if mix_id is None:
        return None

    cursor = 0
    awemeList = []

    print("[ 提示 ]:正在获取合集下的所有作品数据请稍后...\r")
    print("[ 提示 ]:会进行多次请求,等待时间较长...\r\n")

    times = 0
    while True:
        times += 1
        print("[ 提示 ]:正在进行第 " + str(times) + " 次请求...\r")

        url = self.urls.USER_MIX + self.utils.getXbogus(
            url=f'device_platform=webapp&aid=6383&os_version=10&version_name=17.4.0&mix_id={mix_id}&cursor={cursor}&count={count}')

        # The endpoint is unstable and sometimes returns no usable data,
        # so the same page is requested again, but only a bounded number
        # of times.
        datadict = None
        for _ in range(max_retries):
            try:
                res = requests.get(url=url, headers=self.headers, timeout=request_timeout)
                candidate = json.loads(res.text)
                returned = len(candidate["aweme_list"])
            except (requests.RequestException, ValueError, KeyError, TypeError):
                # Network failure, non-JSON body, or a payload without a
                # usable "aweme_list".
                print("[ 警告 ]:接口未返回数据, 正在重新请求!\r")
                time.sleep(retry_delay)
                continue
            print('[ 提示 ]:本次请求返回 ' + str(returned) + ' 条数据')
            datadict = candidate
            break

        if datadict is None:
            # Give the caller whatever was collected instead of looping forever.
            print("[ 错误 ]:接口多次请求仍未返回数据, 已停止获取!\r\n")
            return awemeList

        for aweme in datadict["aweme_list"]:
            # Deep copy so list entries never alias one shared dict.
            datanew, _ = self.getAwemeInfo(aweme["aweme_id"])
            awemeList.append(copy.deepcopy(datanew))

        # Advance to the next page.
        cursor = datadict["cursor"]

        # Exit condition: the server reports no further pages.
        if not datadict["has_more"]:
            print("\r\n[ 提示 ]:合集下所有作品数据获取完成...\r\n")
            break
        print("[ 提示 ]:第 " + str(times) + " 次请求成功...\r\n")

    return awemeList
def getUserAllMixInfo(self, sec_uid, count=35, max_retries=10):
    """Collect the id and name of every collection ("mix") of one user.

    Pages through the user's collection-list endpoint using the
    ``cursor`` / ``has_more`` fields of each response and records each
    entry of ``mix_infos``.

    Args:
        sec_uid: The user's ``sec_uid``, sent as ``sec_user_id``.
        count: Page size sent to the endpoint with every request.
        max_retries: Maximum number of attempts for one page before the
            method gives up. Previously the retry loop was unbounded.

    Returns:
        ``None`` when ``sec_uid`` is ``None``; otherwise a dict mapping
        ``mix_id`` to ``mix_name``. The dict is partial when a page could
        not be fetched within ``max_retries`` attempts.
    """
    request_timeout = 10  # seconds; keeps a stalled connection from hanging forever
    retry_delay = 1       # seconds; avoids hammering the endpoint between retries

    print('[ 提示 ]:正在请求的用户 id = %s\r\n' % sec_uid)
    if sec_uid is None:
        return None

    cursor = 0
    mixIdNameDict = {}

    print("[ 提示 ]:正在获取所有合集 id 数据请稍后...\r")
    print("[ 提示 ]:会进行多次请求,等待时间较长...\r\n")

    times = 0
    while True:
        times += 1
        print("[ 提示 ]:正在进行第 " + str(times) + " 次请求...\r")

        url = self.urls.USER_MIX_LIST + self.utils.getXbogus(
            url=f'device_platform=webapp&aid=6383&os_version=10&version_name=17.4.0&sec_user_id={sec_uid}&count={count}&cursor={cursor}')

        # The endpoint is unstable and sometimes returns no usable data,
        # so the same page is requested again, but only a bounded number
        # of times.
        datadict = None
        for _ in range(max_retries):
            try:
                res = requests.get(url=url, headers=self.headers, timeout=request_timeout)
                candidate = json.loads(res.text)
                returned = len(candidate["mix_infos"])
                succeeded = candidate["status_code"] == 0
            except (requests.RequestException, ValueError, KeyError, TypeError):
                # Network failure, non-JSON body, or a payload missing
                # the expected fields.
                succeeded = False
            if succeeded:
                print('[ 提示 ]:本次请求返回 ' + str(returned) + ' 条数据')
                datadict = candidate
                break
            # A non-zero status_code used to retry silently in a tight
            # loop; it is now reported and throttled like other failures.
            print("[ 警告 ]:接口未返回数据, 正在重新请求!\r")
            time.sleep(retry_delay)

        if datadict is None:
            # Give the caller whatever was collected instead of looping forever.
            print("[ 错误 ]:接口多次请求仍未返回数据, 已停止获取!\r\n")
            return mixIdNameDict

        for mix in datadict["mix_infos"]:
            mixIdNameDict[mix["mix_id"]] = mix["mix_name"]

        # Advance to the next page.
        cursor = datadict["cursor"]

        # Exit condition: the server reports no further pages.
        if not datadict["has_more"]:
            print("[ 提示 ]:所有合集 id 数据获取完成...\r\n")
            break
        print("[ 提示 ]:第 " + str(times) + " 次请求成功...\r\n")

    return mixIdNameDict
# 来自 https://blog.csdn.net/weixin_43347550/article/details/105248223
def progressBarDownload(self, url, filepath):
start = time.time() # 下载开始时间

View File

@ -17,12 +17,13 @@ import argparse
import os
import json
from TikTok import TikTok
from TikTokUtils import Utils
def argument():
parser = argparse.ArgumentParser(description='抖音批量下载工具 使用帮助')
parser.add_argument("--link", "-l",
help="1.作品(视频或图集)个人主页抖音分享链接(删除文案, 保证只有URL, https://v.douyin.com/kcvMpuN/)\r\n"
help="1.作品(视频或图集)、合集、个人主页抖音分享链接(删除文案, 保证只有URL, https://v.douyin.com/kcvMpuN/)\r\n"
"2.解析直播网页版网址(https://live.douyin.com/802939216127)",
type=str, required=True)
parser.add_argument("--path", "-p", help="下载保存位置",
@ -33,7 +34,7 @@ def argument():
type=bool, required=False, default=True)
parser.add_argument("--avatar", "-a", help="是否下载作者的头像(True/False), 默认为True",
type=bool, required=False, default=True)
parser.add_argument("--mode", "-M", help="link是个人主页时, 设置下载发布的作品(post)或喜欢的作品(like), 默认为post",
parser.add_argument("--mode", "-M", help="link是个人主页时, 设置下载发布的作品(post)或喜欢的作品(like)或者用户所有合集(mix), 默认为post",
type=str, required=False, default="post")
args = parser.parse_args()
@ -41,16 +42,33 @@ def argument():
def main():
utils = Utils()
args = argument()
tk = TikTok()
url = tk.getShareLink(args.link)
key_type, key = tk.getKey(url)
if key is None or key_type is None:
return
elif key_type == "user":
elif key_type == "user" and args.mode != 'mix':
datalist = tk.getUserInfo(key, args.mode, 35)
tk.userDownload(awemeList=datalist, music=args.music, cover=args.cover, avatar=args.avatar,
savePath=args.path)
elif key_type == "user" and args.mode == 'mix':
if not os.path.exists(args.path):
os.mkdir(args.path)
mixIdNameDict = tk.getUserAllMixInfo(key, 35)
for mix_id in mixIdNameDict:
print(f'\r\n[ 提示 ]:正在下载合集 [{mixIdNameDict[mix_id]}] 中的作品\r\n')
mix_file_name = utils.replaceStr(mixIdNameDict[mix_id])
datalist = tk.getMixInfo(mix_id, 35)
tk.userDownload(awemeList=datalist, music=args.music, cover=args.cover, avatar=args.avatar,
savePath=os.path.join(args.path, mix_file_name))
print(f'\r\n[ 提示 ]:合集 [{mixIdNameDict[mix_id]}] 中的作品下载完成\r\n')
elif key_type == "mix":
datalist = tk.getMixInfo(key,35)
tk.userDownload(awemeList=datalist, music=args.music, cover=args.cover, avatar=args.avatar,
savePath=args.path)
elif key_type == "aweme":
datanew, dataraw = tk.getAwemeInfo(key)
tk.awemeDownload(awemeDict=datanew, music=args.music, cover=args.cover, avatar=args.avatar,

View File

@ -39,6 +39,7 @@ class Urls(object):
self.POST_DETAIL = 'https://www.douyin.com/aweme/v1/web/aweme/detail/?'
# 用户喜欢A
# 需要 odin_tt
self.USER_FAVORITE_A = 'https://www.douyin.com/aweme/v1/web/aweme/favorite/?'
# 用户喜欢B
@ -59,6 +60,14 @@ class Urls(object):
# 关注用户作品
self.FOLLOW_FEED = 'https://www.douyin.com/aweme/v1/web/follow/feed/?'
# 合集下所有作品
# 只需要X-Bogus
self.USER_MIX = 'https://www.douyin.com/aweme/v1/web/mix/aweme/?'
# 用户所有合集列表
# 需要 ttwid
self.USER_MIX_LIST = 'https://www.douyin.com/aweme/v1/web/mix/list/?'
# X-Bogus Path
# 60 秒内,请求同一URI累计超过 600 次,封锁IP 300 秒
self.GET_XB_PATH = 'https://tiktok.199933.xyz/xb'