From b95d7188282de5474861043ab011ed27baa79796 Mon Sep 17 00:00:00 2001 From: imgyh <1974355683@qq.com> Date: Tue, 21 Feb 2023 22:16:11 +0800 Subject: [PATCH] =?UTF-8?q?feat(tiktok):=20=E5=A2=9E=E5=8A=A0=E5=90=88?= =?UTF-8?q?=E9=9B=86=E4=B8=8B=E8=BD=BD=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 增加单个合集批量下载功能, 增加主页下所有合集批量下载功能 --- TikTok.py | 114 ++++++++++++++++++++++++++++++++++++++++++++--- TikTokCommand.py | 24 ++++++++-- TikTokUrls.py | 9 ++++ 3 files changed, 137 insertions(+), 10 deletions(-) diff --git a/TikTok.py b/TikTok.py index a87c0f9..3ce1849 100644 --- a/TikTok.py +++ b/TikTok.py @@ -34,7 +34,7 @@ class TikTok(object): self.headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36', 'referer': 'https://www.douyin.com/', - 'Cookie': '__ac_signature=_02B4Z6wo00f01CEKaogAAIDBqkHxaCCYIyghKm4AAGu9c3; s_v_web_id=verify_ledo1j1t_0NwhDQFJ_nLca_42o5_8tAA_T8CWm5E2M6LF; msToken=%s;odin_tt=324fb4ea4a89c0c05827e18a1ed9cf9bf8a17f7705fcc793fec935b637867e2a5a9b8168c885554d029919117a18ba69;' % self.utils.generate_random_str(107) + 'Cookie': 'ttwid=1|sGp2L-Krm46cXHcK7BsKghavVeVQIIOYtQInA1LV0-w|1676899557|3e483426230c481bd34f4d6529d6252372c154b75be7d4a2baec8edbfd0a742c; __ac_signature=_02B4Z6wo00f01CEKaogAAIDBqkHxaCCYIyghKm4AAGu9c3; s_v_web_id=verify_ledo1j1t_0NwhDQFJ_nLca_42o5_8tAA_T8CWm5E2M6LF; msToken=%s;odin_tt=324fb4ea4a89c0c05827e18a1ed9cf9bf8a17f7705fcc793fec935b637867e2a5a9b8168c885554d029919117a18ba69;' % self.utils.generate_random_str(107) } @@ -60,6 +60,8 @@ class TikTok(object): # 
https://www.iesdouyin.com/share/video/7037827546599263488/?region=CN&mid=6939809470193126152&u_code=j8a5173b&did=MS4wLjABAAAA1DICF9-A9M_CiGqAJZdsnig5TInVeIyPdc2QQdGrq58xUgD2w6BqCHovtqdIDs2i&iid=MS4wLjABAAAAomGWi4n2T0H9Ab9x96cUZoJXaILk4qXOJlJMZFiK6b_aJbuHkjN_f0mBzfy91DX1&with_sec_did=1&titleType=title&schema_type=37&from_ssr=1&utm_source=copy&utm_campaign=client_share&utm_medium=android&app=aweme # 用户 第一步解析出来的链接是share/user/{sec_uid} # https://www.iesdouyin.com/share/user/MS4wLjABAAAA06y3Ctu8QmuefqvUSU7vr0c_ZQnCqB0eaglgkelLTek?did=MS4wLjABAAAA1DICF9-A9M_CiGqAJZdsnig5TInVeIyPdc2QQdGrq58xUgD2w6BqCHovtqdIDs2i&iid=MS4wLjABAAAAomGWi4n2T0H9Ab9x96cUZoJXaILk4qXOJlJMZFiK6b_aJbuHkjN_f0mBzfy91DX1&with_sec_did=1&sec_uid=MS4wLjABAAAA06y3Ctu8QmuefqvUSU7vr0c_ZQnCqB0eaglgkelLTek&from_ssr=1&u_code=j8a5173b&timestamp=1674540164&ecom_share_track_params=%7B%22is_ec_shopping%22%3A%221%22%2C%22secuid%22%3A%22MS4wLjABAAAA-jD2lukp--I21BF8VQsmYUqJDbj3FmU-kGQTHl2y1Cw%22%2C%22enter_from%22%3A%22others_homepage%22%2C%22share_previous_page%22%3A%22others_homepage%22%7D&utm_source=copy&utm_campaign=client_share&utm_medium=android&app=aweme + # 合集 + # https://www.douyin.com/collection/7093490319085307918 urlstr = str(r.request.path_url) if "/share/user/" in urlstr: @@ -75,6 +77,10 @@ class TikTok(object): # 获取作品 aweme_id key = re.findall('video/(\d+)?', urlstr)[0] key_type = "aweme" + elif "/collection/" in urlstr: + # 获取作品 aweme_id + key = re.findall('collection/(\d+)?', urlstr)[0] + key_type = "mix" elif "live.douyin.com" in r.url: key = r.url.replace('https://live.douyin.com/', '') key_type = "live" @@ -82,13 +88,13 @@ class TikTok(object): if key is None or key_type is None: print('[ 错误 ]:输入链接有误!无法获取 id\r') return key_type, key - print('[ 提示 ]:作品或者用户的 id = %s\r' % key) return key_type, key # 传入 aweme_id # 返回 数据 字典 def getAwemeInfo(self, aweme_id): + print('[ 提示 ]:正在请求的作品 id = %s\r\n' % aweme_id) if aweme_id is None: return None @@ -126,11 +132,12 @@ class TikTok(object): # 传入 url 支持 
https://www.iesdouyin.com 与 https://v.douyin.com # mode : post | like 模式选择 like为用户点赞 post为用户发布 def getUserInfo(self, sec_uid, mode="post", count=35): + print('[ 提示 ]:正在请求的用户 id = %s\r\n' % sec_uid) if sec_uid is None: return None max_cursor = 0 - self.awemeList = [] + awemeList = [] print("[ 提示 ]:正在获取所有作品数据请稍后...\r") print("[ 提示 ]:会进行多次请求,等待时间较长...\r\n") @@ -145,7 +152,7 @@ class TikTok(object): url = self.urls.USER_FAVORITE_A + self.utils.getXbogus( url=f'sec_user_id={sec_uid}&count={count}&max_cursor={max_cursor}&aid=1128&version_name=23.5.0&device_platform=android&os_version=2333') else: - print("[ 错误 ]:模式选择错误, 仅支持post和like, 请检查后重新运行!\r") + print("[ 错误 ]:模式选择错误, 仅支持post、like、mix, 请检查后重新运行!\r") return None while True: @@ -164,7 +171,7 @@ class TikTok(object): aweme_id = aweme["aweme_id"] # 深拷贝 dict 不然list里面全是同样的数据 datanew, dataraw = self.getAwemeInfo(aweme_id) - self.awemeList.append(copy.deepcopy(datanew)) + awemeList.append(copy.deepcopy(datanew)) # 更新 max_cursor max_cursor = datadict["max_cursor"] @@ -174,11 +181,12 @@ class TikTok(object): print("[ 提示 ]:所有作品数据获取完成...\r\n") break else: - print("[ 提示 ]:第 " + str(times) + " 次请求成功...\r") + print("[ 提示 ]:第 " + str(times) + " 次请求成功...\r\n") - return self.awemeList + return awemeList def getLiveInfo(self, web_rid: str): + print('[ 提示 ]:正在请求的直播间 id = %s\r\n' % web_rid) # web_rid = live_url.replace('https://live.douyin.com/', '') @@ -256,6 +264,98 @@ class TikTok(object): print('[ 📺 ]:复制链接使用下载工具下载') return self.result.liveDict + def getMixInfo(self, mix_id: str, count=35): + print('[ 提示 ]:正在请求的合集 id = %s\r\n' % mix_id) + if mix_id is None: + return None + + cursor = 0 + awemeList = [] + + print("[ 提示 ]:正在获取合集下的所有作品数据请稍后...\r") + print("[ 提示 ]:会进行多次请求,等待时间较长...\r\n") + times = 0 + while True: + times = times + 1 + print("[ 提示 ]:正在进行第 " + str(times) + " 次请求...\r") + + url = 'https://www.douyin.com/aweme/v1/web/mix/aweme/?' 
+ self.utils.getXbogus( + url=f'device_platform=webapp&aid=6383&os_version=10&version_name=17.4.0&mix_id={mix_id}&cursor={cursor}&count={count}') + + while True: + # 接口不稳定, 有时服务器不返回数据, 需要重新获取 + try: + res = requests.get(url=url, headers=self.headers) + datadict = json.loads(res.text) + print('[ 提示 ]:本次请求返回 ' + str(len(datadict["aweme_list"])) + ' 条数据') + if datadict is not None: + break + except Exception as e: + print("[ 警告 ]:接口未返回数据, 正在重新请求!\r") + + for aweme in datadict["aweme_list"]: + # 获取 aweme_id + aweme_id = aweme["aweme_id"] + # 深拷贝 dict 不然list里面全是同样的数据 + datanew, dataraw = self.getAwemeInfo(aweme_id) + awemeList.append(copy.deepcopy(datanew)) + + # 更新 max_cursor + cursor = datadict["cursor"] + + # 退出条件 + if datadict["has_more"] == 0 or datadict["has_more"] == False: + print("\r\n[ 提示 ]:合集下所有作品数据获取完成...\r\n") + break + else: + print("[ 提示 ]:第 " + str(times) + " 次请求成功...\r\n") + + return awemeList + + def getUserAllMixInfo(self, sec_uid, count=35): + print('[ 提示 ]:正在请求的用户 id = %s\r\n' % sec_uid) + if sec_uid is None: + return None + + cursor = 0 + mixIdNameDict = {} + + print("[ 提示 ]:正在获取所有合集 id 数据请稍后...\r") + print("[ 提示 ]:会进行多次请求,等待时间较长...\r\n") + times = 0 + while True: + times = times + 1 + print("[ 提示 ]:正在进行第 " + str(times) + " 次请求...\r") + + url = self.urls.USER_MIX_LIST + self.utils.getXbogus( + url=f'device_platform=webapp&aid=6383&os_version=10&version_name=17.4.0&sec_user_id={sec_uid}&count={count}&cursor={cursor}') + + while True: + # 接口不稳定, 有时服务器不返回数据, 需要重新获取 + try: + res = requests.get(url=url, headers=self.headers) + datadict = json.loads(res.text) + print('[ 提示 ]:本次请求返回 ' + str(len(datadict["mix_infos"])) + ' 条数据') + if datadict is not None and datadict["status_code"] == 0: + break + except Exception as e: + print("[ 警告 ]:接口未返回数据, 正在重新请求!\r") + + for mix in datadict["mix_infos"]: + mixIdNameDict[mix["mix_id"]] = mix["mix_name"] + + # 更新 max_cursor + cursor = datadict["cursor"] + + # 退出条件 + if datadict["has_more"] == 0 or datadict["has_more"] 
== False: + print("[ 提示 ]:所有合集 id 数据获取完成...\r\n") + break + else: + print("[ 提示 ]:第 " + str(times) + " 次请求成功...\r\n") + + return mixIdNameDict + # 来自 https://blog.csdn.net/weixin_43347550/article/details/105248223 def progressBarDownload(self, url, filepath): start = time.time() # 下载开始时间 diff --git a/TikTokCommand.py b/TikTokCommand.py index ecdc2cc..5b0fd5b 100644 --- a/TikTokCommand.py +++ b/TikTokCommand.py @@ -17,12 +17,13 @@ import argparse import os import json from TikTok import TikTok +from TikTokUtils import Utils def argument(): parser = argparse.ArgumentParser(description='抖音批量下载工具 使用帮助') parser.add_argument("--link", "-l", - help="1.作品(视频或图集)与个人主页抖音分享链接(删除文案, 保证只有URL, https://v.douyin.com/kcvMpuN/)\r\n" + help="1.作品(视频或图集)、合集、个人主页抖音分享链接(删除文案, 保证只有URL, https://v.douyin.com/kcvMpuN/)\r\n" "2.解析直播网页版网址(https://live.douyin.com/802939216127)", type=str, required=True) parser.add_argument("--path", "-p", help="下载保存位置", @@ -33,7 +34,7 @@ def argument(): type=bool, required=False, default=True) parser.add_argument("--avatar", "-a", help="是否下载作者的头像(True/False), 默认为True", type=bool, required=False, default=True) - parser.add_argument("--mode", "-M", help="link是个人主页时, 设置下载发布的作品(post)或喜欢的作品(like), 默认为post", + parser.add_argument("--mode", "-M", help="link是个人主页时, 设置下载发布的作品(post)或喜欢的作品(like)或者用户所有合集(mix), 默认为post", type=str, required=False, default="post") args = parser.parse_args() @@ -41,16 +42,33 @@ def argument(): def main(): + utils = Utils() args = argument() tk = TikTok() url = tk.getShareLink(args.link) key_type, key = tk.getKey(url) if key is None or key_type is None: return - elif key_type == "user": + elif key_type == "user" and args.mode != 'mix': datalist = tk.getUserInfo(key, args.mode, 35) tk.userDownload(awemeList=datalist, music=args.music, cover=args.cover, avatar=args.avatar, savePath=args.path) + elif key_type == "user" and args.mode == 'mix': + if not os.path.exists(args.path): + os.mkdir(args.path) + mixIdNameDict = tk.getUserAllMixInfo(key, 35) 
+ + for mix_id in mixIdNameDict: + print(f'\r\n[ 提示 ]:正在下载合集 [{mixIdNameDict[mix_id]}] 中的作品\r\n') + mix_file_name = utils.replaceStr(mixIdNameDict[mix_id]) + datalist = tk.getMixInfo(mix_id, 35) + tk.userDownload(awemeList=datalist, music=args.music, cover=args.cover, avatar=args.avatar, + savePath=os.path.join(args.path, mix_file_name)) + print(f'\r\n[ 提示 ]:合集 [{mixIdNameDict[mix_id]}] 中的作品下载完成\r\n') + elif key_type == "mix": + datalist = tk.getMixInfo(key,35) + tk.userDownload(awemeList=datalist, music=args.music, cover=args.cover, avatar=args.avatar, + savePath=args.path) elif key_type == "aweme": datanew, dataraw = tk.getAwemeInfo(key) tk.awemeDownload(awemeDict=datanew, music=args.music, cover=args.cover, avatar=args.avatar, diff --git a/TikTokUrls.py b/TikTokUrls.py index ac0f8b5..51d4f77 100644 --- a/TikTokUrls.py +++ b/TikTokUrls.py @@ -39,6 +39,7 @@ class Urls(object): self.POST_DETAIL = 'https://www.douyin.com/aweme/v1/web/aweme/detail/?' # 用户喜欢A + # 需要 odin_tt self.USER_FAVORITE_A = 'https://www.douyin.com/aweme/v1/web/aweme/favorite/?' # 用户喜欢B @@ -59,6 +60,14 @@ class Urls(object): # 关注用户作品 self.FOLLOW_FEED = 'https://www.douyin.com/aweme/v1/web/follow/feed/?' + # 合集下所有作品 + # 只需要X-Bogus + self.USER_MIX = 'https://www.douyin.com/aweme/v1/web/mix/aweme/?' + + # 用户所有合集列表 + # 需要 ttwid + self.USER_MIX_LIST = 'https://www.douyin.com/aweme/v1/web/mix/list/?' + # X-Bogus Path # 60 秒内,请求同一URI累计超过 600 次,封锁IP 300 秒 self.GET_XB_PATH = 'https://tiktok.199933.xyz/xb'