From 942bc359126a0ce9241b44576063bf42b161d02a Mon Sep 17 00:00:00 2001 From: imgyh <1974355683@qq.com> Date: Sun, 21 May 2023 16:17:21 +0800 Subject: [PATCH] =?UTF-8?q?feat(tiktok):=20=E8=A7=84=E8=8C=83=E5=8C=85?= =?UTF-8?q?=E7=9B=AE=E5=BD=95=E7=BB=93=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- DouYinCommand.py | 376 +++++++++++++++++++ WebApi.py | 161 ++++++++ apiproxy/__init__.py | 20 + apiproxy/common/__init__.py | 21 ++ apiproxy/common/utils.py | 201 ++++++++++ apiproxy/douyin/__init__.py | 27 ++ apiproxy/douyin/database.py | 172 +++++++++ apiproxy/douyin/douyin.py | 688 +++++++++++++++++++++++++++++++++++ apiproxy/douyin/douyinapi.py | 398 ++++++++++++++++++++ apiproxy/douyin/download.py | 205 +++++++++++ apiproxy/douyin/result.py | 315 ++++++++++++++++ apiproxy/douyin/urls.py | 80 ++++ apiproxy/tiktok/__init__.py | 18 + 13 files changed, 2682 insertions(+) create mode 100644 DouYinCommand.py create mode 100644 WebApi.py create mode 100644 apiproxy/__init__.py create mode 100644 apiproxy/common/__init__.py create mode 100644 apiproxy/common/utils.py create mode 100644 apiproxy/douyin/__init__.py create mode 100644 apiproxy/douyin/database.py create mode 100644 apiproxy/douyin/douyin.py create mode 100644 apiproxy/douyin/douyinapi.py create mode 100644 apiproxy/douyin/download.py create mode 100644 apiproxy/douyin/result.py create mode 100644 apiproxy/douyin/urls.py create mode 100644 apiproxy/tiktok/__init__.py diff --git a/DouYinCommand.py b/DouYinCommand.py new file mode 100644 index 0000000..2cc499d --- /dev/null +++ b/DouYinCommand.py @@ -0,0 +1,376 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +''' +@FileName : DouYinCommand.py +@Project : apiproxy +@Description: +@Author : imgyh +@Mail : admin@imgyh.com +@Github : https://github.com/imgyh +@Site : https://www.imgyh.com +@Date : 2023/5/12 16:01 +@Version : v1.0 +@ChangeLog +------------------------------------------------ + 
+------------------------------------------------ +''' + +import argparse +import os +import sys +import json +import yaml +import time + +from apiproxy.douyin.douyin import Douyin +from apiproxy.douyin.download import Download +from apiproxy.douyin import douyin_headers +from apiproxy.common import utils + +configModel = { + "link": [], + "path": os.getcwd(), + "music": True, + "cover": True, + "avatar": True, + "json": True, + "folderstyle": True, + "mode": ["post"], + "number": { + "post": 0, + "like": 0, + "allmix": 0, + "mix": 0, + "music": 0, + }, + 'database': True, + "increase": { + "post": False, + "like": False, + "allmix": False, + "mix": False, + "music": False, + }, + "thread": 5, + "cookie": None + +} + + +def argument(): + parser = argparse.ArgumentParser(description='抖音批量下载工具 使用帮助') + parser.add_argument("--cmd", "-C", help="使用命令行(True)或者配置文件(False), 默认为False", + type=utils.str2bool, required=False, default=False) + parser.add_argument("--link", "-l", + help="作品(视频或图集)、直播、合集、音乐集合、个人主页的分享链接或者电脑浏览器网址, 可以设置多个链接(删除文案, 保证只有URL, https://v.douyin.com/kcvMpuN/ 或者 https://www.douyin.com/开头的)", + type=str, required=False, default=[], action="append") + parser.add_argument("--path", "-p", help="下载保存位置, 默认当前文件位置", + type=str, required=False, default=os.getcwd()) + parser.add_argument("--music", "-m", help="是否下载视频中的音乐(True/False), 默认为True", + type=utils.str2bool, required=False, default=True) + parser.add_argument("--cover", "-c", help="是否下载视频的封面(True/False), 默认为True, 当下载视频时有效", + type=utils.str2bool, required=False, default=True) + parser.add_argument("--avatar", "-a", help="是否下载作者的头像(True/False), 默认为True", + type=utils.str2bool, required=False, default=True) + parser.add_argument("--json", "-j", help="是否保存获取到的数据(True/False), 默认为True", + type=utils.str2bool, required=False, default=True) + parser.add_argument("--folderstyle", "-fs", help="文件保存风格, 默认为True", + type=utils.str2bool, required=False, default=True) + parser.add_argument("--mode", "-M", 
help="link是个人主页时, 设置下载发布的作品(post)或喜欢的作品(like)或者用户所有合集(mix), 默认为post, 可以设置多种模式", + type=str, required=False, default=[], action="append") + parser.add_argument("--postnumber", help="主页下作品下载个数设置, 默认为0 全部下载", + type=int, required=False, default=0) + parser.add_argument("--likenumber", help="主页下喜欢下载个数设置, 默认为0 全部下载", + type=int, required=False, default=0) + parser.add_argument("--allmixnumber", help="主页下合集下载个数设置, 默认为0 全部下载", + type=int, required=False, default=0) + parser.add_argument("--mixnumber", help="单个合集下作品下载个数设置, 默认为0 全部下载", + type=int, required=False, default=0) + parser.add_argument("--musicnumber", help="音乐(原声)下作品下载个数设置, 默认为0 全部下载", + type=int, required=False, default=0) + parser.add_argument("--database", "-d", help="是否使用数据库, 默认为True 使用数据库; 如果不使用数据库, 增量更新不可用", + type=utils.str2bool, required=False, default=True) + parser.add_argument("--postincrease", help="是否开启主页作品增量下载(True/False), 默认为False", + type=utils.str2bool, required=False, default=False) + parser.add_argument("--likeincrease", help="是否开启主页喜欢增量下载(True/False), 默认为False", + type=utils.str2bool, required=False, default=False) + parser.add_argument("--allmixincrease", help="是否开启主页合集增量下载(True/False), 默认为False", + type=utils.str2bool, required=False, default=False) + parser.add_argument("--mixincrease", help="是否开启单个合集下作品增量下载(True/False), 默认为False", + type=utils.str2bool, required=False, default=False) + parser.add_argument("--musicincrease", help="是否开启音乐(原声)下作品增量下载(True/False), 默认为False", + type=utils.str2bool, required=False, default=False) + parser.add_argument("--thread", "-t", + help="设置线程数, 默认5个线程", + type=int, required=False, default=5) + parser.add_argument("--cookie", help="设置cookie, 格式: \"name1=value1; name2=value2;\" 注意要加冒号", + type=str, required=False, default='') + args = parser.parse_args() + if args.thread <= 0: + args.thread = 5 + + return args + + +def yamlConfig(): + curPath = os.path.dirname(os.path.realpath(sys.argv[0])) + yamlPath = os.path.join(curPath, "config.yml") + f = 
open(yamlPath, 'r', encoding='utf-8') + cfg = f.read() + configDict = yaml.load(stream=cfg, Loader=yaml.FullLoader) + + try: + if configDict["link"] != None: + configModel["link"] = configDict["link"] + except Exception as e: + print("[ 警告 ]:link未设置, 程序退出...\r\n") + try: + if configDict["path"] != None: + configModel["path"] = configDict["path"] + except Exception as e: + print("[ 警告 ]:path未设置, 使用当前路径...\r\n") + try: + if configDict["music"] != None: + configModel["music"] = configDict["music"] + except Exception as e: + print("[ 警告 ]:music未设置, 使用默认值True...\r\n") + try: + if configDict["cover"] != None: + configModel["cover"] = configDict["cover"] + except Exception as e: + print("[ 警告 ]:cover未设置, 使用默认值True...\r\n") + try: + if configDict["avatar"] != None: + configModel["avatar"] = configDict["avatar"] + except Exception as e: + print("[ 警告 ]:avatar未设置, 使用默认值True...\r\n") + try: + if configDict["json"] != None: + configModel["json"] = configDict["json"] + except Exception as e: + print("[ 警告 ]:json未设置, 使用默认值True...\r\n") + try: + if configDict["folderstyle"] != None: + configModel["folderstyle"] = configDict["folderstyle"] + except Exception as e: + print("[ 警告 ]:folderstyle未设置, 使用默认值True...\r\n") + try: + if configDict["mode"] != None: + configModel["mode"] = configDict["mode"] + except Exception as e: + print("[ 警告 ]:mode未设置, 使用默认值post...\r\n") + try: + if configDict["number"]["post"] != None: + configModel["number"]["post"] = configDict["number"]["post"] + except Exception as e: + print("[ 警告 ]:post number未设置, 使用默认值0...\r\n") + try: + if configDict["number"]["like"] != None: + configModel["number"]["like"] = configDict["number"]["like"] + except Exception as e: + print("[ 警告 ]:like number未设置, 使用默认值0...\r\n") + try: + if configDict["number"]["allmix"] != None: + configModel["number"]["allmix"] = configDict["number"]["allmix"] + except Exception as e: + print("[ 警告 ]:allmix number未设置, 使用默认值0...\r\n") + try: + if configDict["number"]["mix"] != None: + 
configModel["number"]["mix"] = configDict["number"]["mix"] + except Exception as e: + print("[ 警告 ]:mix number未设置, 使用默认值0...\r\n") + try: + if configDict["number"]["music"] != None: + configModel["number"]["music"] = configDict["number"]["music"] + except Exception as e: + print("[ 警告 ]:music number未设置, 使用默认值0...\r\n") + try: + if configDict["database"] != None: + configModel["database"] = configDict["database"] + except Exception as e: + print("[ 警告 ]:database未设置, 使用默认值False...\r\n") + try: + if configDict["increase"]["post"] != None: + configModel["increase"]["post"] = configDict["increase"]["post"] + except Exception as e: + print("[ 警告 ]:post 增量更新未设置, 使用默认值False...\r\n") + try: + if configDict["increase"]["like"] != None: + configModel["increase"]["like"] = configDict["increase"]["like"] + except Exception as e: + print("[ 警告 ]:like 增量更新未设置, 使用默认值False...\r\n") + try: + if configDict["increase"]["allmix"] != None: + configModel["increase"]["allmix"] = configDict["increase"]["allmix"] + except Exception as e: + print("[ 警告 ]:allmix 增量更新未设置, 使用默认值False...\r\n") + try: + if configDict["increase"]["mix"] != None: + configModel["increase"]["mix"] = configDict["increase"]["mix"] + except Exception as e: + print("[ 警告 ]:mix 增量更新未设置, 使用默认值False...\r\n") + try: + if configDict["increase"]["music"] != None: + configModel["increase"]["music"] = configDict["increase"]["music"] + except Exception as e: + print("[ 警告 ]:music 增量更新未设置, 使用默认值False...\r\n") + try: + if configDict["thread"] != None: + configModel["thread"] = configDict["thread"] + except Exception as e: + print("[ 警告 ]:thread未设置, 使用默认值5...\r\n") + try: + if configDict["cookies"] != None: + cookiekey = configDict["cookies"].keys() + cookieStr = "" + for i in cookiekey: + cookieStr = cookieStr + i + "=" + configDict["cookies"][i] + "; " + configModel["cookie"] = cookieStr + except Exception as e: + pass + try: + if configDict["cookie"] != None: + configModel["cookie"] = configDict["cookie"] + except Exception as e: 
+ pass + + +def main(): + start = time.time() # 开始时间 + + args = argument() + + if args.cmd: + configModel["link"] = args.link + configModel["path"] = args.path + configModel["music"] = args.music + configModel["cover"] = args.cover + configModel["avatar"] = args.avatar + configModel["json"] = args.json + configModel["folderstyle"] = args.folderstyle + if args.mode == None or args.mode == []: + args.mode = [] + args.mode.append("post") + configModel["mode"] = list(set(args.mode)) + configModel["number"]["post"] = args.postnumber + configModel["number"]["like"] = args.likenumber + configModel["number"]["allmix"] = args.allmixnumber + configModel["number"]["mix"] = args.mixnumber + configModel["number"]["music"] = args.musicnumber + configModel["database"] = args.database + configModel["increase"]["post"] = args.postincrease + configModel["increase"]["like"] = args.likeincrease + configModel["increase"]["allmix"] = args.allmixincrease + configModel["increase"]["mix"] = args.mixincrease + configModel["increase"]["music"] = args.musicincrease + configModel["thread"] = args.thread + configModel["cookie"] = args.cookie + else: + yamlConfig() + + if configModel["link"] == []: + return + + if configModel["cookie"] is not None and configModel["cookie"] != "": + douyin_headers["Cookie"] = configModel["cookie"] + + configModel["path"] = os.path.abspath(configModel["path"]) + print("[ 提示 ]:数据保存路径 " + configModel["path"]) + if not os.path.exists(configModel["path"]): + os.mkdir(configModel["path"]) + + dy = Douyin(database=configModel["database"]) + dl = Download(thread=configModel["thread"], music=configModel["music"], cover=configModel["cover"], + avatar=configModel["avatar"], resjson=configModel["json"], + folderstyle=configModel["folderstyle"]) + + for link in configModel["link"]: + print("--------------------------------------------------------------------------------") + print("[ 提示 ]:正在请求的链接: " + link + "\r\n") + url = dy.getShareLink(link) + key_type, key = 
dy.getKey(url) + if key_type == "user": + print("[ 提示 ]:正在请求用户主页下作品\r\n") + data = dy.getUserDetailInfo(sec_uid=key) + nickname = "" + if data is not None and data != {}: + nickname = utils.replaceStr(data['user']['nickname']) + + userPath = os.path.join(configModel["path"], "user_" + nickname + "_" + key) + if not os.path.exists(userPath): + os.mkdir(userPath) + + for mode in configModel["mode"]: + print("--------------------------------------------------------------------------------") + print("[ 提示 ]:正在请求用户主页模式: " + mode + "\r\n") + if mode == 'post' or mode == 'like': + datalist = dy.getUserInfo(key, mode, 35, configModel["number"][mode], configModel["increase"][mode]) + if datalist is not None and datalist != []: + modePath = os.path.join(userPath, mode) + if not os.path.exists(modePath): + os.mkdir(modePath) + dl.userDownload(awemeList=datalist, savePath=modePath) + elif mode == 'mix': + mixIdNameDict = dy.getUserAllMixInfo(key, 35, configModel["number"]["allmix"]) + if mixIdNameDict is not None and mixIdNameDict != {}: + for mix_id in mixIdNameDict: + print(f'[ 提示 ]:正在下载合集 [{mixIdNameDict[mix_id]}] 中的作品\r\n') + mix_file_name = utils.replaceStr(mixIdNameDict[mix_id]) + datalist = dy.getMixInfo(mix_id, 35, 0, configModel["increase"]["allmix"], key) + if datalist is not None and datalist != []: + modePath = os.path.join(userPath, mode) + if not os.path.exists(modePath): + os.mkdir(modePath) + dl.userDownload(awemeList=datalist, savePath=os.path.join(modePath, mix_file_name)) + print(f'[ 提示 ]:合集 [{mixIdNameDict[mix_id]}] 中的作品下载完成\r\n') + elif key_type == "mix": + print("[ 提示 ]:正在请求单个合集下作品\r\n") + datalist = dy.getMixInfo(key, 35, configModel["number"]["mix"], configModel["increase"]["mix"], "") + if datalist is not None and datalist != []: + mixname = utils.replaceStr(datalist[0]["mix_info"]["mix_name"]) + mixPath = os.path.join(configModel["path"], "mix_" + mixname + "_" + key) + if not os.path.exists(mixPath): + os.mkdir(mixPath) + 
dl.userDownload(awemeList=datalist, savePath=mixPath) + elif key_type == "music": + print("[ 提示 ]:正在请求音乐(原声)下作品\r\n") + datalist = dy.getMusicInfo(key, 35, configModel["number"]["music"], configModel["increase"]["music"]) + + if datalist is not None and datalist != []: + musicname = utils.replaceStr(datalist[0]["music"]["title"]) + musicPath = os.path.join(configModel["path"], "music_" + musicname + "_" + key) + if not os.path.exists(musicPath): + os.mkdir(musicPath) + dl.userDownload(awemeList=datalist, savePath=musicPath) + elif key_type == "aweme": + print("[ 提示 ]:正在请求单个作品\r\n") + datanew, dataraw = dy.getAwemeInfo(key) + if datanew is not None and datanew != {}: + datalist = [] + datalist.append(datanew) + awemePath = os.path.join(configModel["path"], "aweme") + if not os.path.exists(awemePath): + os.mkdir(awemePath) + dl.userDownload(awemeList=datalist, savePath=awemePath) + elif key_type == "live": + print("[ 提示 ]:正在进行直播解析\r\n") + live_json = dy.getLiveInfo(key) + if configModel["json"]: + livePath = os.path.join(configModel["path"], "live") + if not os.path.exists(livePath): + os.mkdir(livePath) + live_file_name = utils.replaceStr(key + live_json["nickname"]) + # 保存获取到json + print("[ 提示 ]:正在保存获取到的信息到result.json\r\n") + with open(os.path.join(livePath, live_file_name + ".json"), "w", encoding='utf-8') as f: + f.write(json.dumps(live_json, ensure_ascii=False, indent=2)) + f.close() + + end = time.time() # 结束时间 + print('\n' + '[下载完成]:总耗时: %d分钟%d秒\n' % (int((end - start) / 60), ((end - start) % 60))) # 输出下载用时时间 + + +if __name__ == "__main__": + main() diff --git a/WebApi.py b/WebApi.py new file mode 100644 index 0000000..284ef3b --- /dev/null +++ b/WebApi.py @@ -0,0 +1,161 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +''' +@FileName : WebApi.py +@Project : apiproxy +@Description: +@Author : imgyh +@Mail : admin@imgyh.com +@Github : https://github.com/imgyh +@Site : https://www.imgyh.com +@Date : 2023/5/12 18:52 +@Version : v1.0 +@ChangeLog 
+------------------------------------------------ + +------------------------------------------------ +''' + +from flask import * +from apiproxy.douyin.douyinapi import DouyinApi +from apiproxy.douyin import douyin_headers +import argparse + + +def douyinwork(share_link, max_cursor, mode, cookie): + dy = DouyinApi() + + if cookie is not None and cookie != "": + douyin_headers["Cookie"] = cookie + + url = dy.getShareLink(share_link) + key_type, key = dy.getKey(url) + + data = None + rawdata = None + cursor = None + has_more = None + if key_type == "user": + if mode == 'post' or mode == 'like': + data, rawdata, cursor, has_more = dy.getUserInfoApi(sec_uid=key, mode=mode, count=35, + max_cursor=max_cursor) + elif mode == 'mix': + data, rawdata, cursor, has_more = dy.getUserAllMixInfoApi(sec_uid=key, count=35, cursor=max_cursor) + elif mode == 'detail': + rawdata = dy.getUserDetailInfoApi(sec_uid=key) + data = rawdata + elif key_type == "mix": + data, rawdata, cursor, has_more = dy.getMixInfoApi(mix_id=key, count=35, cursor=max_cursor) + elif key_type == "music": + data, rawdata, cursor, has_more = dy.getMusicInfoApi(music_id=key, count=35, cursor=max_cursor) + elif key_type == "aweme": + data, rawdata = dy.getAwemeInfoApi(aweme_id=key) + elif key_type == "live": + data, rawdata = dy.getLiveInfoApi(web_rid=key) + + datadict = {} + + if data is not None and data != []: + datadict["data"] = data + datadict["rawdata"] = rawdata + datadict["cursor"] = cursor + datadict["has_more"] = has_more + datadict["status_code"] = 200 + else: + datadict["status_code"] = 500 + return datadict + + +def deal(mode=None): + usefuldict = {} + if request.headers.get("content_type") == "application/json": + result = request.get_json(force=True) + else: + result = request.form + + share_link = None + cursor = 0 + cookie = None + + try: + share_link = result["share_link"] + cursor = result["cursor"] + cookie = result["cookie"] + except Exception as e: + usefuldict["status_code"] = 500 + + try: 
+ if share_link is not None and share_link != "": + usefuldict = douyinwork(share_link, cursor, mode, cookie) + usefuldict["status_code"] = 200 + except Exception as e: + usefuldict["status_code"] = 500 + return jsonify(usefuldict) + + +app = Flask(__name__) +# 设置编码 +app.config['JSON_AS_ASCII'] = False + + +def argument(): + parser = argparse.ArgumentParser(description='抖音去水印工具 使用帮助') + parser.add_argument("--port", "-p", help="Web端口", + type=int, required=False, default=5000) + args = parser.parse_args() + + return args + + +@app.route("/douyin/music", methods=["POST"]) +def douyinMusic(): + return deal() + + +@app.route("/douyin/mix", methods=["POST"]) +def douyinMix(): + return deal() + + +@app.route("/douyin/user/mix", methods=["POST"]) +def douyinUserMix(): + return deal(mode="mix") + + +@app.route("/douyin/user/like", methods=["POST"]) +def douyinUserLike(): + return deal(mode="like") + + +@app.route("/douyin/user/post", methods=["POST"]) +def douyinUserPost(): + return deal(mode="post") + +@app.route("/douyin/user/detail", methods=["POST"]) +def douyinUserDetail(): + return deal(mode="detail") + +@app.route("/douyin/aweme", methods=["POST"]) +def douyinAweme(): + return deal() + + +@app.route("/douyin/live", methods=["POST"]) +def douyinLive(): + return deal() + + +@app.route("/douyin", methods=["POST"]) +def douyin(): + return deal() + + +@app.route("/", methods=["GET"]) +def index(): + return render_template("index.html") + + +if __name__ == "__main__": + args = argument() + app.run(debug=False, host="0.0.0.0", port=args.port) diff --git a/apiproxy/__init__.py b/apiproxy/__init__.py new file mode 100644 index 0000000..6ff52a4 --- /dev/null +++ b/apiproxy/__init__.py @@ -0,0 +1,20 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +''' +@FileName : __init__.py +@Project : apiproxy +@Description: +@Author : imgyh +@Mail : admin@imgyh.com +@Github : https://github.com/imgyh +@Site : https://www.imgyh.com +@Date : 2023/5/12 14:32 +@Version : v1.0 +@ChangeLog 
+------------------------------------------------ + +------------------------------------------------ +''' + +ua = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36' \ No newline at end of file diff --git a/apiproxy/common/__init__.py b/apiproxy/common/__init__.py new file mode 100644 index 0000000..88fdfbe --- /dev/null +++ b/apiproxy/common/__init__.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +''' +@FileName : __init__.py.py +@Project : apiproxy +@Description: +@Author : imgyh +@Mail : admin@imgyh.com +@Github : https://github.com/imgyh +@Site : https://www.imgyh.com +@Date : 2023/5/12 16:10 +@Version : v1.0 +@ChangeLog +------------------------------------------------ + +------------------------------------------------ +''' +from .utils import Utils + +utils = Utils() diff --git a/apiproxy/common/utils.py b/apiproxy/common/utils.py new file mode 100644 index 0000000..699586a --- /dev/null +++ b/apiproxy/common/utils.py @@ -0,0 +1,201 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +''' +@FileName : utils.py +@Project : apiproxy +@Description: +@Author : imgyh +@Mail : admin@imgyh.com +@Github : https://github.com/imgyh +@Site : https://www.imgyh.com +@Date : 2023/5/12 15:18 +@Version : v1.0 +@ChangeLog +------------------------------------------------ + +------------------------------------------------ +''' + +import random +import requests +import re +import os +import sys +import hashlib +import base64 +import time + +import apiproxy + + +class Utils(object): + def __init__(self): + pass + + def replaceStr(self, filenamestr: str): + """ + 替换非法字符,缩短字符长度,使其能成为文件名 + """ + # 匹配 汉字 字母 数字 空格 + match = "([0-9A-Za-z\u4e00-\u9fa5]+)" + + result = re.findall(match, filenamestr) + + result = "".join(result).strip() + if len(result) > 20: + result = result[:20] + # 去除前后空格 + return result + + def resource_path(self, relative_path): + if getattr(sys, 'frozen', False): # 是否Bundle 
Resource + base_path = sys._MEIPASS + else: + base_path = os.path.dirname(os.path.abspath(__file__)) + return os.path.join(base_path, relative_path) + + def str2bool(self, v): + if isinstance(v, bool): + return v + if v.lower() in ('yes', 'true', 't', 'y', '1'): + return True + elif v.lower() in ('no', 'false', 'f', 'n', '0'): + return False + else: + return True + + def generate_random_str(self, randomlength=16): + """ + 根据传入长度产生随机字符串 + """ + random_str = '' + base_str = 'ABCDEFGHIGKLMNOPQRSTUVWXYZabcdefghigklmnopqrstuvwxyz0123456789=' + length = len(base_str) - 1 + for _ in range(randomlength): + random_str += base_str[random.randint(0, length)] + return random_str + + # https://www.52pojie.cn/thread-1589242-1-1.html + def getttwid(self): + url = 'https://ttwid.bytedance.com/ttwid/union/register/' + data = '{"region":"cn","aid":1768,"needFid":false,"service":"www.ixigua.com","migrate_info":{"ticket":"","source":"node"},"cbUrlProtocol":"https","union":true}' + res = requests.post(url=url, data=data) + + for i, j in res.cookies.items(): + return j + + def getXbogus(self, payload, form='', ua=apiproxy.ua): + xbogus = self.get_xbogus(payload, ua, form) + params = payload + "&X-Bogus=" + xbogus + return params + + def get_xbogus(self, payload, ua, form): + short_str = "Dkdpgh4ZKsQB80/Mfvw36XI1R25-WUAlEi7NLboqYTOPuzmFjJnryx9HVGcaStCe=" + arr2 = self.get_arr2(payload, ua, form) + + garbled_string = self.get_garbled_string(arr2) + + xbogus = "" + + for i in range(0, 21, 3): + char_code_num0 = garbled_string[i] + char_code_num1 = garbled_string[i + 1] + char_code_num2 = garbled_string[i + 2] + base_num = char_code_num2 | char_code_num1 << 8 | char_code_num0 << 16 + str1 = short_str[(base_num & 16515072) >> 18] + str2 = short_str[(base_num & 258048) >> 12] + str3 = short_str[(base_num & 4032) >> 6] + str4 = short_str[base_num & 63] + xbogus += str1 + str2 + str3 + str4 + + return xbogus + + def get_garbled_string(self, arr2): + p = [ + arr2[0], arr2[10], arr2[1], arr2[11], 
arr2[2], arr2[12], arr2[3], arr2[13], arr2[4], arr2[14], + arr2[5], arr2[15], arr2[6], arr2[16], arr2[7], arr2[17], arr2[8], arr2[18], arr2[9] + ] + + char_array = [chr(i) for i in p] + f = [] + f.extend([2, 255]) + tmp = ['ÿ'] + bytes_ = self._0x30492c(tmp, "".join(char_array)) + + for i in range(len(bytes_)): + f.append(bytes_[i]) + + return f + + def get_arr2(self, payload, ua, form): + salt_payload_bytes = hashlib.md5(hashlib.md5(payload.encode()).digest()).digest() + salt_payload = [byte for byte in salt_payload_bytes] + + salt_form_bytes = hashlib.md5(hashlib.md5(form.encode()).digest()).digest() + salt_form = [byte for byte in salt_form_bytes] + + ua_key = ['\u0000', '\u0001', '\u000e'] + salt_ua_bytes = hashlib.md5(base64.b64encode(self._0x30492c(ua_key, ua))).digest() + salt_ua = [byte for byte in salt_ua_bytes] + + timestamp = int(time.time()) + canvas = 1489154074 + + arr1 = [ + 64, # 固定 + 0, # 固定 + 1, # 固定 + 14, # 固定 这个还要再看一下,14,12,0都出现过 + salt_payload[14], # payload 相关 + salt_payload[15], + salt_form[14], # form 相关 + salt_form[15], + salt_ua[14], # ua 相关 + salt_ua[15], + (timestamp >> 24) & 255, + (timestamp >> 16) & 255, + (timestamp >> 8) & 255, + (timestamp >> 0) & 255, + (canvas >> 24) & 255, + (canvas >> 16) & 255, + (canvas >> 8) & 255, + (canvas >> 0) & 255, + 64, # 校验位 + ] + + for i in range(1, len(arr1) - 1): + arr1[18] ^= arr1[i] + + arr2 = [arr1[0], arr1[2], arr1[4], arr1[6], arr1[8], arr1[10], arr1[12], arr1[14], arr1[16], arr1[18], arr1[1], + arr1[3], arr1[5], arr1[7], arr1[9], arr1[11], arr1[13], arr1[15], arr1[17]] + + return arr2 + + def _0x30492c(self, a, b): + d = [i for i in range(256)] + c = 0 + result = bytearray(len(b)) + + for i in range(256): + c = (c + d[i] + ord(a[i % len(a)])) % 256 + e = d[i] + d[i] = d[c] + d[c] = e + + t = 0 + c = 0 + + for i in range(len(b)): + t = (t + 1) % 256 + c = (c + d[t]) % 256 + e = d[t] + d[t] = d[c] + d[c] = e + result[i] = ord(b[i]) ^ d[(d[t] + d[c]) % 256] + + return result + + +if __name__ == 
"__main__": + pass diff --git a/apiproxy/douyin/__init__.py b/apiproxy/douyin/__init__.py new file mode 100644 index 0000000..dff25e3 --- /dev/null +++ b/apiproxy/douyin/__init__.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +''' +@FileName : __init__.py.py +@Project : apiproxy +@Description: +@Author : imgyh +@Mail : admin@imgyh.com +@Github : https://github.com/imgyh +@Site : https://www.imgyh.com +@Date : 2023/5/12 14:44 +@Version : v1.0 +@ChangeLog +------------------------------------------------ + +------------------------------------------------ +''' +import apiproxy +from apiproxy.common import utils + +douyin_headers = { + 'User-Agent': apiproxy.ua, + 'referer': 'https://www.douyin.com/', + 'accept-encoding': None, + 'Cookie': f"msToken={utils.generate_random_str(107)}; ttwid={utils.getttwid()}; odin_tt=324fb4ea4a89c0c05827e18a1ed9cf9bf8a17f7705fcc793fec935b637867e2a5a9b8168c885554d029919117a18ba69; passport_csrf_token=f61602fc63757ae0e4fd9d6bdcee4810;" +} diff --git a/apiproxy/douyin/database.py b/apiproxy/douyin/database.py new file mode 100644 index 0000000..fa50ce5 --- /dev/null +++ b/apiproxy/douyin/database.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +''' +@FileName : database.py +@Project : apiproxy +@Description: +@Author : imgyh +@Mail : admin@imgyh.com +@Github : https://github.com/imgyh +@Site : https://www.imgyh.com +@Date : 2023/5/12 15:15 +@Version : v1.0 +@ChangeLog +------------------------------------------------ + +------------------------------------------------ +''' + +import sqlite3 +import json + + +class DataBase(object): + def __init__(self): + self.conn = sqlite3.connect('data.db') + self.cursor = self.conn.cursor() + self.create_user_post_table() + self.create_user_like_table() + self.create_mix_table() + self.create_music_table() + + def create_user_post_table(self): + sql = """CREATE TABLE if not exists t_user_post ( + id integer primary key autoincrement, + sec_uid 
varchar(200), + aweme_id integer unique, + rawdata json + );""" + + try: + self.cursor.execute(sql) + self.conn.commit() + except Exception as e: + pass + + def get_user_post(self, sec_uid: str, aweme_id: int): + sql = """select id, sec_uid, aweme_id, rawdata from t_user_post where sec_uid=? and aweme_id=?;""" + + try: + self.cursor.execute(sql, (sec_uid, aweme_id)) + self.conn.commit() + res = self.cursor.fetchone() + return res + except Exception as e: + pass + + def insert_user_post(self, sec_uid: str, aweme_id: int, data: dict): + insertsql = """insert into t_user_post (sec_uid, aweme_id, rawdata) values(?,?,?);""" + + try: + self.cursor.execute(insertsql, (sec_uid, aweme_id, json.dumps(data))) + self.conn.commit() + except Exception as e: + pass + + def create_user_like_table(self): + sql = """CREATE TABLE if not exists t_user_like ( + id integer primary key autoincrement, + sec_uid varchar(200), + aweme_id integer unique, + rawdata json + );""" + + try: + self.cursor.execute(sql) + self.conn.commit() + except Exception as e: + pass + + def get_user_like(self, sec_uid: str, aweme_id: int): + sql = """select id, sec_uid, aweme_id, rawdata from t_user_like where sec_uid=? 
and aweme_id=?;""" + + try: + self.cursor.execute(sql, (sec_uid, aweme_id)) + self.conn.commit() + res = self.cursor.fetchone() + return res + except Exception as e: + pass + + def insert_user_like(self, sec_uid: str, aweme_id: int, data: dict): + insertsql = """insert into t_user_like (sec_uid, aweme_id, rawdata) values(?,?,?);""" + + try: + self.cursor.execute(insertsql, (sec_uid, aweme_id, json.dumps(data))) + self.conn.commit() + except Exception as e: + pass + + def create_mix_table(self): + sql = """CREATE TABLE if not exists t_mix ( + id integer primary key autoincrement, + sec_uid varchar(200), + mix_id varchar(200), + aweme_id integer, + rawdata json + );""" + + try: + self.cursor.execute(sql) + self.conn.commit() + except Exception as e: + pass + + def get_mix(self, sec_uid: str, mix_id: str, aweme_id: int): + sql = """select id, sec_uid, mix_id, aweme_id, rawdata from t_mix where sec_uid=? and mix_id=? and aweme_id=?;""" + + try: + self.cursor.execute(sql, (sec_uid, mix_id, aweme_id)) + self.conn.commit() + res = self.cursor.fetchone() + return res + except Exception as e: + pass + + def insert_mix(self, sec_uid: str, mix_id: str, aweme_id: int, data: dict): + insertsql = """insert into t_mix (sec_uid, mix_id, aweme_id, rawdata) values(?,?,?,?);""" + + try: + self.cursor.execute(insertsql, (sec_uid, mix_id, aweme_id, json.dumps(data))) + self.conn.commit() + except Exception as e: + pass + + def create_music_table(self): + sql = """CREATE TABLE if not exists t_music ( + id integer primary key autoincrement, + music_id varchar(200), + aweme_id integer unique, + rawdata json + );""" + + try: + self.cursor.execute(sql) + self.conn.commit() + except Exception as e: + pass + + def get_music(self, music_id: str, aweme_id: int): + sql = """select id, music_id, aweme_id, rawdata from t_music where music_id=? 
and aweme_id=?;""" + + try: + self.cursor.execute(sql, (music_id, aweme_id)) + self.conn.commit() + res = self.cursor.fetchone() + return res + except Exception as e: + pass + + def insert_music(self, music_id: str, aweme_id: int, data: dict): + insertsql = """insert into t_music (music_id, aweme_id, rawdata) values(?,?,?);""" + + try: + self.cursor.execute(insertsql, (music_id, aweme_id, json.dumps(data))) + self.conn.commit() + except Exception as e: + pass + + +if __name__ == '__main__': + pass diff --git a/apiproxy/douyin/douyin.py b/apiproxy/douyin/douyin.py new file mode 100644 index 0000000..a5e4710 --- /dev/null +++ b/apiproxy/douyin/douyin.py @@ -0,0 +1,688 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +''' +@FileName : douyin.py +@Project : apiproxy +@Description: +@Author : imgyh +@Mail : admin@imgyh.com +@Github : https://github.com/imgyh +@Site : https://www.imgyh.com +@Date : 2023/5/12 14:52 +@Version : v1.0 +@ChangeLog +------------------------------------------------ + +------------------------------------------------ +''' + +import re +import requests +import json +import time +import copy + +from apiproxy.douyin import douyin_headers +from apiproxy.douyin.urls import Urls +from apiproxy.douyin.result import Result +from apiproxy.douyin.database import DataBase +from apiproxy.common import utils + + +class Douyin(object): + + def __init__(self, database=False): + self.urls = Urls() + self.result = Result() + self.database = database + if database: + self.db = DataBase() + # 用于设置重复请求某个接口的最大时间 + self.timeout = 10 + + # 从分享链接中提取网址 + def getShareLink(self, string): + # findall() 查找匹配正则表达式的字符串 + return re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', string)[0] + + # 得到 作品id 或者 用户id + # 传入 url 支持 https://www.iesdouyin.com 与 https://v.douyin.com + def getKey(self, url): + key = None + key_type = None + + try: + r = requests.get(url=url, headers=douyin_headers) + except Exception as e: + print('[ 错误 
]:输入链接有误!\r') + return key_type, key + + # 抖音把图集更新为note + # 作品 第一步解析出来的链接是share/video/{aweme_id} + # https://www.iesdouyin.com/share/video/7037827546599263488/?region=CN&mid=6939809470193126152&u_code=j8a5173b&did=MS4wLjABAAAA1DICF9-A9M_CiGqAJZdsnig5TInVeIyPdc2QQdGrq58xUgD2w6BqCHovtqdIDs2i&iid=MS4wLjABAAAAomGWi4n2T0H9Ab9x96cUZoJXaILk4qXOJlJMZFiK6b_aJbuHkjN_f0mBzfy91DX1&with_sec_did=1&titleType=title&schema_type=37&from_ssr=1&utm_source=copy&utm_campaign=client_share&utm_medium=android&app=aweme + # 用户 第一步解析出来的链接是share/user/{sec_uid} + # https://www.iesdouyin.com/share/user/MS4wLjABAAAA06y3Ctu8QmuefqvUSU7vr0c_ZQnCqB0eaglgkelLTek?did=MS4wLjABAAAA1DICF9-A9M_CiGqAJZdsnig5TInVeIyPdc2QQdGrq58xUgD2w6BqCHovtqdIDs2i&iid=MS4wLjABAAAAomGWi4n2T0H9Ab9x96cUZoJXaILk4qXOJlJMZFiK6b_aJbuHkjN_f0mBzfy91DX1&with_sec_did=1&sec_uid=MS4wLjABAAAA06y3Ctu8QmuefqvUSU7vr0c_ZQnCqB0eaglgkelLTek&from_ssr=1&u_code=j8a5173b×tamp=1674540164&ecom_share_track_params=%7B%22is_ec_shopping%22%3A%221%22%2C%22secuid%22%3A%22MS4wLjABAAAA-jD2lukp--I21BF8VQsmYUqJDbj3FmU-kGQTHl2y1Cw%22%2C%22enter_from%22%3A%22others_homepage%22%2C%22share_previous_page%22%3A%22others_homepage%22%7D&utm_source=copy&utm_campaign=client_share&utm_medium=android&app=aweme + # 合集 + # https://www.douyin.com/collection/7093490319085307918 + urlstr = str(r.request.path_url) + + if "/user/" in urlstr: + # 获取用户 sec_uid + if '?' 
def getAwemeInfo(self, aweme_id):
    """Fetch one work's detail and convert it into the project's result format.

    The detail endpoint is unreliable and sometimes answers without data, so
    the request is repeated until it reports ``status_code == 0`` or
    ``self.timeout`` seconds have elapsed.

    Args:
        aweme_id: Id of the work to request.

    Returns:
        ``None`` when ``aweme_id`` is ``None``; ``({}, {})`` when no valid
        answer arrived in time; otherwise ``(self.result.awemeDict, datadict)``
        where ``datadict`` is the decoded JSON response. The first element is
        the shared ``self.result.awemeDict`` object, not a copy.
    """
    print('[ 提示 ]:正在请求的作品 id = %s\r' % aweme_id)
    if aweme_id is None:
        return None

    deadline = time.time() + self.timeout
    while True:
        try:
            # The single-work endpoint returns the work under 'aweme_detail'.
            jx_url = self.urls.POST_DETAIL + utils.getXbogus(
                f'aweme_id={aweme_id}&device_platform=webapp&aid=6383')
            # Bound the request itself so a stalled connection cannot outlive
            # the retry window.
            raw = requests.get(url=jx_url, headers=douyin_headers, timeout=self.timeout).text
            datadict = json.loads(raw)
            if datadict is not None and datadict["status_code"] == 0:
                break
        except Exception:
            # Deliberately broad: any network or parse failure is simply
            # another failed attempt.
            pass
        # Checked after every failed attempt, including well-formed answers
        # with a non-zero status_code, so the loop always terminates.
        if time.time() > deadline:
            print("[ 提示 ]:重复请求该接口" + str(self.timeout) + "s, 仍然未获取到数据")
            return {}, {}

    # Reset the shared result dict before filling it.
    self.result.clearDict(self.result.awemeDict)

    # 0 = video (default); 1 = image set, signalled by a non-None "images".
    awemeType = 0
    try:
        if datadict['aweme_detail']["images"] is not None:
            awemeType = 1
    except (KeyError, TypeError):
        print("[ 警告 ]:接口中未找到 images\r")

    # Convert the raw payload into the project's own format.
    self.result.dataConvert(awemeType, self.result.awemeDict, datadict['aweme_detail'])

    return self.result.awemeDict, datadict
str(self.timeout) + "s, 仍然未获取到数据") + return awemeList + + + for aweme in datadict["aweme_list"]: + if self.database: + # 退出条件 + if increase is False and numflag and numberis0: + break + if increase and numflag and numberis0 and increaseflag: + break + # 增量更新, 找到非置顶的最新的作品发布时间 + if mode == "post": + if self.db.get_user_post(sec_uid=sec_uid, aweme_id=aweme['aweme_id']) is not None: + if increase and aweme['is_top'] == 0: + increaseflag = True + else: + self.db.insert_user_post(sec_uid=sec_uid, aweme_id=aweme['aweme_id'], data=aweme) + elif mode == "like": + if self.db.get_user_like(sec_uid=sec_uid, aweme_id=aweme['aweme_id']) is not None: + if increase and aweme['is_top'] == 0: + increaseflag = True + else: + self.db.insert_user_like(sec_uid=sec_uid, aweme_id=aweme['aweme_id'], data=aweme) + + # 退出条件 + if increase and numflag is False and increaseflag: + break + if increase and numflag and numberis0 and increaseflag: + break + else: + if numflag and numberis0: + break + + if numflag: + number -= 1 + if number == 0: + numberis0 = True + + # 清空self.awemeDict + self.result.clearDict(self.result.awemeDict) + + # 默认为视频 + awemeType = 0 + try: + if aweme["images"] is not None: + awemeType = 1 + except Exception as e: + print("[ 警告 ]:接口中未找到 images\r") + + # 转换成我们自己的格式 + self.result.dataConvert(awemeType, self.result.awemeDict, aweme) + + if self.result.awemeDict is not None and self.result.awemeDict != {}: + awemeList.append(copy.deepcopy(self.result.awemeDict)) + + if self.database: + if increase and numflag is False and increaseflag: + print("\r\n[ 提示 ]: [主页] 下作品增量更新数据获取完成...\r\n") + break + elif increase is False and numflag and numberis0: + print("\r\n[ 提示 ]: [主页] 下指定数量作品数据获取完成...\r\n") + break + elif increase and numflag and numberis0 and increaseflag: + print("\r\n[ 提示 ]: [主页] 下指定数量作品数据获取完成, 增量更新数据获取完成...\r\n") + break + else: + if numflag and numberis0: + print("\r\n[ 提示 ]: [主页] 下指定数量作品数据获取完成...\r\n") + break + + # 更新 max_cursor + max_cursor = datadict["max_cursor"] + 
def getLiveInfo(self, web_rid: str):
    """Fetch a live room's information and let the user pick a stream quality.

    The endpoint is retried until it reports ``status_code == 0`` or
    ``self.timeout`` seconds have elapsed. For a running broadcast the
    available stream qualities are printed and one is chosen interactively
    through ``input()``.

    Args:
        web_rid: Id of the live room.

    Returns:
        ``{}`` when no valid answer arrived in time; otherwise the shared
        ``self.result.liveDict``. When the room's status is 4 (reported by the
        code as "broadcast ended") only ``awemeType`` and ``status`` are set.
    """
    print('[ 提示 ]:正在请求的直播间 id = %s\r\n' % web_rid)

    deadline = time.time() + self.timeout
    while True:
        try:
            live_api = self.urls.LIVE + utils.getXbogus(
                f'aid=6383&device_platform=web&web_rid={web_rid}')
            response = requests.get(live_api, headers=douyin_headers, timeout=self.timeout)
            live_json = json.loads(response.text)
            if live_json != {} and live_json['status_code'] == 0:
                break
        except Exception:
            # Deliberately broad: every failure is just another failed attempt.
            pass
        # Checked after every failed attempt so the loop always terminates.
        if time.time() > deadline:
            print("[ 提示 ]:重复请求该接口" + str(self.timeout) + "s, 仍然未获取到数据")
            return {}

    # Reset the shared dict, then fill it through a short alias.
    self.result.clearDict(self.result.liveDict)
    live = self.result.liveDict
    room = live_json['data']['data'][0]

    live["awemeType"] = 2
    live["status"] = room['status']

    if live["status"] == 4:
        print('[ 📺 ]:当前直播已结束,正在退出')
        return live

    live["title"] = room['title']
    live["cover"] = room['cover']['url_list'][0]
    # Ask for the large avatar rendition instead of the 100x100 thumbnail.
    live["avatar"] = room['owner']['avatar_thumb']['url_list'][0].replace(
        "100x100", "1080x1080")
    live["user_count"] = room['user_count_str']
    live["nickname"] = room['owner']['nickname']
    live["sec_uid"] = room['owner']['sec_uid']
    live["display_long"] = room['room_view_stats']['display_long']
    live["flv_pull_url"] = room['stream_url']['flv_pull_url']

    try:
        road_map = live_json['data']['partition_road_map']
        live["partition"] = road_map['partition']['title']
        live["sub_partition"] = road_map['sub_partition']['partition']['title']
    except Exception:
        # The partition data is optional; fall back to a placeholder.
        live["partition"] = '无'
        live["sub_partition"] = '无'

    info = '[ 💻 ]:直播间:%s 当前%s 主播:%s 分区:%s-%s\r' % (
        live["title"], live["display_long"], live["nickname"],
        live["partition"], live["sub_partition"])
    print(info)

    print('[ 🎦 ]:直播间清晰度')
    flv = list(live["flv_pull_url"].keys())
    for i, f in enumerate(flv):
        print('[ %s ]: %s' % (i, f))

    if not flv:
        # No stream offered: nothing to choose from.
        return live

    # Ask again instead of crashing on non-numeric or out-of-range input.
    while True:
        try:
            rate = int(input('[ 🎬 ]输入数字选择推流清晰度:'))
        except ValueError:
            continue
        if 0 <= rate < len(flv):
            break

    live["flv_pull_url0"] = live["flv_pull_url"][flv[rate]]

    print('[ %s ]:%s' % (flv[rate], live["flv_pull_url"][flv[rate]]))
    print('[ 📺 ]:复制链接使用下载工具下载')
    return live
start > self.timeout: + print("[ 提示 ]:重复请求该接口" + str(self.timeout) + "s, 仍然未获取到数据") + return awemeList + + + for aweme in datadict["aweme_list"]: + if self.database: + # 退出条件 + if increase is False and numflag and numberis0: + break + if increase and numflag and numberis0 and increaseflag: + break + # 增量更新, 找到非置顶的最新的作品发布时间 + if self.db.get_mix(sec_uid=sec_uid, mix_id=mix_id, aweme_id=aweme['aweme_id']) is not None: + if increase and aweme['is_top'] == 0: + increaseflag = True + else: + self.db.insert_mix(sec_uid=sec_uid, mix_id=mix_id, aweme_id=aweme['aweme_id'], data=aweme) + + # 退出条件 + if increase and numflag is False and increaseflag: + break + if increase and numflag and numberis0 and increaseflag: + break + else: + if numflag and numberis0: + break + + if numflag: + number -= 1 + if number == 0: + numberis0 = True + + # 清空self.awemeDict + self.result.clearDict(self.result.awemeDict) + + # 默认为视频 + awemeType = 0 + try: + if aweme["images"] is not None: + awemeType = 1 + except Exception as e: + print("[ 警告 ]:接口中未找到 images\r") + + # 转换成我们自己的格式 + self.result.dataConvert(awemeType, self.result.awemeDict, aweme) + + if self.result.awemeDict is not None and self.result.awemeDict != {}: + awemeList.append(copy.deepcopy(self.result.awemeDict)) + + if self.database: + if increase and numflag is False and increaseflag: + print("\r\n[ 提示 ]: [合集] 下作品增量更新数据获取完成...\r\n") + break + elif increase is False and numflag and numberis0: + print("\r\n[ 提示 ]: [合集] 下指定数量作品数据获取完成...\r\n") + break + elif increase and numflag and numberis0 and increaseflag: + print("\r\n[ 提示 ]: [合集] 下指定数量作品数据获取完成, 增量更新数据获取完成...\r\n") + break + else: + if numflag and numberis0: + print("\r\n[ 提示 ]: [合集] 下指定数量作品数据获取完成...\r\n") + break + + # 更新 max_cursor + cursor = datadict["cursor"] + + # 退出条件 + if datadict["has_more"] == 0 or datadict["has_more"] == False: + print("\r\n[ 提示 ]:[合集] 下所有作品数据获取完成...\r\n") + break + else: + print("\r\n[ 提示 ]:[合集] 第 " + str(times) + " 次请求成功...\r\n") + + return awemeList + + 
def getUserAllMixInfo(self, sec_uid, count=35, number=0):
    """Collect the ids and names of the mixes (collections) on a user's page.

    The list endpoint is paged through with ``cursor``. Each page is retried
    until it reports ``status_code == 0`` or ``self.timeout`` seconds have
    elapsed.

    Args:
        sec_uid: Id of the user.
        count: Page size sent with every request.
        number: Maximum number of mixes to collect; ``<= 0`` means no limit.

    Returns:
        ``None`` when ``sec_uid`` is ``None``; otherwise a dict mapping
        ``mix_id`` to ``mix_name``. It holds whatever was collected so far if
        a page could not be fetched in time.
    """
    print('[ 提示 ]:正在请求的用户 id = %s\r\n' % sec_uid)
    if sec_uid is None:
        return None
    numflag = number > 0

    cursor = 0
    mixIdNameDict = {}

    print("[ 提示 ]:正在获取主页下所有合集 id 数据请稍后...\r")
    print("[ 提示 ]:会进行多次请求,等待时间较长...\r\n")
    times = 0
    while True:
        times = times + 1
        print("[ 提示 ]:正在对 [合集列表] 进行第 " + str(times) + " 次请求...\r")

        deadline = time.time() + self.timeout
        while True:
            try:
                url = self.urls.USER_MIX_LIST + utils.getXbogus(
                    f'sec_user_id={sec_uid}&count={count}&cursor={cursor}&device_platform=webapp&aid=6383')

                res = requests.get(url=url, headers=douyin_headers, timeout=self.timeout)
                datadict = json.loads(res.text)
                print('[ 提示 ]:本次请求返回 ' + str(len(datadict["mix_infos"])) + ' 条数据\r')

                if datadict is not None and datadict["status_code"] == 0:
                    break
            except Exception:
                # Deliberately broad: every failure is another failed attempt.
                pass
            # Checked after every failed attempt so the loop always terminates.
            if time.time() > deadline:
                print("[ 提示 ]:重复请求该接口" + str(self.timeout) + "s, 仍然未获取到数据")
                return mixIdNameDict

        for mix in datadict["mix_infos"]:
            mixIdNameDict[mix["mix_id"]] = mix["mix_name"]
            if numflag:
                number -= 1
                if number == 0:
                    break
        if numflag and number == 0:
            print("\r\n[ 提示 ]:[合集列表] 下指定数量合集数据获取完成...\r\n")
            break

        # Stop on any falsy has_more (0, False, None, missing) so an
        # unexpected payload cannot keep the paging loop alive.
        if not datadict.get("has_more"):
            print("[ 提示 ]:[合集列表] 下所有合集 id 数据获取完成...\r\n")
            break
        print("\r\n[ 提示 ]:[合集列表] 第 " + str(times) + " 次请求成功...\r\n")

        # Advance to the next page.
        cursor = datadict["cursor"]

    return mixIdNameDict
+ print("[ 提示 ]:正在对 [音乐集合] 进行第 " + str(times) + " 次请求...\r") + + start = time.time() # 开始时间 + while True: + # 接口不稳定, 有时服务器不返回数据, 需要重新获取 + try: + url = self.urls.MUSIC + utils.getXbogus( + f'music_id={music_id}&cursor={cursor}&count={count}&device_platform=webapp&aid=6383') + + res = requests.get(url=url, headers=douyin_headers) + datadict = json.loads(res.text) + print('[ 提示 ]:本次请求返回 ' + str(len(datadict["aweme_list"])) + ' 条数据\r') + + if datadict is not None and datadict["status_code"] == 0: + break + except Exception as e: + end = time.time() # 结束时间 + if end - start > self.timeout: + print("[ 提示 ]:重复请求该接口" + str(self.timeout) + "s, 仍然未获取到数据") + return awemeList + + + for aweme in datadict["aweme_list"]: + if self.database: + # 退出条件 + if increase is False and numflag and numberis0: + break + if increase and numflag and numberis0 and increaseflag: + break + # 增量更新, 找到非置顶的最新的作品发布时间 + if self.db.get_music(music_id=music_id, aweme_id=aweme['aweme_id']) is not None: + if increase and aweme['is_top'] == 0: + increaseflag = True + else: + self.db.insert_music(music_id=music_id, aweme_id=aweme['aweme_id'], data=aweme) + + # 退出条件 + if increase and numflag is False and increaseflag: + break + if increase and numflag and numberis0 and increaseflag: + break + else: + if numflag and numberis0: + break + + if numflag: + number -= 1 + if number == 0: + numberis0 = True + + # 清空self.awemeDict + self.result.clearDict(self.result.awemeDict) + + # 默认为视频 + awemeType = 0 + try: + if aweme["images"] is not None: + awemeType = 1 + except Exception as e: + print("[ 警告 ]:接口中未找到 images\r") + + # 转换成我们自己的格式 + self.result.dataConvert(awemeType, self.result.awemeDict, aweme) + + if self.result.awemeDict is not None and self.result.awemeDict != {}: + awemeList.append(copy.deepcopy(self.result.awemeDict)) + + if self.database: + if increase and numflag is False and increaseflag: + print("\r\n[ 提示 ]: [音乐集合] 下作品增量更新数据获取完成...\r\n") + break + elif increase is False and numflag and numberis0: + 
def getUserDetailInfo(self, sec_uid):
    """Fetch the detail record of a user.

    The endpoint is retried until it reports ``status_code == 0`` or
    ``self.timeout`` seconds have elapsed.

    Args:
        sec_uid: Id of the user.

    Returns:
        ``None`` when ``sec_uid`` is ``None``; the decoded JSON response once
        it reports ``status_code == 0``; otherwise, after the timeout, the
        response decoded last (``{}`` if none was decoded).
    """
    if sec_uid is None:
        return None

    datadict = {}
    deadline = time.time() + self.timeout
    while True:
        try:
            url = self.urls.USER_DETAIL + utils.getXbogus(
                f'sec_user_id={sec_uid}&device_platform=webapp&aid=6383')

            res = requests.get(url=url, headers=douyin_headers, timeout=self.timeout)
            datadict = json.loads(res.text)

            if datadict is not None and datadict["status_code"] == 0:
                return datadict
        except Exception:
            # Deliberately broad: every failure is another failed attempt.
            pass
        # Checked after every failed attempt, including answers with a
        # non-zero status_code, so the loop always terminates.
        if time.time() > deadline:
            print("[ 提示 ]:重复请求该接口" + str(self.timeout) + "s, 仍然未获取到数据")
            return datadict
def getKey(self, url):
    """Resolve a share link or browser URL to a ``(key_type, key)`` pair.

    The URL is requested first so that short links are followed; the id is
    then read from the final request's path (or, for live rooms, its host).

    Recognised shapes and the resulting ``key_type``:
        ``/user/{sec_uid}``           -> ``"user"``
        ``/video/{aweme_id}``         -> ``"aweme"``
        ``/note/{aweme_id}``          -> ``"aweme"``
        ``/mix/detail/{mix_id}``      -> ``"mix"``
        ``/collection/{mix_id}``      -> ``"mix"``
        ``/music/{music_id}``         -> ``"music"``
        ``/webcast/reflow/{room_id}`` -> ``"live"`` (room resolved to web_rid)
        ``live.douyin.com/{web_rid}`` -> ``"live"``

    Args:
        url: Link to resolve.

    Returns:
        ``(key_type, key)``, or ``(None, None)`` when the URL cannot be
        requested or no id can be extracted from it.
    """
    key = None
    key_type = None

    try:
        r = requests.get(url=url, headers=douyin_headers, timeout=self.timeout)
    except Exception:
        print('[ 错误 ]:输入链接有误!\r')
        return key_type, key

    urlstr = str(r.request.path_url)

    # (marker that selects the branch, id pattern, resulting key type).
    # The order mirrors the precedence of the checks. The id must be
    # non-empty, so a path that lacks it is reported as an error instead
    # of yielding an empty key.
    id_patterns = (
        ("/user/", r'user/([^/?#]+)', "user"),
        ("/video/", r'video/(\d+)', "aweme"),
        ("/note/", r'note/(\d+)', "aweme"),
        ("/mix/detail/", r'/mix/detail/(\d+)', "mix"),
        ("/collection/", r'/collection/(\d+)', "mix"),
        ("/music/", r'music/(\d+)', "music"),
    )

    for marker, pattern, kind in id_patterns:
        if marker in urlstr:
            match = re.search(pattern, urlstr)
            if match:
                key, key_type = match.group(1), kind
            break
    else:
        if "/webcast/reflow/" in urlstr:
            match = re.search(r'reflow/(\d+)', urlstr)
            if match:
                try:
                    # The reflow link carries a room id; ask the API for the
                    # owner's web_rid, which is what the live lookup expects.
                    live_url = self.urls.LIVE2 + utils.getXbogus(
                        f'live_id=1&room_id={match.group(1)}&app_id=1128')
                    res = requests.get(live_url, headers=douyin_headers, timeout=self.timeout)
                    key = json.loads(res.text)['data']['room']['owner']['web_rid']
                    key_type = "live"
                except (requests.RequestException, ValueError, KeyError, TypeError):
                    # Fall through to the common "cannot extract id" error.
                    key = None
        elif "live.douyin.com" in r.url:
            # Drop any query string so only the room id remains.
            key = r.url.replace('https://live.douyin.com/', '').split('?', 1)[0]
            key_type = "live"

    if not key or key_type is None:
        print('[ 错误 ]:输入链接有误!无法获取 id\r')
        return None, None

    return key_type, key
def getUserInfoApi(self, sec_uid, mode="post", count=35, max_cursor=0):
    """Fetch one page of a user's posted or liked works.

    The endpoint is retried until it reports ``status_code == 0`` or
    ``self.timeout`` seconds have elapsed.

    Args:
        sec_uid: Id of the user.
        mode: ``"post"`` for published works, ``"like"`` for liked works.
        count: Page size sent with the request.
        max_cursor: Paging cursor sent with the request.

    Returns:
        ``None`` when ``sec_uid`` is ``None``, ``mode`` is not supported, or
        no valid answer arrived in time; otherwise
        ``(awemeList, datadict, max_cursor, has_more)`` where ``awemeList``
        holds the converted works, ``datadict`` is the decoded JSON response
        and the last two values are taken from it.
    """
    if sec_uid is None:
        return None

    # Resolve the endpoint once: an unsupported mode can never succeed, so
    # there is no point in entering the retry loop for it.
    if mode == "post":
        base_url = self.urls.USER_POST
    elif mode == "like":
        base_url = self.urls.USER_FAVORITE_A
    else:
        return None

    deadline = time.time() + self.timeout
    while True:
        try:
            url = base_url + utils.getXbogus(
                f'sec_user_id={sec_uid}&count={count}&max_cursor={max_cursor}&device_platform=webapp&aid=6383')
            res = requests.get(url=url, headers=douyin_headers, timeout=self.timeout)
            datadict = json.loads(res.text)
            if datadict is not None and datadict["status_code"] == 0:
                break
        except Exception:
            # Deliberately broad: every failure is another failed attempt.
            pass
        # Checked after every failed attempt so the loop always terminates.
        if time.time() > deadline:
            return None

    awemeList = []
    # "aweme_list" may be null in the payload; treat that as an empty page.
    for aweme in datadict["aweme_list"] or []:
        self.result.clearDict(self.result.awemeDict)

        # 0 = video (default); 1 = image set, signalled by a non-None "images".
        awemeType = 0
        try:
            if aweme["images"] is not None:
                awemeType = 1
        except (KeyError, TypeError):
            pass

        # Convert into the project's own format; the shared dict is copied
        # because it is reused for the next work.
        self.result.dataConvert(awemeType, self.result.awemeDict, aweme)

        if self.result.awemeDict is not None and self.result.awemeDict != {}:
            awemeList.append(copy.deepcopy(self.result.awemeDict))

    return awemeList, datadict, datadict["max_cursor"], datadict["has_more"]
self.timeout: + return None + + # 清空字典 + self.result.clearDict(self.result.liveDict) + + # 类型 + self.result.liveDict["awemeType"] = 2 + # 是否在播 + self.result.liveDict["status"] = live_json['data']['data'][0]['status'] + + if self.result.liveDict["status"] == 4: + return self.result.liveDict, live_json + + # 直播标题 + self.result.liveDict["title"] = live_json['data']['data'][0]['title'] + + # 直播cover + self.result.liveDict["cover"] = live_json['data']['data'][0]['cover']['url_list'][0] + + # 头像 + self.result.liveDict["avatar"] = live_json['data']['data'][0]['owner']['avatar_thumb']['url_list'][0].replace( + "100x100", "1080x1080") + + # 观看人数 + self.result.liveDict["user_count"] = live_json['data']['data'][0]['user_count_str'] + + # 昵称 + self.result.liveDict["nickname"] = live_json['data']['data'][0]['owner']['nickname'] + + # sec_uid + self.result.liveDict["sec_uid"] = live_json['data']['data'][0]['owner']['sec_uid'] + + # 直播间观看状态 + self.result.liveDict["display_long"] = live_json['data']['data'][0]['room_view_stats']['display_long'] + + # 推流 + self.result.liveDict["flv_pull_url"] = live_json['data']['data'][0]['stream_url']['flv_pull_url'] + + try: + # 分区 + self.result.liveDict["partition"] = live_json['data']['partition_road_map']['partition']['title'] + self.result.liveDict["sub_partition"] = \ + live_json['data']['partition_road_map']['sub_partition']['partition']['title'] + except Exception as e: + self.result.liveDict["partition"] = '无' + self.result.liveDict["sub_partition"] = '无' + + flv = [] + + for i, f in enumerate(self.result.liveDict["flv_pull_url"].keys()): + flv.append(f) + + self.result.liveDict["flv_pull_url0"] = self.result.liveDict["flv_pull_url"][flv[0]] + + return self.result.liveDict, live_json + + def getMixInfoApi(self, mix_id: str, count=35, cursor=0): + if mix_id is None: + return None + + awemeList = [] + + start = time.time() # 开始时间 + while True: + try: + url = self.urls.USER_MIX + utils.getXbogus( + 
def getUserAllMixInfoApi(self, sec_uid, count=35, cursor=0):
    """Fetch one page of the mixes (collections) listed on a user's page.

    The endpoint is retried until it reports ``status_code == 0`` or
    ``self.timeout`` seconds have elapsed.

    Args:
        sec_uid: Id of the user.
        count: Page size sent with the request.
        cursor: Paging cursor sent with the request.

    Returns:
        ``None`` when ``sec_uid`` is ``None`` or no valid answer arrived in
        time; otherwise ``(mixIdlist, datadict, cursor, has_more)`` where
        ``mixIdlist`` is a list of single-entry dicts mapping a collection URL
        to the mix name, ``datadict`` is the decoded JSON response and the
        last two values are taken from it.
    """
    if sec_uid is None:
        return None

    deadline = time.time() + self.timeout
    while True:
        try:
            url = self.urls.USER_MIX_LIST + utils.getXbogus(
                f'sec_user_id={sec_uid}&count={count}&cursor={cursor}&device_platform=webapp&aid=6383')

            res = requests.get(url=url, headers=douyin_headers, timeout=self.timeout)
            datadict = json.loads(res.text)
            if datadict is not None and datadict["status_code"] == 0:
                break
        except Exception:
            # Deliberately broad: every failure is another failed attempt.
            pass
        # Checked after every failed attempt so the loop always terminates.
        if time.time() > deadline:
            return None

    # "mix_infos" may be null in the payload; treat that as an empty page.
    mixIdlist = [
        {"https://www.douyin.com/collection/" + mix["mix_id"]: mix["mix_name"]}
        for mix in datadict["mix_infos"] or []
    ]

    return mixIdlist, datadict, datadict["cursor"], datadict["has_more"]
def getUserDetailInfoApi(self, sec_uid):
    """Fetch the detail record of a user.

    The endpoint is retried until it reports ``status_code == 0`` or
    ``self.timeout`` seconds have elapsed.

    Args:
        sec_uid: Id of the user.

    Returns:
        The decoded JSON response once it reports ``status_code == 0``;
        ``None`` when ``sec_uid`` is ``None`` or no valid answer arrived in
        time.
    """
    if sec_uid is None:
        return None

    deadline = time.time() + self.timeout
    while True:
        try:
            url = self.urls.USER_DETAIL + utils.getXbogus(
                f'sec_user_id={sec_uid}&device_platform=webapp&aid=6383')

            res = requests.get(url=url, headers=douyin_headers, timeout=self.timeout)
            datadict = json.loads(res.text)

            if datadict is not None and datadict["status_code"] == 0:
                return datadict
        except Exception:
            # Deliberately broad: every failure is another failed attempt.
            pass
        # Checked after every failed attempt, including answers with a
        # non-zero status_code, so the loop always terminates.
        if time.time() > deadline:
            return None
def progressBarDownload(self, url, filepath, desc):
    """Stream ``url`` into ``filepath`` while showing a tqdm progress bar.

    Failures never propagate: this runs inside a worker thread, where an
    escaping exception would only be stored in the future and go unnoticed.
    On failure the partially written file is removed so that a later pass
    sees it as missing.

    Args:
        url: Address of the resource to download.
        filepath: Destination file; overwritten if it exists.
        desc: Label shown next to the progress bar.

    Returns:
        None.
    """
    chunk_size = 1024  # bytes taken from the response per iteration
    try:
        # The request and header lookup live inside the try so that a bad
        # URL or a missing header is handled like any other download error.
        # The context manager closes the streamed connection; the timeout
        # keeps a stalled transfer from blocking the worker forever.
        with requests.get(url, stream=True, headers=douyin_headers, timeout=30) as response:
            if response.status_code != 200:
                print("[ 错误 ]:下载出错\r")
                return

            # Chunked responses carry no content-length; tqdm accepts
            # total=None and then shows progress without a total.
            length = response.headers.get('content-length')
            content_size = int(length) if length is not None else None

            with open(filepath, 'wb') as file, tqdm(total=content_size,
                                                    unit="iB",
                                                    desc=desc,
                                                    unit_scale=True,
                                                    unit_divisor=1024,
                                                    ) as bar:
                for data in response.iter_content(chunk_size=chunk_size):
                    size = file.write(data)
                    bar.update(size)
    except Exception:
        # Deliberately broad (thread boundary): drop the incomplete file.
        if os.path.exists(filepath):
            os.remove(filepath)
        print("[ 错误 ]:下载出错\r")
作品名: " + file_name + "\r\n") + + desc = file_name[:30] + # 下载 视频 + if awemeDict["awemeType"] == 0: + video_path = os.path.join(aweme_path, "video_" + file_name + ".mp4") + + if os.path.exists(video_path): + pass + else: + try: + url = awemeDict["video"]["play_addr"]["url_list"][0] + if url != "": + self.isdwownload = False + self.alltask.append( + self.pool.submit(self.progressBarDownload, url, video_path, "[ 视频 ]:" + desc)) + except Exception as e: + print("[ 警告 ]:视频下载失败,请重试... 作品名: " + file_name + "\r\n") + + # 下载 图集 + if awemeDict["awemeType"] == 1: + for ind, image in enumerate(awemeDict["images"]): + image_path = os.path.join(aweme_path, "image_" + file_name + "_" + str(ind) + ".jpeg") + if os.path.exists(image_path): + pass + else: + try: + url = image["url_list"][0] + if url != "": + self.isdwownload = False + self.alltask.append( + self.pool.submit(self.progressBarDownload, url, image_path, "[ 图集 ]:" + desc)) + except Exception as e: + print("[ 警告 ]:图片下载失败,请重试... 作品名: " + file_name + "\r\n") + + # 下载 音乐 + if self.music: + music_name = utils.replaceStr(awemeDict["music"]["title"]) + music_path = os.path.join(aweme_path, "music_" + music_name + "_" + file_name + ".mp3") + + if os.path.exists(music_path): + pass + else: + try: + url = awemeDict["music"]["play_url"]["url_list"][0] + if url != "": + self.isdwownload = False + self.alltask.append( + self.pool.submit(self.progressBarDownload, url, music_path, "[ 原声 ]:" + desc)) + except Exception as e: + print("[ 警告 ]:音乐(原声)下载失败,请重试... 作品名: " + file_name + "\r\n") + + # 下载 cover + if self.cover and awemeDict["awemeType"] == 0: + cover_path = os.path.join(aweme_path, "cover_" + file_name + ".jpeg") + + if os.path.exists(cover_path): + pass + else: + try: + url = awemeDict["video"]["cover"]["url_list"][0] + if url != "": + self.isdwownload = False + self.alltask.append( + self.pool.submit(self.progressBarDownload, url, cover_path, "[ 封面 ]:" + desc)) + except Exception as e: + print("[ 警告 ]:cover下载失败,请重试... 
作品名: " + file_name + "\r\n") + + # 下载 avatar + if self.avatar: + avatar_path = os.path.join(aweme_path, "avatar_" + file_name + ".jpeg") + + if os.path.exists(avatar_path): + pass + else: + try: + url = awemeDict["author"]["avatar"]["url_list"][0] + if url != "": + self.isdwownload = False + self.alltask.append( + self.pool.submit(self.progressBarDownload, url, avatar_path, "[ 头像 ]:" + desc)) + except Exception as e: + print("[ 警告 ]:avatar下载失败,请重试... 作品名: " + file_name + "\r\n") + except Exception as e: + print("[ 错误 ]:下载作品时出错\r\n") + + def userDownload(self, awemeList: list, savePath=os.getcwd()): + if awemeList is None: + return + if not os.path.exists(savePath): + os.mkdir(savePath) + + self.alltask = [] + self.pool = ThreadPoolExecutor(max_workers=self.thread) + + start = time.time() # 开始时间 + + for aweme in awemeList: + self.awemeDownload(awemeDict=aweme, savePath=savePath) + + wait(self.alltask, return_when=ALL_COMPLETED) + + # 检查下载是否完成 + while True: + print("[ 提示 ]:正在检查下载是否完成...") + self.isdwownload = True + # 下载上一步失败的 + for aweme in awemeList: + self.awemeDownload(awemeDict=aweme, savePath=savePath) + + wait(self.alltask, return_when=ALL_COMPLETED) + + if self.isdwownload: + break + + end = time.time() # 结束时间 + print('\n' + '[下载完成]:耗时: %d分钟%d秒\n' % (int((end - start) / 60), ((end - start) % 60))) # 输出下载用时时间 + + +if __name__ == "__main__": + pass diff --git a/apiproxy/douyin/result.py b/apiproxy/douyin/result.py new file mode 100644 index 0000000..e7b40b7 --- /dev/null +++ b/apiproxy/douyin/result.py @@ -0,0 +1,315 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +''' +@FileName : result.py +@Project : apiproxy +@Description: +@Author : imgyh +@Mail : admin@imgyh.com +@Github : https://github.com/imgyh +@Site : https://www.imgyh.com +@Date : 2023/5/12 15:16 +@Version : v1.0 +@ChangeLog +------------------------------------------------ + +------------------------------------------------ +''' + +import time +import copy + + +class Result(object): + def 
class Result(object):
    """Templates of the trimmed-down data model and the converter filling them.

    ``dataConvert`` copies into a template only the fields the template
    declares; ``clearDict`` resets a template so it can be reused.
    """

    @staticmethod
    def _image():
        """Return a fresh, empty image-descriptor template."""
        return {"height": "", "uri": "", "url_list": [], "width": ""}

    def __init__(self):
        # Author information
        self.authorDict = {
            "avatar_thumb": self._image(),
            "avatar": self._image(),
            "cover_url": self._image(),
            # Number of works the user liked
            "favoriting_count": "",
            # Number of followers
            "follower_count": "",
            # Number of accounts followed
            "following_count": "",
            # Nickname
            "nickname": "",
            # Download permission flag
            "prevent_download": "",
            # User id used in URLs
            "sec_uid": "",
            # Whether the account is private
            "secret": "",
            # Short id
            "short_id": "",
            # Signature
            "signature": "",
            # Total likes received
            "total_favorited": "",
            # User id
            "uid": "",
            # User-chosen unique id (Douyin handle)
            "unique_id": "",
            # Age
            "user_age": "",
        }
        # Image information; pristine template copied for every image
        self.picDict = {
            "height": "",
            "mask_url_list": "",
            "uri": "",
            "url_list": [],
            "width": ""
        }
        # Music information
        self.musicDict = {
            "cover_hd": self._image(),
            "cover_large": self._image(),
            "cover_medium": self._image(),
            "cover_thumb": self._image(),
            # Douyin handle of the music's author
            "owner_handle": "",
            # Id of the music's author
            "owner_id": "",
            # Nickname of the music's author
            "owner_nickname": "",
            "play_url": {
                "height": "",
                "uri": "",
                "url_key": "",
                "url_list": [],
                "width": ""
            },
            # Title of the music
            "title": "",
        }
        # Video information
        self.videoDict = {
            "play_addr": {
                "uri": "",
                "url_list": [],
            },
            "cover_original_scale": self._image(),
            "dynamic_cover": self._image(),
            "origin_cover": self._image(),
            "cover": self._image(),
        }
        # Mix (collection) information
        self.mixInfo = {
            "cover_url": dict(self._image(), width=720),
            "ids": "",
            "is_serial_mix": "",
            "mix_id": "",
            "mix_name": "",
            "mix_pic_type": "",
            "mix_type": "",
            "statis": {
                "current_episode": "",
                "updated_to_episode": ""
            }
        }
        # Work information
        self.awemeDict = {
            # Creation time of the work
            "create_time": "",
            # awemeType=0 video, awemeType=1 image set, awemeType=2 live
            "awemeType": "",
            # Work id
            "aweme_id": "",
            # Author information
            "author": self.authorDict,
            # Description of the work
            "desc": "",
            # Images
            "images": [],
            # Music
            "music": self.musicDict,
            # Mix (collection)
            "mix_info": self.mixInfo,
            # Video
            "video": self.videoDict,
            # Statistics of the work
            "statistics": {
                "admire_count": "",
                "collect_count": "",
                "comment_count": "",
                "digg_count": "",
                "play_count": "",
                "share_count": ""
            }
        }
        # Works of a user
        self.awemeList = []
        # Live-stream information
        self.liveDict = {
            # awemeType=0 video, awemeType=1 image set, awemeType=2 live
            "awemeType": "",
            # Whether the stream is on air
            "status": "",
            # Stream title
            "title": "",
            # Stream cover
            "cover": "",
            # Avatar
            "avatar": "",
            # Number of viewers
            "user_count": "",
            # Nickname
            "nickname": "",
            # sec_uid
            "sec_uid": "",
            # Viewing status text of the room
            "display_long": "",
            # Stream pull URLs
            "flv_pull_url": "",
            # Partition
            "partition": "",
            "sub_partition": "",
            # Highest-quality address
            "flv_pull_url0": "",
        }

    def dataConvert(self, awemeType, dataNew, dataRaw):
        """Fill the template ``dataNew`` in place from the raw API data.

        Only keys already present in ``dataNew`` are read from ``dataRaw``;
        nested dictionaries are handled recursively.  A field that is missing
        or malformed in ``dataRaw`` is silently left untouched.

        Args:
            awemeType: 0 for a video, 1 for an image set.
            dataNew: Template dictionary to fill (mutated).
            dataRaw: Raw dictionary decoded from the API answer.
        """
        for item in dataNew:
            try:
                # Creation time: epoch seconds -> formatted local time
                if item == "create_time":
                    dataNew['create_time'] = time.strftime(
                        "%Y-%m-%d %H.%M.%S", time.localtime(dataRaw['create_time']))
                    continue
                # awemeType comes from the caller, not from the raw data
                if item == "awemeType":
                    dataNew["awemeType"] = awemeType
                    continue
                # Image sets only
                if item == "images":
                    if awemeType == 1:
                        for image in dataRaw[item]:
                            # Start every image from the pristine template so
                            # keys of an earlier image cannot leak into it.
                            pic = copy.deepcopy(self.picDict)
                            pic.update(copy.deepcopy(image))
                            # Append to the dict being filled, not to a fixed one.
                            dataNew[item].append(pic)
                    continue
                # Videos only
                if item == "video":
                    if awemeType == 0:
                        self.dataConvert(awemeType, dataNew[item], dataRaw[item])
                    continue
                # Enlarge the small avatar.  "avatar_thumb" precedes "avatar"
                # in the author template, so it has been filled already.
                if item == "avatar":
                    thumb = dataNew["avatar_thumb"]
                    for i in dataNew[item]:
                        if i == "url_list":
                            # Rebuilt instead of appended: repeated conversion
                            # no longer duplicates the entries.
                            dataNew[item][i] = [j.replace("100x100", "1080x1080")
                                                for j in thumb["url_list"]]
                        elif i == "uri":
                            dataNew[item][i] = thumb[i].replace("100x100", "1080x1080")
                        else:
                            dataNew[item][i] = thumb[i]
                    continue

                # The raw JSON holds [{}] where the template holds {}
                if item == "cover_url":
                    self.dataConvert(awemeType, dataNew[item], dataRaw[item][0])
                    continue

                # Play address is taken from the first bit_rate entry
                if item == "play_addr":
                    dataNew[item]["uri"] = dataRaw["bit_rate"][0]["play_addr"]["uri"]
                    dataNew[item]["url_list"] = copy.deepcopy(dataRaw["bit_rate"][0]["play_addr"]["url_list"])
                    continue

                # Generic case: recurse into dictionaries, assign everything else
                if isinstance(dataNew[item], dict):
                    self.dataConvert(awemeType, dataNew[item], dataRaw[item])
                else:
                    dataNew[item] = dataRaw[item]
            except Exception:
                # Deliberately silent: fields absent from the API answer are
                # expected, and a warning here was mistaken for a failure.
                pass

    def clearDict(self, data):
        """Reset a template in place: lists become ``[]``, scalars ``""``."""
        for item in data:
            value = data[item]
            if isinstance(value, dict):
                self.clearDict(value)
            elif isinstance(value, list):
                data[item] = []
            else:
                data[item] = ""
class Urls(object):
    """Endpoint table of the Douyin web API, exposed as instance attributes."""

    def __init__(self):
        # (attribute name, endpoint) pairs, assigned in this order.
        endpoints = (
            # Home page recommendations
            ("TAB_FEED", 'https://www.douyin.com/aweme/v1/web/tab/feed/?'),
            # Short user info (one record per sec id passed in)
            ("USER_SHORT_INFO", 'https://www.douyin.com/aweme/v1/web/im/user/info/?'),
            # Detailed user info
            ("USER_DETAIL", 'https://www.douyin.com/aweme/v1/web/user/profile/other/?'),
            # Works of a user
            ("USER_POST", 'https://www.douyin.com/aweme/v1/web/aweme/post/?'),
            # Details of a work
            ("POST_DETAIL", 'https://www.douyin.com/aweme/v1/web/aweme/detail/?'),
            # Liked works, variant A (needs odin_tt)
            ("USER_FAVORITE_A", 'https://www.douyin.com/aweme/v1/web/aweme/favorite/?'),
            # Liked works, variant B
            ("USER_FAVORITE_B", 'https://www.iesdouyin.com/web/api/v2/aweme/like/?'),
            # Viewing history of a user
            ("USER_HISTORY", 'https://www.douyin.com/aweme/v1/web/history/read/?'),
            # Collected works of a user
            ("USER_COLLECTION", 'https://www.douyin.com/aweme/v1/web/aweme/listcollection/?'),
            # Comments
            ("COMMENT", 'https://www.douyin.com/aweme/v1/web/comment/list/?'),
            # Works of friends on the home page
            ("FRIEND_FEED", 'https://www.douyin.com/aweme/v1/web/familiar/feed/?'),
            # Works of followed users
            ("FOLLOW_FEED", 'https://www.douyin.com/aweme/v1/web/follow/feed/?'),
            # All works of a mix (only needs X-Bogus)
            ("USER_MIX", 'https://www.douyin.com/aweme/v1/web/mix/aweme/?'),
            # All mixes of a user (needs ttwid)
            ("USER_MIX_LIST", 'https://www.douyin.com/aweme/v1/web/mix/list/?'),
            # Live streams
            ("LIVE", 'https://live.douyin.com/webcast/room/web/enter/?'),
            ("LIVE2", 'https://webcast.amemv.com/webcast/room/reflow/info/?'),
            # Music
            ("MUSIC", 'https://www.douyin.com/aweme/v1/web/music/aweme/?'),
        )
        for attribute, endpoint in endpoints:
            setattr(self, attribute, endpoint)