feat(tiktok): 增加数据库功能与增量更新功能

re #24
This commit is contained in:
imgyh 2023-04-24 20:42:20 +08:00
parent 87b48aec06
commit ebf6671d33
3 changed files with 332 additions and 25 deletions

128
TikTok.py
View File

@ -39,6 +39,7 @@ from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED
from TikTokUtils import Utils
from TikTokUrls import Urls
from TikTokResult import Result
from TikTokDataBase import db
class TikTok(object):
@ -47,6 +48,7 @@ class TikTok(object):
self.urls = Urls()
self.utils = Utils()
self.result = Result()
self.db = db()
self.headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36',
'referer': 'https://www.douyin.com/',
@ -272,7 +274,7 @@ class TikTok(object):
# 传入 url 支持 https://www.iesdouyin.com 与 https://v.douyin.com
# mode : post | like 模式选择 like为用户点赞 post为用户发布
def getUserInfo(self, sec_uid, mode="post", count=35, number=0):
def getUserInfo(self, sec_uid, mode="post", count=35, number=0, increase=False):
print('[ 提示 ]:正在请求的用户 id = %s\r\n' % sec_uid)
if sec_uid is None:
return None
@ -283,6 +285,8 @@ class TikTok(object):
max_cursor = 0
awemeList = []
increaseflag = False
numberis0 = False
print("[ 提示 ]:正在获取所有作品数据请稍后...\r")
print("[ 提示 ]:会进行多次请求,等待时间较长...\r\n")
@ -320,6 +324,35 @@ class TikTok(object):
# print("[ 警告 ]:接口未返回数据, 正在重新请求!\r")
for aweme in datadict["aweme_list"]:
# 退出条件
if increase is False and numflag and numberis0:
break
if increase and numflag and numberis0 and increaseflag:
break
# 增量更新, 找到非置顶的最新的作品发布时间
if mode == "post":
if self.db.get_user_post(sec_uid=sec_uid, aweme_id=aweme['aweme_id']) is not None:
if increase and aweme['is_top'] == 0:
increaseflag = True
else:
self.db.insert_user_post(sec_uid=sec_uid, aweme_id=aweme['aweme_id'], data=aweme)
elif mode == "like":
if self.db.get_user_like(sec_uid=sec_uid, aweme_id=aweme['aweme_id']) is not None:
if increase and aweme['is_top'] == 0:
increaseflag = True
else:
self.db.insert_user_like(sec_uid=sec_uid, aweme_id=aweme['aweme_id'], data=aweme)
# 退出条件
if increase and numflag is False and increaseflag:
break
if increase and numflag and numberis0 and increaseflag:
break
if numflag:
number -= 1
if number == 0:
numberis0 = True
# 获取 aweme_id
# aweme_id = aweme["aweme_id"]
# 深拷贝 dict 不然list里面全是同样的数据
@ -342,13 +375,15 @@ class TikTok(object):
if self.result.awemeDict is not None and self.result.awemeDict != {}:
awemeList.append(copy.deepcopy(self.result.awemeDict))
if numflag:
number -= 1
if number == 0:
break
if numflag and number == 0:
if increase and numflag is False and increaseflag:
print("\r\n[ 提示 ]: [主页] 下作品增量更新数据获取完成...\r\n")
break
elif increase is False and numflag and numberis0:
print("\r\n[ 提示 ]: [主页] 下指定数量作品数据获取完成...\r\n")
break
elif increase and numflag and numberis0 and increaseflag:
print("\r\n[ 提示 ]: [主页] 下指定数量作品数据获取完成, 增量更新数据获取完成...\r\n")
break
# 更新 max_cursor
max_cursor = datadict["max_cursor"]
@ -563,7 +598,7 @@ class TikTok(object):
return awemeList, datadict, datadict["cursor"], datadict["has_more"]
def getMixInfo(self, mix_id: str, count=35, number=0):
def getMixInfo(self, mix_id: str, count=35, number=0, increase=False, sec_uid=''):
print('[ 提示 ]:正在请求的合集 id = %s\r\n' % mix_id)
if mix_id is None:
return None
@ -574,6 +609,8 @@ class TikTok(object):
cursor = 0
awemeList = []
increaseflag = False
numberis0 = False
print("[ 提示 ]:正在获取合集下的所有作品数据请稍后...\r")
print("[ 提示 ]:会进行多次请求,等待时间较长...\r\n")
@ -604,6 +641,28 @@ class TikTok(object):
# print("[ 警告 ]:接口未返回数据, 正在重新请求!\r")
for aweme in datadict["aweme_list"]:
# 退出条件
if increase is False and numflag and numberis0:
break
if increase and numflag and numberis0 and increaseflag:
break
# 增量更新, 找到非置顶的最新的作品发布时间
if self.db.get_mix(sec_uid=sec_uid, mix_id=mix_id, aweme_id=aweme['aweme_id']) is not None:
if increase and aweme['is_top'] == 0:
increaseflag = True
else:
self.db.insert_mix(sec_uid=sec_uid, mix_id=mix_id, aweme_id=aweme['aweme_id'], data=aweme)
# 退出条件
if increase and numflag is False and increaseflag:
break
if increase and numflag and numberis0 and increaseflag:
break
if numflag:
number -= 1
if number == 0:
numberis0 = True
# 获取 aweme_id
# aweme_id = aweme["aweme_id"]
# 深拷贝 dict 不然list里面全是同样的数据
@ -626,12 +685,14 @@ class TikTok(object):
if self.result.awemeDict is not None and self.result.awemeDict != {}:
awemeList.append(copy.deepcopy(self.result.awemeDict))
if numflag:
number -= 1
if number == 0:
break
if numflag and number == 0:
print("\r\n[ 提示 ]:[合集] 下指定数量作品数据获取完成...\r\n")
if increase and numflag is False and increaseflag:
print("\r\n[ 提示 ]: [合集] 下作品增量更新数据获取完成...\r\n")
break
elif increase is False and numflag and numberis0:
print("\r\n[ 提示 ]: [合集] 下指定数量作品数据获取完成...\r\n")
break
elif increase and numflag and numberis0 and increaseflag:
print("\r\n[ 提示 ]: [合集] 下指定数量作品数据获取完成, 增量更新数据获取完成...\r\n")
break
# 更新 max_cursor
@ -778,7 +839,7 @@ class TikTok(object):
return awemeList, datadict, datadict["cursor"], datadict["has_more"]
def getMusicInfo(self, music_id: str, count=35, number=0):
def getMusicInfo(self, music_id: str, count=35, number=0, increase=False):
print('[ 提示 ]:正在请求的音乐集合 id = %s\r\n' % music_id)
if music_id is None:
return None
@ -789,6 +850,8 @@ class TikTok(object):
cursor = 0
awemeList = []
increaseflag = False
numberis0 = False
print("[ 提示 ]:正在获取音乐集合下的所有作品数据请稍后...\r")
print("[ 提示 ]:会进行多次请求,等待时间较长...\r\n")
@ -819,6 +882,27 @@ class TikTok(object):
# print("[ 警告 ]:接口未返回数据, 正在重新请求!\r")
for aweme in datadict["aweme_list"]:
if increase is False and numflag and numberis0:
break
if increase and numflag and numberis0 and increaseflag:
break
# 增量更新, 找到非置顶的最新的作品发布时间
if self.db.get_music(music_id=music_id, aweme_id=aweme['aweme_id']) is not None:
if increase and aweme['is_top'] == 0:
increaseflag = True
else:
self.db.insert_music(music_id=music_id, aweme_id=aweme['aweme_id'], data=aweme)
# 退出条件
if increase and numflag is False and increaseflag:
break
if increase and numflag and numberis0 and increaseflag:
break
if numflag:
number -= 1
if number == 0:
numberis0 = True
# 获取 aweme_id
# aweme_id = aweme["aweme_id"]
# 深拷贝 dict 不然list里面全是同样的数据
@ -841,12 +925,14 @@ class TikTok(object):
if self.result.awemeDict is not None and self.result.awemeDict != {}:
awemeList.append(copy.deepcopy(self.result.awemeDict))
if numflag:
number -= 1
if number == 0:
break
if numflag and number == 0:
print("\r\n[ 提示 ]:[音乐集合] 下指定数量作品数据获取完成...\r\n")
if increase and numflag is False and increaseflag:
print("\r\n[ 提示 ]: [音乐集合] 下作品增量更新数据获取完成...\r\n")
break
elif increase is False and numflag and numberis0:
print("\r\n[ 提示 ]: [音乐集合] 下指定数量作品数据获取完成...\r\n")
break
elif increase and numflag and numberis0 and increaseflag:
print("\r\n[ 提示 ]: [音乐集合] 下指定数量作品数据获取完成, 增量更新数据获取完成...\r\n")
break
# 更新 cursor
@ -1006,7 +1092,7 @@ class TikTok(object):
pass
else:
try:
url = awemeDict["video"]["origin_cover"]["url_list"][0]
url = awemeDict["video"]["cover"]["url_list"][0]
if url != "":
self.isdwownload = False
# task_id = self.progress.add_task("download", filename="[ 封面 ]:" + desc, start=False)

View File

@ -37,6 +37,13 @@ configModel = {
"mix": 0,
"music": 0,
},
"increase": {
"post": False,
"like": False,
"allmix": False,
"mix": False,
"music": False,
},
"thread": 5,
"cookie": None
@ -72,6 +79,16 @@ def argument():
type=int, required=False, default=0)
parser.add_argument("--musicnumber", help="音乐(原声)下作品下载个数设置, 默认为0 全部下载",
type=int, required=False, default=0)
parser.add_argument("--postincrease", help="是否开启主页作品增量下载(True/False), 默认为False",
type=Utils().str2bool, required=False, default=False)
parser.add_argument("--likeincrease", help="是否开启主页喜欢增量下载(True/False), 默认为False",
type=Utils().str2bool, required=False, default=False)
parser.add_argument("--allmixincrease", help="是否开启主页合集增量下载(True/False), 默认为False",
type=Utils().str2bool, required=False, default=False)
parser.add_argument("--mixincrease", help="是否开启单个合集下作品增量下载(True/False), 默认为False",
type=Utils().str2bool, required=False, default=False)
parser.add_argument("--musicincrease", help="是否开启音乐(原声)下作品增量下载(True/False), 默认为False",
type=Utils().str2bool, required=False, default=False)
parser.add_argument("--thread", "-t",
help="设置线程数, 默认5个线程",
type=int, required=False, default=5)
@ -151,6 +168,31 @@ def yamlConfig():
configModel["number"]["music"] = configDict["number"]["music"]
except Exception as e:
print("[ 警告 ]:music number未设置, 使用默认值0...\r\n")
try:
if configDict["increase"]["post"] != None:
configModel["increase"]["post"] = configDict["increase"]["post"]
except Exception as e:
print("[ 警告 ]:post 增量更新未设置, 使用默认值False...\r\n")
try:
if configDict["increase"]["like"] != None:
configModel["increase"]["like"] = configDict["increase"]["like"]
except Exception as e:
print("[ 警告 ]:like 增量更新未设置, 使用默认值False...\r\n")
try:
if configDict["increase"]["allmix"] != None:
configModel["increase"]["allmix"] = configDict["increase"]["allmix"]
except Exception as e:
print("[ 警告 ]:allmix 增量更新未设置, 使用默认值False...\r\n")
try:
if configDict["increase"]["mix"] != None:
configModel["increase"]["mix"] = configDict["increase"]["mix"]
except Exception as e:
print("[ 警告 ]:mix 增量更新未设置, 使用默认值False...\r\n")
try:
if configDict["increase"]["music"] != None:
configModel["increase"]["music"] = configDict["increase"]["music"]
except Exception as e:
print("[ 警告 ]:music 增量更新未设置, 使用默认值False...\r\n")
try:
if configDict["thread"] != None:
configModel["thread"] = configDict["thread"]
@ -194,6 +236,11 @@ def main():
configModel["number"]["allmix"] = args.allmixnumber
configModel["number"]["mix"] = args.mixnumber
configModel["number"]["music"] = args.musicnumber
configModel["increase"]["post"] = args.postincrease
configModel["increase"]["like"] = args.likeincrease
configModel["increase"]["allmix"] = args.allmixincrease
configModel["increase"]["mix"] = args.mixincrease
configModel["increase"]["music"] = args.musicincrease
configModel["thread"] = args.thread
configModel["cookie"] = args.cookie
else:
@ -207,6 +254,8 @@ def main():
if configModel["cookie"] is not None and configModel["cookie"] != "":
tk.headers["Cookie"] = configModel["cookie"]
configModel["path"] = os.path.abspath(configModel["path"])
print("[ 提示 ]:数据保存路径 " + configModel["path"])
if not os.path.exists(configModel["path"]):
os.mkdir(configModel["path"])
@ -225,7 +274,7 @@ def main():
print("--------------------------------------------------------------------------------")
print("[ 提示 ]:正在请求用户主页模式: " + mode + "\r\n")
if mode == 'post' or mode == 'like':
datalist = tk.getUserInfo(key, mode, 35, configModel["number"][mode])
datalist = tk.getUserInfo(key, mode, 35, configModel["number"][mode], configModel["increase"][mode])
if datalist is not None and datalist != []:
modePath = os.path.join(userPath, mode)
if not os.path.exists(modePath):
@ -239,7 +288,7 @@ def main():
for mix_id in mixIdNameDict:
print(f'[ 提示 ]:正在下载合集 [{mixIdNameDict[mix_id]}] 中的作品\r\n')
mix_file_name = utils.replaceStr(mixIdNameDict[mix_id])
datalist = tk.getMixInfo(mix_id, 35)
datalist = tk.getMixInfo(mix_id, 35, 0, configModel["increase"]["allmix"], key)
if datalist is not None and datalist != []:
modePath = os.path.join(userPath, mode)
if not os.path.exists(modePath):
@ -252,7 +301,7 @@ def main():
print(f'[ 提示 ]:合集 [{mixIdNameDict[mix_id]}] 中的作品下载完成\r\n')
elif key_type == "mix":
print("[ 提示 ]:正在请求单个合集下作品\r\n")
datalist = tk.getMixInfo(key, 35, configModel["number"]["mix"])
datalist = tk.getMixInfo(key, 35, configModel["number"]["mix"], configModel["increase"]["mix"], "")
if datalist is not None and datalist != []:
mixPath = os.path.join(configModel["path"], "mix_" + key)
if not os.path.exists(mixPath):
@ -262,7 +311,7 @@ def main():
savePath=mixPath, thread=configModel["thread"])
elif key_type == "music":
print("[ 提示 ]:正在请求音乐(原声)下作品\r\n")
datalist = tk.getMusicInfo(key, 35, configModel["number"]["music"])
datalist = tk.getMusicInfo(key, 35, configModel["number"]["music"], configModel["increase"]["music"])
if datalist is not None and datalist != []:
musicPath = os.path.join(configModel["path"], "music_" + key)
if not os.path.exists(musicPath):

172
TikTokDataBase.py Normal file
View File

@ -0,0 +1,172 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
@FileName : TikTokDataBase.py
@Project : tiktok
@Description:
@Author : imgyh
@Mail : admin@imgyh.com
@Github : https://github.com/imgyh
@Site : https://www.imgyh.com
@Date : 2023/4/24 10:05
@Version : v1.0
@ChangeLog
------------------------------------------------
使用数据库保存获取的状态信息
------------------------------------------------
'''
import sqlite3
import json
class db(object):
    """SQLite-backed record of the works that have already been fetched.

    One table is kept per source (user posts, user likes, mixes, music).
    Every row stores the identifiers of a work together with the raw API
    item serialised as JSON text.  Callers use ``get_*`` to ask "has this
    work been seen before?" and ``insert_*`` to remember a new one.

    All operations are best-effort: a database or serialisation failure is
    reported with a warning and otherwise ignored, so a broken database
    never aborts the caller.

    The instance can be used as a context manager; the connection is closed
    on exit.
    """

    # Failures that are reported and skipped instead of being raised:
    # SQLite errors, values json cannot serialise, and ints too large for
    # an SQLite INTEGER parameter.
    _RECOVERABLE = (sqlite3.Error, TypeError, ValueError, OverflowError)

    def __init__(self, db_path='data.db'):
        """Open (or create) the database and make sure all tables exist.

        :param db_path: path handed to ``sqlite3.connect``; the default
            keeps the historical ``data.db`` in the working directory.
            ``':memory:'`` gives a throw-away database.
        """
        self.conn = sqlite3.connect(db_path)
        self.cursor = self.conn.cursor()
        self.create_user_post_table()
        self.create_user_like_table()
        self.create_mix_table()
        self.create_music_table()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def close(self):
        """Close the cursor and the connection, ignoring SQLite errors."""
        try:
            self.cursor.close()
            self.conn.close()
        except sqlite3.Error as err:
            self._warn(err)

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _warn(err):
        """Report a skipped failure in the project's console style."""
        print("[ 警告 ]:数据库操作失败, 已跳过: %s\r\n" % err)

    def _write(self, sql, params=()):
        """Run one modifying statement; commit on success, roll back on error."""
        try:
            # The connection context manager commits when the body succeeds
            # and rolls back when it raises, so no transaction is left open.
            with self.conn:
                self.cursor.execute(sql, params)
        except self._RECOVERABLE as err:
            self._warn(err)

    def _insert(self, sql, ids, data):
        """Serialise ``data`` to JSON and insert it after the id columns."""
        try:
            payload = json.dumps(data)
        except self._RECOVERABLE as err:
            self._warn(err)
            return
        self._write(sql, tuple(ids) + (payload,))

    def _fetch_one(self, sql, params):
        """Return the first row matching the query, or None (also on error)."""
        try:
            self.cursor.execute(sql, params)
            return self.cursor.fetchone()
        except self._RECOVERABLE as err:
            self._warn(err)
            return None

    # ------------------------------------------------------------------
    # user posts
    # ------------------------------------------------------------------
    def create_user_post_table(self):
        """Create ``t_user_post`` if it does not exist yet."""
        self._write("""CREATE TABLE if not exists t_user_post (
                        id integer primary key autoincrement,
                        sec_uid varchar(200),
                        aweme_id integer unique,
                        rawdata json
                    );""")

    def get_user_post(self, sec_uid: str, aweme_id: int):
        """Return the stored row ``(id, sec_uid, aweme_id, rawdata)`` or None."""
        sql = """select id, sec_uid, aweme_id, rawdata from t_user_post where sec_uid=? and aweme_id=?;"""
        return self._fetch_one(sql, (sec_uid, aweme_id))

    def insert_user_post(self, sec_uid: str, aweme_id: int, data: dict):
        """Remember a posted work; an already stored work is left untouched."""
        sql = """insert or ignore into t_user_post (sec_uid, aweme_id, rawdata) values(?,?,?);"""
        self._insert(sql, (sec_uid, aweme_id), data)

    # ------------------------------------------------------------------
    # user likes
    # ------------------------------------------------------------------
    def create_user_like_table(self):
        """Create ``t_user_like`` if it does not exist yet.

        Uniqueness is per (user, work): the same work can be liked by any
        number of users, and each of them needs its own row.  Databases
        created by an older version keep their previous schema, because the
        statement only runs when the table is missing.
        """
        self._write("""CREATE TABLE if not exists t_user_like (
                        id integer primary key autoincrement,
                        sec_uid varchar(200),
                        aweme_id integer,
                        rawdata json,
                        unique (sec_uid, aweme_id)
                    );""")

    def get_user_like(self, sec_uid: str, aweme_id: int):
        """Return the stored row ``(id, sec_uid, aweme_id, rawdata)`` or None."""
        sql = """select id, sec_uid, aweme_id, rawdata from t_user_like where sec_uid=? and aweme_id=?;"""
        return self._fetch_one(sql, (sec_uid, aweme_id))

    def insert_user_like(self, sec_uid: str, aweme_id: int, data: dict):
        """Remember a liked work; an already stored pair is left untouched."""
        sql = """insert or ignore into t_user_like (sec_uid, aweme_id, rawdata) values(?,?,?);"""
        self._insert(sql, (sec_uid, aweme_id), data)

    # ------------------------------------------------------------------
    # mixes (collections)
    # ------------------------------------------------------------------
    def create_mix_table(self):
        """Create ``t_mix`` if it does not exist yet (no uniqueness rule)."""
        self._write("""CREATE TABLE if not exists t_mix (
                        id integer primary key autoincrement,
                        sec_uid varchar(200),
                        mix_id varchar(200),
                        aweme_id integer,
                        rawdata json
                    );""")

    def get_mix(self, sec_uid: str, mix_id: str, aweme_id: int):
        """Return the stored row ``(id, sec_uid, mix_id, aweme_id, rawdata)`` or None."""
        sql = """select id, sec_uid, mix_id, aweme_id, rawdata from t_mix where sec_uid=? and mix_id=? and aweme_id=?;"""
        return self._fetch_one(sql, (sec_uid, mix_id, aweme_id))

    def insert_mix(self, sec_uid: str, mix_id: str, aweme_id: int, data: dict):
        """Remember a work that belongs to a mix."""
        sql = """insert or ignore into t_mix (sec_uid, mix_id, aweme_id, rawdata) values(?,?,?,?);"""
        self._insert(sql, (sec_uid, mix_id, aweme_id), data)

    # ------------------------------------------------------------------
    # music (original sound)
    # ------------------------------------------------------------------
    def create_music_table(self):
        """Create ``t_music`` if it does not exist yet."""
        self._write("""CREATE TABLE if not exists t_music (
                        id integer primary key autoincrement,
                        music_id varchar(200),
                        aweme_id integer unique,
                        rawdata json
                    );""")

    def get_music(self, music_id: str, aweme_id: int):
        """Return the stored row ``(id, music_id, aweme_id, rawdata)`` or None."""
        sql = """select id, music_id, aweme_id, rawdata from t_music where music_id=? and aweme_id=?;"""
        return self._fetch_one(sql, (music_id, aweme_id))

    def insert_music(self, music_id: str, aweme_id: int, data: dict):
        """Remember a work that uses a music; a stored work is left untouched."""
        sql = """insert or ignore into t_music (music_id, aweme_id, rawdata) values(?,?,?);"""
        self._insert(sql, (music_id, aweme_id), data)
# Running this file directly does nothing: the guard body is an empty
# placeholder.
if __name__ == '__main__':
    pass