mirror of
https://github.com/imgyh/tiktok.git
synced 2025-04-05 07:34:12 +08:00
91 lines
3.9 KiB
Python
91 lines
3.9 KiB
Python
import requests, re, os, time
|
||
from selenium import webdriver
|
||
from webdriver_manager.chrome import ChromeDriverManager
|
||
from bs4 import BeautifulSoup
|
||
|
||
|
||
class TikTok(object):
    """Resolve Douyin share links and scrape direct video URLs.

    Share links are resolved with ``requests``; page content is rendered in a
    Selenium-driven Chrome session (``self.driver``) and parsed with
    BeautifulSoup. The caller is responsible for calling
    ``self.driver.quit()`` when finished.
    """

    # Class of the placeholder <div> that, per the original author's note,
    # disappears once the user page has been scrolled to the bottom.
    # NOTE(review): this and the class below look like generated class names
    # from the site's markup and may change between site releases -- confirm.
    _LOADING_DIV_CLASS = "Sr905S5u"
    # Class attribute of the <a> elements that link to a user's videos.
    _VIDEO_LINK_CLASS = "B3AsdZT9 chmb2GX8"
    # Index of the <source> element whose ``src`` is used as the video URL.
    # NOTE(review): the original code always took the third <source>; the
    # reason is not visible here -- confirm against the live page.
    _SOURCE_INDEX = 2
    # Matches ".../video/<digits>" followed by "/", "?", "#" or end of string.
    _AWEME_ID_RE = re.compile(r"/video/(\d+)(?:[/?#]|$)")

    def __init__(self):
        """Start a Chrome session and define the headers used for HTTP calls."""
        options = webdriver.ChromeOptions()
        # Headless mode is intentionally left disabled. If it is enabled, the
        # original author's notes say to also set an explicit window size
        # (e.g. 'window-size=1920x1080' / '--start-maximized'), because a
        # headless window defaults to 0x0 and dynamically loaded content is
        # then rendered incompletely; '--disable-gpu' may also be needed.
        # NOTE(review): the driver path is passed positionally, which relies
        # on the legacy ``executable_path`` parameter; newer Selenium releases
        # expect a ``Service`` object instead -- confirm against the Selenium
        # version this project pins.
        self.driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
        # The browser session could also be used to harvest cookies; the
        # original author left that step disabled, so only a user-agent is
        # sent with ``requests`` calls.
        self.headers = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"
        }

    @staticmethod
    def _shareCode(shareLink):
        """Return the short code of a share link (the path segment after 'com/')."""
        return shareLink.split("com/")[1].split("/")[0]

    @classmethod
    def _awemeIdFromUrl(cls, url):
        """Extract the video (aweme) id from a resolved video URL.

        Prefers an explicit ``/video/<digits>`` path segment so that both
        ``.../share/video/<id>/`` and ``.../video/<id>`` are handled; otherwise
        falls back to the original positional lookup (sixth '/'-separated
        piece), which raises ``IndexError`` when the URL is too short.
        """
        # Search the path only, so a "/video/<n>" inside the query string
        # cannot be mistaken for the id.
        found = cls._AWEME_ID_RE.search(url.split("?")[0])
        if found:
            return found.group(1)
        return url.split("/")[5]

    @staticmethod
    def _userIdFromUrl(url):
        """Extract the user id: the text after 'user/' with the query removed."""
        return url.split("?")[0].split("user/")[1]

    @staticmethod
    def _normalizeVideoSrc(src):
        """Turn a <source> ``src`` value into the final 1080p video URL.

        Everything from the first '&' onward is dropped, a scheme is added
        when missing, and the ``ratio``/``line`` parameters are appended.
        """
        base = src.split("&")[0]
        # The page is expected to provide scheme-less ("//host/...") URLs;
        # only prepend the scheme when one is not already present, so an
        # absolute URL does not become "https:https://...".
        if not base.startswith(("http://", "https://")):
            base = "https:" + base
        return base + "&ratio=1080p&line=0"

    def _resolveShareLink(self, shareLink):
        """Request the canonical short URL and return the final redirected URL."""
        shareUrl = "https://v.douyin.com/" + self._shareCode(shareLink)
        # ``headers`` must be passed by keyword: the second positional
        # parameter of ``requests.get`` is ``params`` (query string), so the
        # user-agent was previously never sent as a header.
        return requests.get(shareUrl, headers=self.headers).url

    def videoShareLinkConvert(self, shareLink="https://v.douyin.com/kcvMpuN/"):
        """Convert a video share link into its ``https://www.douyin.com/video/<id>`` URL.

        Args:
            shareLink: A share link containing 'com/<code>'.

        Returns:
            The canonical video page URL as a string.

        Raises:
            IndexError: If the link or the resolved URL has an unexpected shape.
        """
        awemeId = self._awemeIdFromUrl(self._resolveShareLink(shareLink))
        return "https://www.douyin.com/video/" + awemeId

    def oneVideoInfo(self, url="https://www.douyin.com/video/6915675899241450760"):
        """Load a video page in the browser and return its direct video URL.

        The URL is also printed to stdout, as in the original implementation.

        Args:
            url: Video page URL to open with ``self.driver``.

        Returns:
            The direct video URL with ``&ratio=1080p&line=0`` appended.

        Raises:
            IndexError: If the page has fewer <source> elements than expected.
        """
        self.driver.get(url)
        soup = BeautifulSoup(self.driver.page_source, 'html.parser')
        sources = soup.find_all(name="source")
        if len(sources) <= self._SOURCE_INDEX:
            # Same exception type as the bare index lookup, but with context.
            raise IndexError(
                "expected at least %d <source> elements on %s, found %d"
                % (self._SOURCE_INDEX + 1, url, len(sources))
            )
        videoRealUrl = self._normalizeVideoSrc(sources[self._SOURCE_INDEX].get("src"))

        print(videoRealUrl)
        return videoRealUrl

    def userShareLinkConvert(self, shareLink="https://v.douyin.com/kcvSCe9/"):
        """Convert a user share link into its ``https://www.douyin.com/user/<id>`` URL.

        Args:
            shareLink: A share link containing 'com/<code>'.

        Returns:
            The canonical user page URL as a string.

        Raises:
            IndexError: If the link or the resolved URL has an unexpected shape.
        """
        userId = self._userIdFromUrl(self._resolveShareLink(shareLink))
        return "https://www.douyin.com/user/" + userId

    def userVideoInfo(self, url="https://www.douyin.com/user/MS4wLjABAAAA06y3Ctu8QmuefqvUSU7vr0c_ZQnCqB0eaglgkelLTek"):
        """Scroll through a user page and return the direct URL of every listed video.

        Args:
            url: User page URL to open with ``self.driver``.

        Returns:
            A list of direct video URLs, one ``oneVideoInfo`` result per video
            link found on the page, in page order.
        """
        self.driver.get(url)
        # Simulate scrolling down so the page keeps loading more entries.
        scrollScript = "var q=document.documentElement.scrollTop=100000"
        # NOTE(review): this loop has no upper bound; if the placeholder class
        # never disappears from the page it will not terminate.
        while True:
            self.driver.execute_script(scrollScript)
            soup = BeautifulSoup(self.driver.page_source, 'html.parser')
            # The placeholder <div> is gone once the bottom has been reached.
            if not soup.find_all(name="div", attrs={"class": self._LOADING_DIV_CLASS}):
                break
            time.sleep(1)

        # ``soup`` is a parsed snapshot of the fully scrolled page, so it stays
        # valid while ``oneVideoInfo`` navigates the shared driver elsewhere.
        anchors = soup.find_all(name="a", attrs={"class": self._VIDEO_LINK_CLASS})
        return [
            self.oneVideoInfo("https://www.douyin.com" + anchor.get("href"))
            for anchor in anchors
        ]
|
||
|
||
|
||
# Script section: open a browser session and collect the direct video URLs
# for the default user page (swap in tk.oneVideoInfo() for a single video).
tk = TikTok()
try:
    tk.userVideoInfo()
finally:
    # Quit the browser even when scraping raises, so a failed run does not
    # leave the browser session open.
    tk.driver.quit()
|