# 源码仅用于学习交流!
# 目标:六间房APP
# 客户端:夜神模拟器
# 抓包工具:fiddler
# 要求:爬取APP内的小视频,不带水印,多线程
# 难度:入门
# 知识点:fiddler抓包,json解析,开启多线程
import requests
from fake_useragent import UserAgent
from threading import Thread
def download_mp4(url):
    """Download one video file and save it under ``./6间房/``.

    The saved file is named after the last ``/``-separated segment of
    ``url``. The request headers are read from the module-level
    ``headers`` variable, which must be assigned before this is called.

    Args:
        url: Direct URL of the video file to fetch.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    import os  # local import: only this function needs it

    # open() below fails with FileNotFoundError if the folder is missing;
    # exist_ok keeps this safe when several threads get here at once.
    os.makedirs('./6间房', exist_ok=True)

    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, headers=headers, timeout=30)
    # Do not write an HTTP error page to disk as if it were a video.
    response.raise_for_status()

    with open('./6间房/{}'.format(url.split('/')[-1]), 'wb') as f:
        f.write(response.content)
# Ask how many list pages to crawl (the prompt states 20 videos per page).
num = int(input('输入要爬取的页数(每页20个视频):'))

# Headers sent to the video-list API only.
api_headers = {
    'User-Agent': 'Vc0xDsIwDEDRq)QEle3Ejp1LtIKJqXLaBIoESEhIDDk86sj09ab)3T)rzZ)Xh993Xy7nhUcbpasIpkgakJiQ(zTP03DCNMzvVycgoh6OIgRMhIjBxIyjlmZC2krxtqlx9U0Bay1MAOIaPWCpiaS2rhkypYyWdc2H)58)',
    'Host': 'v.6.cn',
    'Connection': 'Keep-Alive',
    'Accept-Encoding': 'gzip',
}

# download_mp4 reads the module-level `headers`. It is assigned once here and
# never rebound afterwards, so worker threads cannot pick up the API headers
# (with their 'Host: v.6.cn' entry) while a later page is being requested.
headers = {
    'User-Agent': UserAgent().chrome
}

for page in range(1, num + 1):
    url = 'https://v.6.cn/coop/mobile/index.php?act=recommend&padapi=minivideo-getlist.php&page={}'.format(page)
    res = requests.get(url, headers=api_headers, timeout=10).json()

    threads = []
    for r in res['content']['list']:
        mp4_url = r['playurl']
        # Pass the function and its argument separately. Writing
        # target=download_mp4(mp4_url) would run the download right here in
        # the main thread and hand Thread a target of None.
        t = Thread(target=download_mp4, args=(mp4_url,))
        t.start()
        threads.append(t)

    # Wait for this page's downloads before requesting the next page, which
    # caps the number of concurrent downloads at one page of videos.
    for t in threads:
        t.join()