B站api应用实例:抓取小约翰可汗的BB空间签名
2023年8月9日更新主程序,解决B站wbi验证问题,将php动态页面迁移为python生成的静态html页面。代码如下:
from pathlib import Path
import csv
import requests
import time
import telebot
from functools import reduce
from hashlib import md5
import urllib.parse
# Account-info endpoint without the /wbi/ path segment.
# NOTE(review): no function visible in this script reads `url`; get_info()
# builds its own request URL — confirm before removing.
url = 'https://api.bilibili.com/x/space/acc/info'
# Index table consumed by getMixinKey(): entry k is the position, within the
# concatenated img_key + sub_key string, of the character that becomes
# character k of the shuffled key.
mixinKeyEncTab = [
46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
36, 20, 34, 44, 52
]
def getMixinKey(orig: str):
    """Shuffle the characters of *orig* and keep the first 32 of them.

    The character order is dictated by ``mixinKeyEncTab``: the k-th output
    character is ``orig[mixinKeyEncTab[k]]``. *orig* must therefore be long
    enough for every index in the table, otherwise ``IndexError`` is raised.
    """
    shuffled = ''.join(orig[index] for index in mixinKeyEncTab)
    return shuffled[:32]
def encWbi(params: dict, img_key: str, sub_key: str):
    """Return a WBI-signed copy of *params*.

    A ``wts`` timestamp (current Unix time, rounded to whole seconds) is
    added, the parameters are ordered by key, the characters ``!'()*`` are
    removed from every value, and ``w_rid`` is set to the MD5 hex digest of
    the URL-encoded query string followed by the mixin key.

    Args:
        params: Request parameters to sign. The dict is not modified.
        img_key: Key derived from the ``img_url`` of the nav endpoint.
        sub_key: Key derived from the ``sub_url`` of the nav endpoint.

    Returns:
        A new dict containing the filtered parameters (all values are
        strings) plus ``wts`` and ``w_rid``.
    """
    mixin_key = getMixinKey(img_key + sub_key)
    # Build a fresh dict so the caller's params are not polluted with 'wts'.
    unsigned = {**params, 'wts': round(time.time())}
    strip_table = str.maketrans('', '', "!'()*")
    signed = {
        key: str(value).translate(strip_table)
        for key, value in sorted(unsigned.items())
    }
    # The signature covers the query string exactly as it will be sent.
    query = urllib.parse.urlencode(signed)
    signed['w_rid'] = md5((query + mixin_key).encode()).hexdigest()
    return signed
def getWbiKeys() -> tuple[str, str]:
    """Fetch the current img_key and sub_key from the nav endpoint.

    Each key is the last path component of the corresponding URL
    (``data.wbi_img.img_url`` / ``data.wbi_img.sub_url``), truncated at its
    first dot.

    Returns:
        ``(img_key, sub_key)``.

    Raises:
        requests.HTTPError: The endpoint answered with an error status.
        requests.Timeout: No answer arrived within the timeout.
    """
    # Without a timeout a stalled connection would block the job forever.
    resp = requests.get('https://api.bilibili.com/x/web-interface/nav', timeout=10)
    resp.raise_for_status()
    wbi_img = resp.json()['data']['wbi_img']
    img_url: str = wbi_img['img_url']
    sub_url: str = wbi_img['sub_url']
    img_key = img_url.rsplit('/', 1)[1].split('.')[0]
    sub_key = sub_url.rsplit('/', 1)[1].split('.')[0]
    return img_key, sub_key
# function: get user info
def get_info():
    """Fetch the signature and avatar of user 23947287.

    The request parameters are WBI-signed with freshly fetched keys before
    the space info endpoint is queried.

    Returns:
        ``(sign_text, avatar_url, avatar_name)`` where ``avatar_name`` is the
        file name taken from the path part of ``avatar_url``.

    Raises:
        requests.HTTPError: The endpoint answered with an error status.
        requests.Timeout: No answer arrived within the timeout.
        KeyError: The JSON answer lacks ``data.sign`` or ``data.face``.
    """
    img_key, sub_key = getWbiKeys()
    signed_params = encWbi(
        params={
            'mid': '23947287',
            'jsonp': 'jsonp'
        },
        img_key=img_key,
        sub_key=sub_key
    )
    query = urllib.parse.urlencode(signed_params)
    full_url = 'https://api.bilibili.com/x/space/wbi/acc/info?' + query
    resp = requests.get(
        full_url,
        headers={'user-agent': 'Dynamic Collection, xxx@xxx.com'},
        timeout=10,  # never let a stalled connection block the job
    )
    # Fail on HTTP errors here instead of on a confusing JSON/Key error later.
    resp.raise_for_status()
    data = resp.json()['data']
    sign_text = data['sign']
    avatar_url = data['face']
    # Use only the URL path so a query string never ends up in the file name.
    avatar_name = Path(urllib.parse.urlsplit(avatar_url).path).name
    return sign_text, avatar_url, avatar_name
# function: read csv file, newline='', encoding='utf-8'
def read_csv(file_path):
    """Return the first row of the UTF-8 CSV file at *file_path*.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        The first row as a list of strings, or an empty list when the file
        contains no rows.
    """
    # newline='' lets the csv module handle line breaks inside quoted fields;
    # the with-block closes the file, no explicit close() is needed.
    with open(file_path, newline='', encoding='utf-8') as f:
        return next(csv.reader(f), [])
# function: write msg to csv file, newline='', encoding='utf-8'
def write_csv(file_path, row):
    """Insert *row* as the new first row of the CSV file at *file_path*.

    The existing file is read completely and rewritten with *row* on top.

    Args:
        file_path: Path of an existing UTF-8 CSV file.
        row: Sequence of field values for the new first row.
    """
    # Read with newline='' as well: otherwise '\r\n' inside quoted fields is
    # translated to '\n' and multi-line values change on every rewrite.
    with open(file_path, 'r', newline='', encoding='utf-8') as read_file:
        existing_rows = list(csv.reader(read_file))
    with open(file_path, 'w', newline='', encoding='utf-8') as write_file:
        writer = csv.writer(write_file)
        writer.writerow(row)
        writer.writerows(existing_rows)
# function: download img and save to path
def download_img(img_url, img_path):
    """Download *img_url* and store the response body at *img_path*.

    Args:
        img_url: URL of the image.
        img_path: Destination file path; an existing file is overwritten.

    Raises:
        requests.HTTPError: The server answered with an error status; in
            that case nothing is written.
        requests.Timeout: No answer arrived within the timeout.
    """
    img = requests.get(img_url, timeout=30)
    # Do not save an error page under an image file name.
    img.raise_for_status()
    with open(img_path, 'wb') as f:
        f.write(img.content)
# function: generate HTML, the page has two columns, the first column is time, the second column is image
# the sort follows the order of avatar.csv, the images are in the folder images
# 'avatar.csv' is the file that records the time and name of the avatar
# 'images' is the folder that stores the avatar
def generate_html_img(work_path="/var/www/html/pa.ci/ljk/"):
    """Render ``avatar.csv`` as the static page ``images.html``.

    Every CSV row with at least two columns becomes one table row: the first
    column is shown as text, the second is used as an image file name below
    ``.images/``. Rows keep the order they have in the CSV file. All CSV text
    is HTML-escaped before it is written.

    Args:
        work_path: Directory that holds ``avatar.csv`` and receives
            ``images.html``.
    """
    import html  # local import: not among the script's top-level imports

    base_dir = Path(work_path)
    avatar_path = '.images/'
    with open(base_dir / 'avatar.csv', 'r', newline='', encoding='utf-8') as read_file:
        # Rows with fewer than two columns (e.g. blank lines) cannot be rendered.
        rows = [row for row in csv.reader(read_file) if len(row) >= 2]

    page = [
        '<!DOCTYPE html>',
        '<html lang="zh">',
        '<head>',
        '<meta charset="utf-8">',
        '<title>小约翰可汗的签名记录</title>',
        '<link rel="shortcut icon" href="favicon.ico" type="image/x-icon">',
        '<meta name="viewport" content="width=device-width, initial-scale=1">',
        '<link rel="stylesheet" href="https://cdn.staticfile.org/twitter-bootstrap/3.3.7/css/bootstrap.min.css">',
        '<script src="https://cdn.staticfile.org/jquery/2.1.1/jquery.min.js"></script>',
        '<script src="https://cdn.staticfile.org/twitter-bootstrap/3.3.7/js/bootstrap.min.js"></script>',
        '</head>',
        '<body>',
        '<div class="container">',
        '<h1>小约翰可汗今天鸽了吗?</h1>',
        '<p>我怎么知道?自己去看!</p>',
        '<h3>小约翰可汗的BB空间签名和头像记录,每15分钟检查一次。</h3>',
        '<p>签名记录为 <a href="https://pa.ci/ljk/index.html">https://pa.ci/ljk/index.html</a></p>',
        '<p>头像记录为 <a href="https://pa.ci/ljk/images.html">https://pa.ci/ljk/images.html</a></p>',
        '<p>本站详情/说明请看 <a href="https://pa.ci/137.html">https://pa.ci/137.html</a></p>',
        '<p>实时查看可订阅Telegram频道 <a href="https://t.me/LittleJohnKhan">https://t.me/LittleJohnKhan</a></p>',
        '<div class="row">',
        '<div class="col-md-6">',
        '<table class="table table-striped">',
        '<thead>',
        '<tr>',
        '<th>时间</th>',
        # This column shows the avatar image, not the signature text.
        '<th>头像</th>',
        '</tr>',
        '</thead>',
        '<tbody>',
    ]
    for row in rows:
        timestamp = html.escape(row[0])
        image_name = html.escape(row[1])
        page += [
            '<tr>',
            '<td>' + timestamp + '</td>',
            '<td><img src="' + avatar_path + image_name + '" alt="' + image_name
            + '" width="200" height="200" loading="lazy"></td>',
            '</tr>',
        ]
    page += [
        '</tbody>',
        '</table>',
        '</div>',
        '</div>',
        '</div>',
        '</body>',
        '</html>',
    ]
    # newline='' writes the explicit CRLF line endings unchanged on any platform.
    with open(base_dir / 'images.html', 'w', newline='', encoding='utf-8') as write_file:
        write_file.writelines(line + '\r\n' for line in page)
def generate_html_sign(work_path="/var/www/html/pa.ci/ljk/"):
    """Render ``time.csv`` as the static page ``index.html``.

    Every CSV row with at least two columns becomes one table row holding
    the time (first column) and the signature text (second column). Rows
    keep the order they have in the CSV file. The text is HTML-escaped and
    line breaks inside a signature are shown as ``<br>`` so that a
    multi-line signature stays in one cell.

    Args:
        work_path: Directory that holds ``time.csv`` and receives
            ``index.html``.
    """
    import html  # local import: not among the script's top-level imports

    base_dir = Path(work_path)
    with open(base_dir / 'time.csv', 'r', newline='', encoding='utf-8') as read_file:
        # Rows with fewer than two columns (e.g. blank lines) cannot be rendered.
        rows = [row for row in csv.reader(read_file) if len(row) >= 2]

    page = [
        '<!DOCTYPE html>',
        '<html lang="zh">',
        '<head>',
        '<meta charset="utf-8">',
        '<title>小约翰可汗的签名记录</title>',
        '<link rel="shortcut icon" href="favicon.ico" type="image/x-icon">',
        '<meta name="viewport" content="width=device-width, initial-scale=1">',
        '<link rel="stylesheet" href="https://cdn.staticfile.org/twitter-bootstrap/3.3.7/css/bootstrap.min.css">',
        '<script src="https://cdn.staticfile.org/jquery/2.1.1/jquery.min.js"></script>',
        '<script src="https://cdn.staticfile.org/twitter-bootstrap/3.3.7/js/bootstrap.min.js"></script>',
        '</head>',
        '<body>',
        '<div class="container">',
        '<h1>小约翰可汗今天鸽了吗?</h1>',
        '<p>我怎么知道?自己去看!</p>',
        '<h3>小约翰可汗的BB空间签名和头像记录,每15分钟检查一次。</h3>',
        '<p>签名记录为 <a href="https://pa.ci/ljk/index.html">https://pa.ci/ljk/index.html</a></p>',
        '<p>头像记录为 <a href="https://pa.ci/ljk/images.html">https://pa.ci/ljk/images.html</a></p>',
        '<p>本站详情/说明请看 <a href="https://pa.ci/137.html">https://pa.ci/137.html</a></p>',
        '<p>实时查看可订阅Telegram频道 <a href="https://t.me/LittleJohnKhan">https://t.me/LittleJohnKhan</a></p>',
        '<table class="table table-striped">',
        '<thead>',
        '<tr>',
        '<th>时间</th>',
        '<th>签名</th>',
        '</tr>',
        '</thead>',
        '<tbody>',
    ]
    for row in rows:
        # The signature is user-controlled text: escape it first, then turn
        # every kind of line break (CRLF, CR, LF) into <br>.
        signature = html.escape(row[1])
        signature = signature.replace('\r\n', '\n').replace('\r', '\n')
        page += [
            '<tr>',
            '<td>' + html.escape(row[0]) + '</td>',
            '<td>' + signature.replace('\n', '<br>') + '</td>',
            '</tr>',
        ]
    page += [
        '</tbody>',
        '</table>',
        '</div>',
        '</body>',
        '</html>',
    ]
    # newline='' writes the explicit CRLF line endings unchanged on any platform.
    with open(base_dir / 'index.html', 'w', newline='', encoding='utf-8') as write_file:
        write_file.writelines(line + '\r\n' for line in page)
# main function
def main():
    """Record signature and avatar changes and regenerate the HTML pages.

    The current signature and avatar are fetched once. Each is compared with
    the second column of the first row of its CSV file (``time.csv`` /
    ``avatar.csv``); on a difference a new ``[timestamp, value]`` row is put
    on top of the file and the matching page is regenerated. A first row
    with fewer than two columns counts as a difference.
    """
    # All data files and generated pages live in this directory.
    work_path = "/var/www/html/pa.ci/ljk/"
    sign_text, avatar_url, avatar_name = get_info()

    sign_first_line = read_csv(work_path + 'time.csv')
    if len(sign_first_line) < 2 or str(sign_first_line[1]) != str(sign_text):
        time_update = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        write_csv(work_path + 'time.csv', [time_update, sign_text])
        generate_html_sign()

    avatar_first_line = read_csv(work_path + 'avatar.csv')
    if len(avatar_first_line) < 2 or str(avatar_first_line[1]) != str(avatar_name):
        time_update = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        # Download before recording: if the download fails, the CSV stays
        # unchanged and the next run tries again instead of listing an image
        # that was never saved.
        download_img(avatar_url, work_path + '.images/' + avatar_name)
        write_csv(work_path + 'avatar.csv', [time_update, avatar_name])
        generate_html_img()
        # Notify last so a failed notification cannot lose the record.
        # NOTE(review): send_msg is not defined in the visible part of this
        # script (telebot is imported but unused here) — confirm it exists.
        send_msg("头像更新" + "\n" + time_update + "\n" + avatar_name)


if __name__ == '__main__':
    main()
2021年10月13日更新主程序,添加了图片展示功能,具体程序看本文末尾新加内容。
签名记录为 https://pa.ci/ljk/index.html
头像记录为 https://pa.ci/ljk/images.html
B站之前是直接提供API的,网址是docs.bilibili.cn,后来因为负载太高不对外开放了。所幸现在还有人在收集API并放在GitHub上。利用API获取JSON格式的个人信息,并从中提取签名对应的sign字段文字。
抓取使用的是Python脚本,用crontab每15分钟运行一次,记录保存到csv文件里面。用PHP写了个简易的页面,将csv读取并展示出来,地址为https://pa.ci/ljk/index.html。
以下是python脚本,因为csv文件不大,所以没有用mysql,直接w/r一把梭。
#!/usr/bin/env python3
# Record the signature of Bilibili user 23947287: when it differs from the
# newest entry in record.csv, a [timestamp, signature] row is put on top.
import csv
import requests
import time

url = 'https://api.bilibili.com/x/web-interface/card'
params = (
    ('mid', '23947287'),
)
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0'
}
# A timeout keeps a stalled connection from blocking the cron job forever.
response = requests.get(url=url, params=params, headers=headers, timeout=10).json()
sign_text = response['data']['card']['sign']

file_path = r'record.csv'
# newline='' lets the csv module handle line breaks inside quoted fields.
with open(file_path, newline='', encoding='utf-8') as f:
    # An empty file yields [] and is treated as "signature changed".
    first_line = next(csv.reader(f), [])

if len(first_line) < 2 or str(first_line[1]) != str(sign_text):
    time_update = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    row = [time_update, sign_text]
    with open(file_path, 'r', newline='', encoding='utf-8') as readFile:
        lines = list(csv.reader(readFile))
    lines.insert(0, row)
    with open(file_path, 'w', newline='', encoding='utf-8') as writeFile:
        csv.writer(writeFile).writerows(lines)
以下是PHP页面,直接读取csv就完事了。现在文件不大,响应速度还行,不知道以后文件变大后会不会因为IO过高拖垮服务器。
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>小约翰可汗的签名记录</title>
<link rel="shortcut icon" href="favicon.ico">
</head>
<body>
<center>
<h1>小约翰可汗今天鸽了吗?</h1>
<p>我怎么知道?自己去看!</p>
<h3>小约翰可汗的BB空间签名记录,每15分钟检查一次。</h3>
<p>小约翰可汗的签名记录为 <a href="https://pa.ci/ljk">https://pa.ci/ljk</a>(ljk是Little John Khan的缩写)</p>
<p>本站详情/说明请看 <a href="https://pa.ci/137.html">https://pa.ci/137.html</a></p>
<?php
// Render every row of record.csv as one table row; cell text is HTML-escaped.
echo "<table>\n\n";
$file = fopen("record.csv", "r");
// fopen() returns false when the file is missing or unreadable; in that case
// an empty table is shown instead of passing false on to fgetcsv()/fclose().
if ($file !== false) {
    // Fetch the data from the csv file row by row
    while (($data = fgetcsv($file)) !== false) {
        echo "<tr>";
        foreach ($data as $i) {
            // A blank line yields a single null field; cast it so that
            // htmlspecialchars() always receives a string.
            echo "<td>" . htmlspecialchars((string) $i) . "</td>";
        }
        echo "</tr> \n";
    }
    fclose($file);
}
echo "\n</table>";
?>
</center>
</body>
</html>
更新功能,现在可以同时记录签名和头像。
python写的主程序如下:
#!/usr/bin/env python3
# Record signature and avatar of Bilibili user 23947287: a change of either
# puts a [timestamp, value] row on top of time.csv / avatar.csv, and a new
# avatar image is saved below images/.
import os
from pathlib import Path
import csv
import requests
import time


def _first_row(csv_path):
    """Return the first row of *csv_path*, or [] when the file has no rows."""
    with open(csv_path, newline='', encoding='utf-8') as f:
        return next(csv.reader(f), [])


def _prepend_row(csv_path, row):
    """Rewrite *csv_path* with *row* inserted as the new first row."""
    # newline='' on both sides keeps line breaks inside quoted fields intact.
    with open(csv_path, 'r', newline='', encoding='utf-8') as readFile:
        lines = list(csv.reader(readFile))
    lines.insert(0, row)
    with open(csv_path, 'w', newline='', encoding='utf-8') as writeFile:
        csv.writer(writeFile).writerows(lines)


url = 'https://api.bilibili.com/x/web-interface/card'
params = (
    ('mid', '23947287'),
)
headers = {
    "user-agent": ""
}
# A timeout keeps a stalled connection from blocking the cron job forever.
response = requests.get(url=url, params=params, headers=headers, timeout=10).json()
sign_text = response['data']['card']['sign']
avatar_url = response['data']['card']['face']
avatar_name = Path(avatar_url)
file_name = avatar_name.name

file_path = r'time.csv'
first_line = _first_row(file_path)
# A first row with fewer than two columns counts as "changed".
if len(first_line) < 2 or str(first_line[1]) != str(sign_text):
    time_update = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    row = [time_update, sign_text]
    _prepend_row(file_path, row)

file_path_avatar = r'avatar.csv'
first_line = _first_row(file_path_avatar)
if len(first_line) < 2 or str(first_line[1]) != str(file_name):
    # NOTE(review): images are saved below 'images/' here, while the PHP
    # gallery shown later in this post globs '.images/' — confirm which
    # directory is intended.
    save_path = r'images/'
    completeName = os.path.join(save_path, file_name)
    response = requests.get(avatar_url, timeout=30)
    # Do not save an error page under an image file name.
    response.raise_for_status()
    os.makedirs(save_path, exist_ok=True)
    with open(completeName, "wb") as file:
        file.write(response.content)
    time_update = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    row = [time_update, file_name]
    _prepend_row(file_path_avatar, row)
PHP写的图片展示,最新的放在最上面。
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>小约翰可汗的签名记录</title>
<link rel="shortcut icon" href="favicon.ico">
</head>
<body>
<center>
<h1>小约翰可汗今天鸽了吗?</h1>
<p>我怎么知道?自己去看!</p>
<h3>小约翰可汗的BB空间签名和头像记录,每15分钟检查一次。</h3>
<p>签名记录为 <a href="https://pa.ci/ljk/index.php">https://pa.ci/ljk/index.php</a>(ljk是Little John Khan的缩写)</p>
<p>头像记录为 <a href="https://pa.ci/ljk/images.php">https://pa.ci/ljk/images.php</a></p>
<p>本站详情/说明请看 <a href="https://pa.ci/137.html">https://pa.ci/137.html</a></p>
<?php
// Show every .jpg in .images/: the most recently modified image comes first,
// the remaining images follow in the order glob() returned them.
$dirname = '.images/';
// glob() returns false on error; treat that like "no images".
$images = glob($dirname . '*.jpg') ?: [];
$mostrecent = 0;
$mostrecentimg = null;
// scan for the image with the newest modification time
foreach ($images as $image) {
    $imagemod = filemtime($image);
    if ($mostrecent < $imagemod) {
        $mostrecentimg = $image;
        $mostrecent = $imagemod;
    }
}
// display; without any image there is no "most recent" one to show, so no
// <img> tag with an empty src is emitted
if ($mostrecentimg !== null) {
    echo '<img src="' . htmlspecialchars($mostrecentimg) . '" height="300"/><br />';
}
foreach ($images as $image) {
    // the most recent was already output above so skip it here
    if ($image == $mostrecentimg) continue;
    echo '<img src="' . htmlspecialchars($image) . '" height="300"/><br />';
}
?>
</center>
</body>
</html>
大佬牛皮
过奖了
大佬能不能让图片那边也能显示日期呢?
等我有空的时候弄一下吧