B站api应用实例：抓取小约翰可汗的BB空间签名

2023年8月9日更新主程序，解决B站 WBI 验证问题，将 PHP 动态页面迁移为 Python 生成的静态 HTML 页面。代码如下：

import csv
import html
import time
import urllib.parse
from functools import reduce
from hashlib import md5
from pathlib import Path

import requests
import telebot

# NOTE(review): nothing in the visible code reads `url`; get_info() builds its
# own /x/space/wbi/acc/info address. This looks like the endpoint used before
# the WBI signing change and is presumably kept for reference only - confirm
# before removing.
url = 'https://api.bilibili.com/x/space/acc/info'

# Index permutation applied to img_key + sub_key to derive the WBI mixin key.
mixinKeyEncTab = [
    46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
    33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
    61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
    36, 20, 34, 44, 52
]


def getMixinKey(orig: str):
    """Scramble ``orig`` with ``mixinKeyEncTab`` and return the first 32 characters.

    Every index in the table (0..63) is looked up before truncating, so
    ``orig`` must be at least 64 characters long; shorter input raises
    ``IndexError``.
    """
    shuffled = ''.join(orig[position] for position in mixinKeyEncTab)
    return shuffled[:32]

def encWbi(params: dict, img_key: str, sub_key: str):
    """Return a WBI-signed copy of ``params``.

    The result contains the original parameters plus ``wts`` (current Unix
    time, rounded to whole seconds), ordered by key, with every value
    converted to ``str`` and stripped of the characters ``!'()*``.  The
    signature ``w_rid`` is the MD5 hex digest of the URL-encoded parameters
    followed by the mixin key, and is appended as the last entry.

    Args:
        params: Request parameters to sign. The dict itself is left untouched.
        img_key: First half of the key material passed to ``getMixinKey``.
        sub_key: Second half of the key material passed to ``getMixinKey``.

    Returns:
        A new dict of string values including ``wts`` and ``w_rid``.
    """
    mixin_key = getMixinKey(img_key + sub_key)

    # Work on a copy: adding `wts` to the caller's dict would leak a stale
    # timestamp into any later reuse of that dict.
    unsigned = dict(params)
    unsigned['wts'] = round(time.time())

    # One translate() pass removes the characters that must not appear in values.
    strip_table = str.maketrans('', '', "!'()*")
    signed = {
        key: str(value).translate(strip_table)
        for key, value in sorted(unsigned.items())
    }

    query = urllib.parse.urlencode(signed)
    signed['w_rid'] = md5((query + mixin_key).encode()).hexdigest()
    return signed

def getWbiKeys() -> tuple[str, str]:
    """Fetch the current ``img_key`` and ``sub_key`` from the nav endpoint.

    Each key is the part of the corresponding ``wbi_img`` URL between the
    last ``/`` and the first ``.`` that follows it.

    Returns:
        ``(img_key, sub_key)``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
    """
    # Without a timeout a stalled connection would block the periodic job forever.
    resp = requests.get('https://api.bilibili.com/x/web-interface/nav', timeout=10)
    resp.raise_for_status()
    wbi_img = resp.json()['data']['wbi_img']
    img_key, sub_key = (
        wbi_img[field].rsplit('/', 1)[1].split('.')[0]
        for field in ('img_url', 'sub_url')
    )
    return img_key, sub_key




# function: get user info
def get_info(mid='23947287'):
    """Fetch the signature text and avatar of a Bilibili user.

    Signs the request with fresh WBI keys, queries the
    ``/x/space/wbi/acc/info`` endpoint and reads ``sign`` and ``face`` from
    the ``data`` object of the JSON response.

    Args:
        mid: User id to query. Defaults to the account this script tracks.

    Returns:
        ``(sign_text, avatar_url, avatar_name)`` where ``avatar_name`` is the
        last path component of ``avatar_url``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        KeyError: If the response JSON lacks ``data``, ``sign`` or ``face``.
    """
    img_key, sub_key = getWbiKeys()

    signed_params = encWbi(
        params={
            'mid': str(mid),
            'jsonp': 'jsonp',
        },
        img_key=img_key,
        sub_key=sub_key,
    )
    query = urllib.parse.urlencode(signed_params)
    full_url = 'https://api.bilibili.com/x/space/wbi/acc/info?' + query

    resp = requests.get(
        full_url,
        headers={'user-agent': 'Dynamic Collection, xxx@xxx.com'},
        timeout=10,  # never let a stalled connection hang the job
    )
    # Surface HTTP-level failures here instead of as a confusing parse error below.
    resp.raise_for_status()
    data = resp.json()['data']

    sign_text = data['sign']
    avatar_url = data['face']
    avatar_name = Path(avatar_url).name
    return sign_text, avatar_url, avatar_name

# function: return the first row of a UTF-8 CSV file
def read_csv(file_path):
    """Return the first row of the CSV file at ``file_path`` as a list of strings.

    The file is opened with ``newline=''`` so line breaks inside quoted
    fields are returned unchanged.

    Raises:
        StopIteration: If the file contains no rows.
    """
    with open(file_path, newline='', encoding='utf-8') as handle:
        rows = csv.reader(handle)
        return next(rows)

# function: prepend a row to a UTF-8 CSV file
def write_csv(file_path, row):
    """Insert ``row`` as the first line of the CSV file at ``file_path``.

    The whole file is read, ``row`` is put in front of the existing rows and
    the file is rewritten.

    Args:
        file_path: Path of an existing CSV file.
        row: Sequence of field values for the new first row.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    # newline='' on the read side too: otherwise universal-newline translation
    # rewrites "\r\n" inside quoted fields (multi-line signatures) to "\n"
    # each time the history is rewritten.
    with open(file_path, newline='', encoding='utf-8') as src:
        rows = list(csv.reader(src))
    rows.insert(0, row)
    with open(file_path, 'w', newline='', encoding='utf-8') as dst:
        csv.writer(dst).writerows(rows)

# function: download img and save to path
def download_img(img_url, img_path):
    """Download ``img_url`` and write the response body to ``img_path``.

    Args:
        img_url: URL of the image to fetch.
        img_path: Destination file path; an existing file is overwritten.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status. Nothing is written in that case.
    """
    img = requests.get(img_url, timeout=30)
    # Fail loudly rather than saving an HTTP error body as if it were the image.
    img.raise_for_status()
    with open(img_path, 'wb') as f:
        f.write(img.content)


# function: generate HTML, the page has two columns, the first column is time, the second column is image
# the sort follows the order of avatar.csv, the images are in the folder .images
# 'avatar.csv' is the file that records the time and name of the avatar
# '.images' is the folder that stores the avatar
def generate_html_img(work_path="/var/www/html/pa.ci/ljk/"):
    """Render ``avatar.csv`` as the static page ``images.html``.

    Both files live in ``work_path``.  Each CSV row with at least two columns
    becomes one table row: column 0 as text, column 1 as the file name of an
    image under ``.images/``.  Rows are emitted in file order.

    Args:
        work_path: Directory holding ``avatar.csv`` and receiving
            ``images.html``. Defaults to the production site directory.
    """
    site_dir = Path(work_path)
    avatar_csv = site_dir / 'avatar.csv'
    html_path = site_dir / 'images.html'
    avatar_path = '.images/'

    with open(avatar_csv, newline='', encoding='utf-8') as read_file:
        # Rows with fewer than two columns (e.g. blank lines) cannot be rendered.
        rows = [row for row in csv.reader(read_file) if len(row) >= 2]

    page_head = (
        '<!DOCTYPE html>',
        '<html lang="zh">',
        '<head>',
        '<meta charset="utf-8">',
        '<title>小约翰可汗的签名记录</title>',
        '<link rel="shortcut icon" href="favicon.ico" type="image/x-icon">',
        '<meta name="viewport" content="width=device-width, initial-scale=1">',
        '<link rel="stylesheet" href="https://cdn.staticfile.org/twitter-bootstrap/3.3.7/css/bootstrap.min.css">',
        '<script src="https://cdn.staticfile.org/jquery/2.1.1/jquery.min.js"></script>',
        '<script src="https://cdn.staticfile.org/twitter-bootstrap/3.3.7/js/bootstrap.min.js"></script>',
        '</head>',
        '<body>',
        '<div class="container">',
        '<h1>小约翰可汗今天鸽了吗？</h1>',
        '<p>我怎么知道？自己去看！</p>',
        '<h3>小约翰可汗的BB空间签名和头像记录，每15分钟检查一次。</h3>',
        '<p>签名记录为 <a href="https://pa.ci/ljk/index.html">https://pa.ci/ljk/index.html</a></p>',
        '<p>头像记录为 <a href="https://pa.ci/ljk/images.html">https://pa.ci/ljk/images.html</a></p>',
        '<p>本站详情/说明请看 <a href="https://pa.ci/137.html">https://pa.ci/137.html</a></p>',
        '<p>实时查看可订阅Telegram频道 <a href="https://t.me/LittleJohnKhan">https://t.me/LittleJohnKhan</a></p>',
        '<div class="row">',
        '<div class="col-md-6">',
        '<table class="table table-striped">',
        '<thead>',
        '<tr>',
        '<th>时间</th>',
        # This column shows avatars; the header previously said "签名" (signature).
        '<th>头像</th>',
        '</tr>',
        '</thead>',
        '<tbody>',
    )
    page_tail = (
        '</tbody>',
        '</table>',
        '</div>',
        '</div>',
        '</div>',
        '</body>',
        '</html>',
    )

    lines = list(page_head)
    for row in rows:
        # CSV content is data, not markup: escape it for both text and
        # attribute contexts before embedding it in the page.
        when = html.escape(row[0])
        name = html.escape(row[1])
        lines.extend((
            '<tr>',
            '<td>' + when + '</td>',
            '<td><img src="' + avatar_path + name + '" alt="' + name
            + '" width="200" height="200" loading="lazy"></td>',
            '</tr>',
        ))
    lines.extend(page_tail)

    # newline='' keeps the explicit "\r\n" endings byte-exact on every platform.
    with open(html_path, 'w', newline='', encoding='utf-8') as write_file:
        write_file.writelines(line + '\r\n' for line in lines)

def generate_html_sign(work_path="/var/www/html/pa.ci/ljk/"):
    """Render ``time.csv`` as the static page ``index.html``.

    Both files live in ``work_path``.  Each CSV row with at least two columns
    becomes one table row: column 0 is the timestamp, column 1 the signature
    text.  Rows are emitted in file order.

    Args:
        work_path: Directory holding ``time.csv`` and receiving
            ``index.html``. Defaults to the production site directory.
    """
    site_dir = Path(work_path)
    time_csv = site_dir / 'time.csv'
    html_path = site_dir / 'index.html'

    with open(time_csv, newline='', encoding='utf-8') as read_file:
        # Rows with fewer than two columns (e.g. blank lines) cannot be rendered.
        rows = [row for row in csv.reader(read_file) if len(row) >= 2]

    page_head = (
        '<!DOCTYPE html>',
        '<html lang="zh">',
        '<head>',
        '<meta charset="utf-8">',
        '<title>小约翰可汗的签名记录</title>',
        '<link rel="shortcut icon" href="favicon.ico" type="image/x-icon">',
        '<meta name="viewport" content="width=device-width, initial-scale=1">',
        '<link rel="stylesheet" href="https://cdn.staticfile.org/twitter-bootstrap/3.3.7/css/bootstrap.min.css">',
        '<script src="https://cdn.staticfile.org/jquery/2.1.1/jquery.min.js"></script>',
        '<script src="https://cdn.staticfile.org/twitter-bootstrap/3.3.7/js/bootstrap.min.js"></script>',
        '</head>',
        '<body>',
        '<div class="container">',
        '<h1>小约翰可汗今天鸽了吗？</h1>',
        '<p>我怎么知道？自己去看！</p>',
        '<h3>小约翰可汗的BB空间签名和头像记录，每15分钟检查一次。</h3>',
        '<p>签名记录为 <a href="https://pa.ci/ljk/index.html">https://pa.ci/ljk/index.html</a></p>',
        '<p>头像记录为 <a href="https://pa.ci/ljk/images.html">https://pa.ci/ljk/images.html</a></p>',
        '<p>本站详情/说明请看 <a href="https://pa.ci/137.html">https://pa.ci/137.html</a></p>',
        '<p>实时查看可订阅Telegram频道 <a href="https://t.me/LittleJohnKhan">https://t.me/LittleJohnKhan</a></p>',
        '<table class="table table-striped">',
        '<thead>',
        '<tr>',
        '<th>时间</th>',
        '<th>签名</th>',
        '</tr>',
        '</thead>',
        '<tbody>',
    )
    page_tail = (
        '</tbody>',
        '</table>',
        '</div>',
        '</body>',
        '</html>',
    )

    lines = list(page_head)
    for row in rows:
        # The signature is free text fetched from a remote profile, so it must
        # be escaped before it is embedded in the page.
        when = html.escape(row[0])
        sign = html.escape(row[1])
        # A signature may span multiple lines; turn every line break into <br>
        # (after escaping, so the inserted tags stay real markup) to keep the
        # text inside a single table cell.
        for line_break in ('\r\n', '\r', '\n'):
            sign = sign.replace(line_break, '<br>')
        lines.extend((
            '<tr>',
            '<td>' + when + '</td>',
            '<td>' + sign + '</td>',
            '</tr>',
        ))
    lines.extend(page_tail)

    # newline='' keeps the explicit "\r\n" endings byte-exact on every platform.
    with open(html_path, 'w', newline='', encoding='utf-8') as write_file:
        write_file.writelines(line + '\r\n' for line in lines)

# main function
def main():
    """Check the tracked profile once and record any change.

    Fetches the current signature and avatar, compares each with the first
    row of its history file (``time.csv`` / ``avatar.csv``), and on a change
    prepends a timestamped row and regenerates the matching HTML page.  A new
    avatar image is also downloaded into ``.images/``.
    """
    # All data files and generated pages live under this directory.
    work_path = "/var/www/html/pa.ci/ljk/"

    sign_text, avatar_url, avatar_name = get_info()

    # Signature: the newest record is the first row of time.csv.
    sign_first_line = read_csv(work_path + 'time.csv')
    if str(sign_first_line[1]) != str(sign_text):
        time_update = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        write_csv(work_path + 'time.csv', [time_update, sign_text])
        generate_html_sign()

    # Avatar: the newest record is the first row of avatar.csv.
    avatar_first_line = read_csv(work_path + 'avatar.csv')
    if str(avatar_first_line[1]) != str(avatar_name):
        time_update = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        # Fetch the image before recording it: if the row were written first
        # and the download then failed, the next run would see a matching
        # name and never retry, leaving a permanently broken image.
        download_img(avatar_url, work_path + '.images/' + avatar_name)
        write_csv(work_path + 'avatar.csv', [time_update, avatar_name])
        generate_html_img()
        # NOTE(review): send_msg is not defined in the visible part of this
        # file (presumably a telebot-based helper omitted from the listing -
        # confirm). It runs last so that a notification failure cannot
        # prevent the change from being recorded.
        send_msg("头像更新" + "\n" + time_update + "\n" + avatar_name)


if __name__ == '__main__':
    main()

2021年10月13日更新主程序，添加了图片展示功能，具体程序看本文末尾新加内容。
签名记录为 https://pa.ci/ljk/index.html
头像记录为 https://pa.ci/ljk/images.html

B站之前是直接提供 API 的，网址是 docs.bilibili.cn，后来因为负载太高不对外开放了。所幸现在还有人在收集 API 并放在 GitHub 上。利用 API 获取 JSON 格式的个人信息，并从中提取签名对应的 sign 字段。抓取使用的是 Python 脚本，用 crontab 每15分钟运行一次，记录保存到 CSV 文件里面。另外用 PHP 写了个简易的页面，将 CSV 读取并展示出来，地址为 https://pa.ci/ljk/index.html 。

以下是python脚本，因为csv文件不大，所以没有用mysql，直接w/r一把梭。

#!/usr/bin/env python3
# Fetch the tracked user's signature from the card endpoint and, when it
# differs from the newest entry (first row) of record.csv, prepend a
# timestamped row to that file.

import csv
import requests
import time

url = 'https://api.bilibili.com/x/web-interface/card'
params = (
    ('mid', '23947287'),
)
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:88.0) Gecko/20100101 Firefox/88.0'
}
# A timeout keeps a stalled connection from hanging the periodic job forever.
response = requests.get(url=url, params=params, headers=headers, timeout=10).json()
sign_text = response['data']['card']['sign']

file_path = r'record.csv'

# The newest record is kept on the first line of the file.
with open(file_path, newline='', encoding='utf-8') as f:
    first_line = next(csv.reader(f))

if str(first_line[1]) != str(sign_text):
    time_update = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    row = [time_update, sign_text]
    # newline='' on the read side as well, so line breaks inside quoted
    # fields survive the rewrite unchanged.
    with open(file_path, newline='', encoding='utf-8') as read_file:
        lines = list(csv.reader(read_file))
    lines.insert(0, row)
    with open(file_path, 'w', newline='', encoding='utf-8') as write_file:
        csv.writer(write_file).writerows(lines)

以下是PHP页面，直接读csv就完事了，现在文件不大响应速度还行，不知道以后文件太大会不会高io拖垮服务器。

<!DOCTYPE html>
<html>
  <head>
    <meta charset="UTF-8">
    <title>小约翰可汗的签名记录</title>
    <link rel="shortcut icon" href="favicon.ico">
  </head>


<body>
<center>
<h1>小约翰可汗今天鸽了吗？</h1>
<p>我怎么知道？自己去看！</p>
<h3>小约翰可汗的BB空间签名记录，每15分钟检查一次。</h3>
<p>小约翰可汗的签名记录为 <a href="https://pa.ci/ljk">https://pa.ci/ljk</a>（ljk是Little John Khan的缩写）</p>
<p>本站详情/说明请看 <a href="https://pa.ci/137.html">https://pa.ci/137.html</a></p>

<?php

// Renders every row of record.csv as one table row, in file order.

echo "<table>\n\n";

// Open a file
$file = fopen("record.csv", "r");

if ($file === false) {
        // fopen() returns false on failure; that value must never be handed
        // to fgetcsv(), so show a placeholder row instead of looping.
        echo "<tr><td>暂时无法读取记录。</td></tr> \n";
} else {
        // Fetching data from csv file row by row
        while (($data = fgetcsv($file)) !== false) {
                // HTML tag for placing in row format
                echo "<tr>";
                foreach ($data as $i) {
                        // A blank line yields a single null cell; cast it so
                        // htmlspecialchars() always receives a string.
                        echo "<td>" . htmlspecialchars((string) $i) . "</td>";
                }
                echo "</tr> \n";
        }

        // Closing the file
        fclose($file);
}

echo "\n</table>";
?>

</center>
</body>
</html>

更新功能，现在可以同时记录签名和头像。
python写的主程序如下：

#!/usr/bin/env python3
# Fetch the tracked user's signature and avatar from the card endpoint.
# A changed signature is prepended to time.csv; a changed avatar is saved
# under images/ and its file name is prepended to avatar.csv.

import os
from pathlib import Path
import csv
import requests
import time


def _first_row(csv_path):
    """Return the first row of the CSV file (the newest record)."""
    with open(csv_path, newline='', encoding='utf-8') as f:
        return next(csv.reader(f))


def _prepend_row(csv_path, row):
    """Rewrite the CSV file with ``row`` inserted as its first line."""
    # newline='' on the read side as well, so line breaks inside quoted
    # fields survive the rewrite unchanged.
    with open(csv_path, newline='', encoding='utf-8') as read_file:
        lines = list(csv.reader(read_file))
    lines.insert(0, row)
    with open(csv_path, 'w', newline='', encoding='utf-8') as write_file:
        csv.writer(write_file).writerows(lines)


url = 'https://api.bilibili.com/x/web-interface/card'

params = (
    ('mid', '23947287'),
)

headers = {
        "user-agent": ""
}

# A timeout keeps a stalled connection from hanging the periodic job forever.
response = requests.get(url=url, params=params, headers=headers, timeout=10).json()

sign_text = response['data']['card']['sign']
avatar_url = response['data']['card']['face']

# The avatar is identified by the last path component of its URL.
avatar_name = Path(avatar_url)
file_name = avatar_name.name

file_path = r'time.csv'

first_line = _first_row(file_path)

if str(first_line[1]) != str(sign_text):
    time_update = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    row = [time_update, sign_text]
    _prepend_row(file_path, row)

file_path_avatar = r'avatar.csv'

first_line = _first_row(file_path_avatar)

if str(first_line[1]) != str(file_name):
    save_path = r'images/'
    completeName = os.path.join(save_path, file_name)
    response = requests.get(avatar_url, timeout=30)
    # `with` guarantees the image file is closed even if the write fails.
    with open(completeName, "wb") as file:
        file.write(response.content)

    time_update = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    row = [time_update, file_name]
    _prepend_row(file_path_avatar, row)

PHP写的图片展示，最新的放在最上面。

<!DOCTYPE html>
<html>
  <head>
    <meta charset="UTF-8">
    <title>小约翰可汗的签名记录</title>
    <link rel="shortcut icon" href="favicon.ico">
  </head>


<body>
<center>
<h1>小约翰可汗今天鸽了吗？</h1>
<p>我怎么知道？自己去看！</p>
<h3>小约翰可汗的BB空间签名和头像记录，每15分钟检查一次。</h3>
<p>签名记录为 <a href="https://pa.ci/ljk/index.php">https://pa.ci/ljk/index.php</a>（ljk是Little John Khan的缩写）</p>
<p>头像记录为 <a href="https://pa.ci/ljk/images.php">https://pa.ci/ljk/images.php</a></p>
<p>本站详情/说明请看 <a href="https://pa.ci/137.html">https://pa.ci/137.html</a></p>

<?php

// Lists every .jpg under .images/: the most recently modified file first,
// then the remaining files in the order glob() returned them.

$dirname = '.images/';
// glob() can return false on error; treat that the same as "no images".
$images = glob($dirname . '*.jpg') ?: [];
$mostrecent = 0;
$mostrecentimg = null;

// scan
foreach ($images as $image) {
  $imagemod = filemtime($image);
  if ($mostrecent < $imagemod) {
    $mostrecentimg = $image;
    $mostrecent = $imagemod;
  }
}

// display
// Only emit the leading image when one was found; otherwise the page would
// contain an <img> with an empty src.
if ($mostrecentimg !== null) {
  echo '<img src="' . htmlspecialchars($mostrecentimg) . '" height="300"/><br />';
}
foreach($images as $image) {

  // the most recent was already output above so skip remainder this iteration
  if ($image == $mostrecentimg) continue;

  // File names are escaped so unusual characters cannot break the attribute.
  echo '<img src="' . htmlspecialchars($image) . '" height="300"/><br />';
}
?>

</center>
</body>
</html>

B站api应用实例：抓取小约翰可汗的BB空间签名

已有 4 条评论

添加新评论

最新文章

最近回复

分类

其它