使用Python脚本下载Bilibili相簿

参考链接:Bilibili相簿下载(Bilibili Album Download)

下载Bilibili相簿

目录

1. 接口展示
2. 代码转载
3. 代码详细注释
4. 总结

一、接口展示 ↶

api1(查询相簿总数):https://api.vc.bilibili.com/link_draw/v1/doc/upload_count?uid=<uid号>
api2(分页获取相簿列表,每页30条):https://api.vc.bilibili.com/link_draw/v1/doc/doc_list?page_size=30&biz=all&uid=<uid号>

实例:
uid = 2080663
api1 = https://api.vc.bilibili.com/link_draw/v1/doc/upload_count?uid=2080663
api2 = https://api.vc.bilibili.com/link_draw/v1/doc/doc_list?page_size=30&biz=all&uid=2080663

二、代码转载 ↶

#encoding=utf-8
import requests,os,sys

# Endpoint reporting how many album posts a user has uploaded; the user id
# is appended directly after "uid=".
basicApiUrl = 'https://api.vc.bilibili.com/link_draw/v1/doc/upload_count?uid='

# Endpoint listing a user's album posts, 30 per page; the user id is appended
# after "uid=" and the caller adds "&page_num=<n>".
apiUrl = 'https://api.vc.bilibili.com/link_draw/v1/doc/doc_list?page_size=30&biz=all&uid='

# Get the amount of all draws
# If error return 0
def getTotalDraw(bid):
    """Return the total number of album posts ("draws") uploaded by a user.

    Requests basicApiUrl + bid and reads data.all_count from the JSON reply.

    Args:
        bid: Bilibili user id as a string; appended verbatim to the URL.

    Returns:
        The count as an int, or 0 when the request fails, the body is not
        valid JSON, or data.all_count is missing or not numeric.
    """
    try:
        # A timeout keeps an unresponsive server from hanging the script.
        rsp = requests.get(basicApiUrl+bid, timeout=5)
        rspJson = rsp.json()
    except (requests.RequestException, ValueError):
        # Network-level failure, or a body that could not be parsed as JSON.
        return 0

    # Guard against a reply whose top level or 'data' member is not an
    # object (e.g. null); the membership test would otherwise raise.
    data = rspJson.get('data') if isinstance(rspJson, dict) else None
    if not isinstance(data, dict):
        return 0

    try:
        return int(data['all_count'])
    except (KeyError, TypeError, ValueError):
        # Field absent or not convertible to an integer.
        return 0

# Get the draw list, 30 draws in each page
def downloadDrawList(bid,page):
    """Download all pictures of the posts listed on one page of a user's album.

    Fetches apiUrl + bid + '&page_num=<page>', then hands each post's picture
    URLs to downloadDraw. Errors are printed rather than raised, so a failed
    page or a malformed post does not stop the overall run.

    Args:
        bid: Bilibili user id as a string.
        page: Page number sent as the page_num query parameter.
    """
    url = apiUrl+bid+'&page_num='+str(page)

    try:
        rsp = requests.get(url,timeout=5)
        items = rsp.json()['data']['items']
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:
        # Request failed, body was not JSON, or data.items is missing:
        # nothing can be downloaded for this page.
        print(e)
        return

    # 'items or []' tolerates a null/empty item list.
    for item in items or []:
        try:
            did = str(item['doc_id'])
            # Ordered list of picture URLs; downloadDraw indexes it 0..n-1.
            urls = [pic['img_src'] for pic in item['pictures']]
        except (KeyError, TypeError) as e:
            # Skip only the malformed post instead of aborting the page.
            print(e)
            continue

        downloadDraw(bid,did,urls)

# Download draws
def downloadDraw(bid,did,urls):
    """Download the pictures of one post into the ./<bid>/ directory.

    Each picture is saved as <did>_b<index><ext>, where <ext> is the file
    extension found in the URL. A picture whose target file already exists
    is skipped. A failure on one picture is printed and the loop continues
    with the next one.

    Args:
        bid: Bilibili user id; also the name of the output directory.
        did: Post ("draw") id as a string.
        urls: Picture URLs addressable as urls[0] .. urls[len(urls)-1]
            (a list, or a dict keyed by position).
    """
    for index in range(len(urls)):
        u = urls[index]
        try:
            # Take the extension from the URL with any '?query' or
            # '#fragment' removed so they never end up in the file name.
            cleanUrl = u.split('#')[0].split('?')[0]
            ext = os.path.splitext(cleanUrl)[1]

            # File naming
            ## did: Draw id, index: position of the picture inside the post
            fileName = did+'_b'+str(index)+ext
            path = os.path.join('.',bid,fileName)

            if os.path.exists(path):
                print('Skipped '+did+' '+u)
                continue
            print('Downloading '+did+' '+u)

            rsp = requests.get(u,timeout=20)
            # Do not store an HTTP error page as if it were the image; such
            # a file would also be skipped on every later run.
            rsp.raise_for_status()

            with open(path,'wb') as f:
                f.write(rsp.content)
        except Exception as e:
            # Deliberate per-picture best-effort boundary: report and go on.
            print(e)
            print('Fail to download: '+did+' '+u)

# Script entry point: download every album picture of the user id given as
# the first command-line argument into ./<uid>/.
if __name__=='__main__':
    if(len(sys.argv)<2):
        print('Please enter the bilibili user id.')
        sys.exit(0)

    bid = str(sys.argv[1])

    # Create drawer's directory if it is not there yet. A real failure
    # (e.g. permission denied) surfaces here instead of being swallowed and
    # reappearing later as one error per picture.
    if not os.path.isdir('./'+bid):
        os.makedirs('./'+bid)

    totalDraw = getTotalDraw(bid)
    # Integer ceiling division. The previous expression produced a float on
    # Python 3 whenever totalDraw was a multiple of 30 (including 0), which
    # made range() raise TypeError.
    totalPage = (totalDraw+29)//30
    for page in range(totalPage):
        downloadDrawList(bid,page)
    

三、代码详细注释 ↶

#encoding=utf-8
import requests,os,sys #导入requests,os,sys模块

# Endpoint reporting how many album posts a user has uploaded; the user id
# is appended directly after "uid=".
basicApiUrl = 'https://api.vc.bilibili.com/link_draw/v1/doc/upload_count?uid='

# Endpoint listing a user's album posts, 30 per page; the user id is appended
# after "uid=" and the caller adds "&page_num=<n>".
apiUrl = 'https://api.vc.bilibili.com/link_draw/v1/doc/doc_list?page_size=30&biz=all&uid='

# Get the amount of all draws
# If error return 0
def getTotalDraw(bid):
    """Return the total number of album posts ("draws") uploaded by a user.

    Requests basicApiUrl + bid and reads data.all_count from the JSON reply.

    Args:
        bid: Bilibili user id as a string; appended verbatim to the URL.

    Returns:
        The count as an int, or 0 when the request fails, the body is not
        valid JSON, or data.all_count is missing or not numeric.
    """
    try:
        rsp = requests.get(basicApiUrl+bid, timeout=5)  # GET the URL formed by joining basicApiUrl and bid; give up after 5 seconds
        rspJson = rsp.json()  # parse the response body as JSON
    except (requests.RequestException, ValueError):  # network failure, or the body is not valid JSON
        return 0

    # Take rspJson['data'] only when the top level is a JSON object
    data = rspJson.get('data') if isinstance(rspJson, dict) else None
    if not isinstance(data, dict):  # 'data' missing or not an object (e.g. null)
        return 0  # nothing to read, so return 0

    try:
        return int(data['all_count'])  # return data['all_count'] converted to an integer
    except (KeyError, TypeError, ValueError):  # key absent or value not convertible
        return 0  # otherwise return 0

# Get the draw list, 30 draws in each page
def downloadDrawList(bid,page):
    """Download all pictures of the posts listed on one page of a user's album.

    Fetches apiUrl + bid + '&page_num=<page>', then hands each post's picture
    URLs to downloadDraw. Errors are printed rather than raised, so a failed
    page or a malformed post does not stop the overall run.

    Args:
        bid: Bilibili user id as a string.
        page: Page number sent as the page_num query parameter.
    """
    # Join apiUrl, bid, '&page_num=' and str(page) into the request URL
    url = apiUrl+bid+'&page_num='+str(page)

    try:
        rsp = requests.get(url,timeout=5)  # request the URL with a 5 second timeout
        items = rsp.json()['data']['items']  # parse the JSON body and take data.items
    except (requests.RequestException, ValueError, KeyError, TypeError) as e:  # request failed, body not JSON, or data.items missing
        print(e)  # print the error
        return  # nothing can be downloaded for this page

    for item in items or []:  # walk the posts; 'or []' tolerates a null/empty list
        try:
            did = str(item['doc_id'])  # post id, converted to a string
            # Collect the img_src of every picture, in order, into a list
            urls = [pic['img_src'] for pic in item['pictures']]
        except (KeyError, TypeError) as e:  # this post lacks an expected field
            print(e)  # print the error
            continue  # skip only this post, keep processing the page

        # Download
        downloadDraw(bid,did,urls)  # pass bid, did and urls to downloadDraw

# Download draws
def downloadDraw(bid,did,urls):
    """Download the pictures of one post into the ./<bid>/ directory.

    Each picture is saved as <did>_b<index><ext>, where <ext> is the file
    extension found in the URL. A picture whose target file already exists
    is skipped. A failure on one picture is printed and the loop continues
    with the next one.

    Args:
        bid: Bilibili user id; also the name of the output directory.
        did: Post ("draw") id as a string.
        urls: Picture URLs addressable as urls[0] .. urls[len(urls)-1]
            (a list, or a dict keyed by position).
    """
    for index in range(len(urls)):  # loop len(urls) times; index is the picture's position
        u = urls[index]  # the URL stored at position index
        try:
            # Get image format from url
            cleanUrl = u.split('#')[0].split('?')[0]  # drop any '#fragment' and '?query' part
            ext = os.path.splitext(cleanUrl)[1]  # file extension including the dot, e.g. '.jpg'

            # File naming
            ## bid: Bilibili user id
            ## did: Draw id
            fileName = did+'_b'+str(index)+ext  # join did, '_b', str(index) and the extension
            path = os.path.join('.',bid,fileName)  # target path ./<bid>/<fileName>

            if os.path.exists(path):  # file already downloaded: report it and move on to the next picture
                print('Skipped '+did+' '+u)
                continue
            print('Downloading '+did+' '+u)  # print 'Downloading ' followed by did and the URL

            # Download single image
            rsp = requests.get(u,timeout=20)  # request the picture with a 20 second timeout
            rsp.raise_for_status()  # raise on an HTTP error status so an error page is never saved as an image

            # Create image file
            with open(path,'wb') as f:  # open the target in binary write mode; closed automatically
                f.write(rsp.content)  # write the response bytes to the file
        except Exception as e:  # per-picture best-effort boundary
            print(e)  # print the error
            print('Fail to download: '+did+' '+u)  # print 'Fail to download: ' followed by did and the URL

if __name__=='__main__': # when imported as a module, __name__ is not '__main__' and the code below does not run
                         # when run directly, __name__ == '__main__' and the code below runs
    if(len(sys.argv)<2): # no uid argument given on the command line: print a hint and exit
        print('Please enter the bilibili user id.')
        sys.exit(0)

    bid = str(sys.argv[1]) # take the uid from the first command-line argument

    # Create drawer's directory (named after the uid) if it is not there yet.
    # A real failure (e.g. permission denied) surfaces here instead of being
    # swallowed and reappearing later as one error per picture.
    if not os.path.isdir('./'+bid):
        os.makedirs('./'+bid)

    totalDraw = getTotalDraw(bid) # total number of posts reported for this uid
    # Number of 30-item pages, by integer ceiling division. The previous
    # expression produced a float on Python 3 whenever totalDraw was a
    # multiple of 30 (including 0), which made range() raise TypeError.
    totalPage = (totalDraw+29)//30
    for page in range(totalPage): # loop over pages 0 .. totalPage-1
        downloadDrawList(bid,page) # pass bid and page to downloadDrawList
    

四、总结 ↶

其实没有什么实质性的东西,换一种方式写文章也挺有意思的。

点我回顶部 ☚

 
 
 
 
 
 
 
Fin.

你可能感兴趣的:(我的爬虫之旅)