Python爬取百度网盘的目录

之前在某宝买了很多音乐资源,内容虽然很多,但是没有目录,想找自己喜欢的音乐很不方便。于是写了个脚本把目录遍历了下来,方便查找。

具体流程已经记不清了,代码贴在这里,供有相似需求的人参考。

# coding=utf-8
import json
import os

import requests

# Endpoint that backtrace() queries for the listing of a shared folder.
url = "https://pan.baidu.com/mbox/msg/shareinfo"

# Query parameters sent with every request. Each "xxx" is a placeholder that
# must be replaced with the values of the share being listed.
querystring = {"msg_id": "xxx", "from_uk": "xxx", "gid": "xxx", "type": "2"}

# Request headers. The session cookie carries login credentials (for example
# the BDUSS and STOKEN values), so it is read from the BAIDU_PAN_COOKIE
# environment variable instead of being stored in the source file. When the
# variable is unset an empty cookie is sent.
headers = {
    'Cookie': os.environ.get("BAIDU_PAN_COOKIE", ""),
    'User-Agent': "PostmanRuntime/7.20.1",
    'Accept': "*/*",
    'Cache-Control': "no-cache",
    'Postman-Token': "4c05adac-5857-4347-a76a-f34aa9b52edb,e1e57913-6b61-41d6-b45a-1a9e0dee8ab7",
    'Host': "pan.baidu.com",
    'Accept-Encoding': "gzip, deflate",
    'Connection': "keep-alive",
}


def backtrace(fs_id, out_file):
    """Recursively list a shared folder and write one line per file.

    Sends a GET request to the module-level ``url`` using ``headers`` and a
    copy of ``querystring``. For every record in the response, directories
    are descended into and files are written to ``out_file`` as
    ``<server_filename>----<path>`` followed by a newline.

    Args:
        fs_id: Identifier of the folder to list. It is only added to the
            query when its string form is longer than one character.
        out_file: Writable file object. It receives UTF-8 encoded bytes, so
            it must accept them (as a Python 2 file opened with 'w' does).

    Returns:
        None. If the request fails or the body is not valid JSON, the error
        is printed and this folder is skipped.
    """
    # Work on a copy so that one call's fs_id does not leak into later calls
    # through the shared module-level dict.
    params = dict(querystring)
    if len(str(fs_id)) > 1:
        params['fs_id'] = fs_id

    try:
        # A timeout keeps a stalled connection from hanging the whole walk.
        response = requests.request(
            "GET", url, headers=headers, params=params, timeout=30)
        result = response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a response body that is not valid JSON.
        print("error: %r" % (exc,))
        return

    print(result)
    print(result.get('errno'))

    # A response without 'records' (presumably an error reply - verify
    # against the API) is treated as an empty folder instead of raising.
    records = result.get('records') or []
    for record in records:
        # 'isdir' is compared against both the string and the integer form.
        if record['isdir'] == '1' or record['isdir'] == 1:
            backtrace(record['fs_id'], out_file)
        else:
            line = record['server_filename'] + "----" + record['path'] + '\n'
            print(line)
            out_file.write(line.encode('utf-8'))


# Walk the share starting from the placeholder id 'xxx' (replace it with a
# real fs_id) and write the listing to foo.txt. The with-block closes the
# file even if backtrace() raises part-way through.
with open('foo.txt', 'w') as out_file:
    backtrace('xxx', out_file)

 

你可能感兴趣的:(python)