【Python爬虫】爬取800种主流数字货币每日市场交易价格

不说太多描述,直接上代码。
目标网站:CoinMarketCap(https://coinmarketcap.com)
爬虫目标:爬取目标网站上按市值排名前800的数字货币在2019-01-01至2020-07-01期间的每日价格数据,包括开盘价、最高价、最低价、收盘价、交易量和市值

第一部分:获取排名前800的数字货币信息

import requests
from urllib.parse import urlencode
import pandas as pd
import time
import random

# Listing endpoint; the trailing '?' lets an encoded query string be appended directly.
baseUrl = 'https://web-api.coinmarketcap.com/v1/cryptocurrency/listings/latest?'

# Browser-like request headers sent along with the listing request.
headers = {
    'accept': 'application/json, text/plain, */*',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9,und;q=0.8',
    'origin': 'https://coinmarketcap.com',
    'referer': 'https://coinmarketcap.com/',
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
}

def get_json(num, timeout=30):
    """Fetch one page of the market-cap-sorted coin listing as decoded JSON.

    Args:
        num: Value sent as the ``start`` query parameter (position of the
            first entry requested).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: On a network failure, a
            timeout, or a non-success HTTP status.
    """
    data = {
        'convert': 'USD,BTC,ETH,XRP,BCH,LTC',
        'cryptocurrency_type': 'all',
        'limit': 200,
        'sort': 'market_cap',
        'sort_dir': 'desc',
        'start': num
    }
    url = baseUrl + urlencode(data)
    # NOTE(review): verify=False disables TLS certificate checking; it is kept
    # to preserve the original behaviour, but should be removed if possible.
    response = requests.get(url, headers=headers, verify=False, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to decode an error page.
    response.raise_for_status()
    # Long pause between listing requests to stay polite towards the server.
    time.sleep(50 + random.random())
    return response.json()


# Collect id / name / symbol / slug for four consecutive listing pages
# (start = 1, 201, 401, 601) and save them to an Excel sheet.
coin_rows = []
for i in range(4):
    num = i * 200 + 1
    payload = get_json(num)

    # Iterate over what the server actually returned rather than assuming
    # exactly 200 entries; a short page would otherwise raise IndexError.
    for coin in payload['data']:
        coin_rows.append({
            'ID': coin['id'],
            'name': coin['name'],
            'symbol': coin['symbol'],
            'slug': coin['slug'],
        })

# Build the frame once; explicit columns keep the header even with no rows.
df = pd.DataFrame(coin_rows, columns=['ID', 'name', 'symbol', 'slug'])

df.to_excel(r"bitID.xlsx", index=False)

第二部分:获取每个数字货币在区间内的价格信息

import pandas as pd
import requests
from bs4 import BeautifulSoup
import time
import random

# Accumulator for the per-coin price tables.
df = pd.DataFrame()

# Coin list written by the first part; its 'slug' column drives the URLs below.
path = r"bitID.xlsx"
name = pd.read_excel(path)

def get_message(datas, title):
    """Convert historical-data table rows into a DataFrame of text values.

    Args:
        datas: Iterable of table-row elements; each must provide
            ``find_all('td')`` returning cells that provide ``getText()``.
        title: Label stored in the ``title`` column of every output row.

    Returns:
        A DataFrame with the columns ``title``, ``date``, ``open``, ``max``,
        ``min``, ``close``, ``cnt`` (volume) and ``value`` (market cap).
        All values are the raw cell texts. Rows with fewer than seven cells
        are skipped.
    """
    # date, open, high, low, close, volume, market cap
    fields = ['date', 'open', 'max', 'min', 'close', 'cnt', 'value']

    records = []
    for data in datas:
        # Look the cells up once per row instead of once per column.
        cells = data.find_all('td')
        if len(cells) < len(fields):
            # Not a complete price row (e.g. a header or filler row).
            continue
        records.append([title] + [cell.getText() for cell in cells[:len(fields)]])

    df1 = pd.DataFrame(records, columns=['title'] + fields)

    return df1




def _fetch_page(url, retries=1, timeout=30):
    """Download *url*, retrying after a pause; return the response or None.

    Only request-level failures (connection errors, timeouts, non-success
    HTTP statuses) are caught. The builtin ``ConnectionError`` is not the
    exception requests raises, and a bare ``except`` would also swallow
    Ctrl-C, so ``requests.exceptions.RequestException`` is caught instead.
    """
    for attempt in range(retries + 1):
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            return response
        except requests.exceptions.RequestException as err:
            print('request failed:', url, err)
            if attempt < retries:
                time.sleep(10 + random.random())
    return None


# Download the historical-data page of every coin in the list, parse its
# price table and write all rows to one CSV file.
price_frames = []
for i, slug in enumerate(name['slug']):
    print(i, ':', slug)
    url = r'https://coinmarketcap.com/currencies/' + slug + '/historical-data/?start=20190101&end=20200701'
    response = _fetch_page(url)

    # Pause between coins regardless of the outcome.
    time.sleep(10 + random.random())
    if response is None:
        continue

    soup = BeautifulSoup(response.content, 'lxml')

    # find() returns None when the expected markup is missing; skip that coin
    # instead of crashing and losing everything collected so far.
    section = soup.find('div', class_='sc-1oio33t-0 kQXqPh cmc-tab-historical-data')
    heading = section.find('h2') if section is not None else None
    if heading is None:
        print('skipped (page layout not recognised):', slug)
        continue
    title = heading.getText()

    datas = section.find_all('tr', class_='cmc-table-row')

    price_frames.append(get_message(datas, title))

# Concatenate once at the end; pd.concat rejects an empty list.
df = pd.concat(price_frames) if price_frames else pd.DataFrame()

df.to_csv(r"bitPrice.csv", index=False)

有不懂的可以留言,看到会回复,码字不易,喜欢请点赞,谢谢!!

你可能感兴趣的:(爬虫,python,数字货币,比特币,价格,爬虫)