python 爬取steam/csgo市场数据

在不登录的情况下频繁访问steam市场很容易被禁止访问;另外由于steam社区市场被墙了,所以需要挂VPN才能正常爬数据

首先挂好VPN,登录网页版steam,在浏览器中得到请求头里cookies和user-agent的内容,然后分别复制到代码中headers的'Cookie'和'user-agent'字段上
python 爬取steam/csgo市场数据_第1张图片
设置好相关的路径后就可以了
得到的数据会保存到csv文件中

csgo的每一种箱子都会有一个对应的id,得到相应的ID后保存到列表(case_id_list)中就可以了。这里只写了获取箱子在售量和起价数据的程序,其他数据的爬取原理上应该都差不多

初学python,有什么错误请多多指教

import sys
sys.path.append(r'E:\anaconda\Lib\site-packages')

from pyquery import PyQuery as pq
import requests
from urllib.parse import urlencode
from lxml import etree
import time
import random
import csv

# Endpoint that returns the order histogram for one market item; the query
# string is appended by get_page().
base_url = 'https://steamcommunity.com/market/itemordershistogram?'

# Request headers sent with every call. Paste the cookie string of a
# logged-in browser session into 'Cookie' before running.
headers = {
    'Host': 'steamcommunity.com',
    'Referer': 'https://steamcommunity.com/market',
    'X-Requested-With': 'XMLHttpRequest',
    'Connection': 'keep-alive',
    'Cookie': ' ',
    'user-agent': (
        'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/69.0.3497.100 Mobile Safari/537.36'
    ),
}

# Ids whose request failed; filled in by get_page().
retry_id_list = []
def get_page(case_id, timeout=10):
    """Fetch the order histogram of one market item as decoded JSON.

    Args:
        case_id: Value sent as the ``item_nameid`` query parameter.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        The decoded JSON payload on success. On any failure (network error,
        timeout, non-200 status, body that is not valid JSON) ``case_id`` is
        appended to the module-level ``retry_id_list`` and ``0`` is returned.
    """
    params = {'country': 'CN',
              'language': 'schinese',
              'currency': '23',   # currency code expected by the endpoint
              'item_nameid': case_id,
              'two_factor': 0
            }

    url = base_url + urlencode(params)

    try:
        # requests.get() opens and closes its own session, so no connection
        # pool is left behind after the call.
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            payload = response.json()
            print('successully get page')
            return payload
        print('error: ', ('unexpected HTTP status', response.status_code))
    except (requests.RequestException, ValueError) as e:
        # RequestException covers connection errors and timeouts; ValueError
        # covers a 200 response whose body is not valid JSON.
        print('error: ', e.args)

    # Every failure path ends here, so callers always see the same sentinel.
    retry_id_list.append(case_id)
    return 0

def parse_page(json):
    """Pull the listing count and the starting price out of a payload.

    Args:
        json: Decoded response as produced by ``get_page``. Any falsy value
            (``0``, ``None``, an empty dict) means "nothing to parse".

    Returns:
        A ``(num, price)`` tuple of strings taken from the ``<span>`` texts
        of the ``sell_order_summary`` HTML fragment, or ``None`` when
        ``json`` is falsy.
    """
    if not json:
        return None

    summary_markup = json.get('sell_order_summary')
    span_texts = etree.HTML(summary_markup).xpath('//span/text()')

    # Second span: keep the token after the first space
    # (presumably "<currency symbol> <amount>" - verify against a response).
    price = span_texts[1].split(' ')[1]
    # First span: the number of listings, kept as text.
    num = span_texts[0]
    return num, price

# item_nameid of every weapon case to query, in CSV column order.
# The trailing names are the author's short nicknames for each case,
# translated from the original notes.
case_id_list = [
    '1275323',    # CS:GO
    '1269049',    # eSports 2013
    '1546282',    # Bravo
    '1913364',    # CS:GO 2
    '15490345',   # eSports 2013 Winter
    '3438414',    # Winter Offensive
    '6820494',    # CS:GO 3
    '7177182',    # Phoenix
    '8987853',    # Huntsman
    '14962905',   # Breakout
    '15490346',   # eSports 2014 Summer
    '23853214',   # Vanguard
    '29205213',   # Chroma
    '40091990',   # Chroma 2
    '49359031',   # Falchion
    '67060949',   # Shadow
    '84444464',   # Revolver
    '139654771',  # Wildfire
    '149865785',  # Chroma 3
    '156110183',  # Gamma
    '165027636',  # Gamma 2
    '175854202',  # Glove
    '175880240',  # Spectrum
    '175896275',  # Hydra
    '175917239',  # Spectrum 2
    '175966708',  # Clutch
    '175999886',  # Horizon
    '176024744',  # Danger Zone
    '176042493',  # Prisma
]

# Today's local date, used as the label in the first cell of the counts row.
_time_ = time.strftime("%Y.%m.%d")

# The two CSV rows built by the script. The prices row starts with a blank
# cell so that it stays column-aligned with the dated counts row.
num_lst = [_time_]
price_lst = [' ']

# Running 1-based position of the case being processed, and the positions
# whose first request failed and has to be retried.
index = 0
retry_index_list = []

def _to_wan(raw_count):
    """Convert a listing count to units of 10,000, rounded by magnitude.

    Larger counts keep fewer decimals (1, 2 or 3); counts of 1000 or less are
    not rounded. Dividing by 10000 instead of multiplying by 0.0001 avoids
    float noise such as 0.00030000000000000003 in the output.
    """
    count = int(raw_count)
    scaled = count / 10000
    if count > 1000000:
        return round(scaled, 1)
    if count > 10000:
        return round(scaled, 2)
    if count > 1000:
        return round(scaled, 3)
    return scaled


def _fetch_entry(case_id):
    """Return ``(count_text, price)`` for one case, or ``None`` on failure."""
    payload = get_page(case_id)
    if not payload:
        # get_page signals failure with a falsy value.
        return None
    try:
        raw_count, price = parse_page(payload)
        return str(_to_wan(raw_count)), price
    except (AttributeError, IndexError, TypeError, ValueError) as exc:
        # The payload did not have the expected shape; report it and let the
        # caller treat this case as failed instead of aborting the whole run.
        print('error: ', exc.args)
        return None


if __name__ == '__main__':
    # (column, case_id) pairs whose first attempt failed. Keeping the pair
    # together means the retry pass cannot write into the wrong column.
    failed = []

    for case_id in case_id_list:
        column = len(num_lst)  # position this case occupies in both rows
        entry = _fetch_entry(case_id)
        if entry is None:
            # Blank placeholders keep the columns aligned until the retry.
            num_lst.append(' ')
            price_lst.append(' ')
            failed.append((column, case_id))
        else:
            num_lst.append(entry[0])
            price_lst.append(entry[1])
        # Random pause between requests to avoid being rate limited.
        time.sleep(random.randint(2, 6))

    # One retry per failed case; a second failure leaves the blank cells.
    for column, case_id in failed:
        print('retrying')
        entry = _fetch_entry(case_id)
        if entry is not None:
            num_lst[column], price_lst[column] = entry
        time.sleep(random.randint(2, 6))

    # newline='' lets the csv module control line endings; without it every
    # row is followed by an empty line on Windows.
    with open('case_data.csv', 'a+', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(num_lst)
        writer.writerow(price_lst)

你可能感兴趣的:(steam,csgo,steam,爬虫,python)