在不登录的情况下频繁访问steam市场很容易被禁止访问。由于steam社区市场被墙了,所以需要挂VPN才能正常爬数据
首先挂好VPN,登录网页版steam,得到Cookie和user-agent的内容,然后分别复制到headers中的'Cookie'和'user-agent'位置上
设置好相关的路径后就可以了
得到的数据会保存到csv文件中
CSGO的每一种箱子都有一个对应的ID(即请求参数item_nameid),得到相应的ID后保存到case_id_list列表中就可以了。目前只写了获取箱子在售量和起价数据的程序,其他数据的获取原理上应该都差不多
初学python,有什么错误请多多指教
import sys
sys.path.append(r'E:\anaconda\Lib\site-packages')
from pyquery import PyQuery as pq
import requests
from urllib.parse import urlencode
from lxml import etree
import time
import random
import csv
# Steam Community Market "itemordershistogram" endpoint; the query string
# is appended to this prefix when a request URL is built.
base_url = 'https://steamcommunity.com/market/itemordershistogram?'

# Request headers sent with every call. 'Cookie' is a placeholder: paste
# the cookie string of a logged-in browser session here before running.
headers = {
    'Host': 'steamcommunity.com',
    'Referer': 'https://steamcommunity.com/market',
    'X-Requested-With': 'XMLHttpRequest',
    'Connection': 'keep-alive',
    'Cookie': ' ',
    'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Mobile Safari/537.36',
}

# IDs of the items whose request failed and should be fetched again.
retry_id_list = []
def get_page(case_id, timeout=10):
    """Fetch the order-histogram JSON for one market item.

    Args:
        case_id: The ``item_nameid`` of the item to query.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON payload on success, or ``0`` on any failure
        (network error, timeout, non-200 status, or a body that is not
        valid JSON). On failure ``case_id`` is also appended to the
        module-level ``retry_id_list``.
    """
    params = {
        'country': 'CN',
        'language': 'schinese',
        'currency': '23',  # currency code; presumably CNY - TODO confirm
        'item_nameid': case_id,
        'two_factor': 0,
    }
    url = base_url + urlencode(params)
    try:
        # requests.get() opens and closes its own session, so no connection
        # pool is leaked per call; the timeout keeps a stalled connection
        # from blocking the whole run.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as e:
        retry_id_list.append(case_id)
        print('error: ', e.args)
        return 0

    if response.status_code != 200:
        # Previously this path fell through and returned None, which the
        # caller's "== 0" failure check did not recognise.
        retry_id_list.append(case_id)
        print('error: ', ('unexpected status code', response.status_code))
        return 0

    try:
        payload = response.json()
    except ValueError as e:
        # A 200 response whose body is not JSON (e.g. an HTML error page).
        retry_id_list.append(case_id)
        print('error: ', e.args)
        return 0

    print('successully get page')
    return payload
def parse_page(json):
    """Extract the on-sale count and starting price from a histogram payload.

    The payload's ``sell_order_summary`` value is parsed as HTML and the
    text of its ``<span>`` elements is read: the first span is used as the
    count and the second as the price. The price is the second
    space-separated token of that span's text (assumes the text is a
    currency symbol, a space, then the amount - TODO confirm).

    Args:
        json: The decoded response payload, or a falsy value when the
            request failed.

    Returns:
        A ``(num, price)`` tuple of strings, or ``None`` when ``json`` is
        falsy or the summary is missing or not in the expected shape.
    """
    if not json:
        return None

    sell_items = json.get('sell_order_summary')
    if not sell_items:
        # No summary in the payload: nothing to parse.
        return None

    html = etree.HTML(sell_items)
    if html is None:
        return None

    num_and_price = html.xpath('//span/text()')
    if len(num_and_price) < 2:
        # Fewer than two spans: the count/price pair is not present.
        return None

    price_parts = num_and_price[1].split(' ')
    if len(price_parts) < 2:
        return None

    return num_and_price[0], price_parts[1]
# item_nameid of every case to query. The comment above each row lists the
# case names in the same order as the IDs on that row.
case_id_list = [
    # CS:GO Weapon Case, eSports 2013, Bravery, CS:GO Weapon Case 2, eSports 2013 Winter
    '1275323', '1269049', '1546282', '1913364', '15490345',
    # Winter Offensive, CS:GO Weapon Case 3, Phoenix, Huntsman
    '3438414', '6820494', '7177182', '8987853',
    # Breakout, eSports 2014 Summer, Vanguard, Chroma
    '14962905', '15490346', '23853214', '29205213',
    # Chroma 2, Falchion, Shadow, Revolver
    '40091990', '49359031', '67060949', '84444464',
    # Wildfire, Chroma 3, Gamma, Gamma 2
    '139654771', '149865785', '156110183', '165027636',
    # Glove, Spectrum, Hydra, Spectrum 2
    '175854202', '175880240', '175896275', '175917239',
    # Clutch, Horizon, Danger Zone, Prisma
    '175966708', '175999886', '176024744', '176042493',
]
# The two CSV rows written per run. Column 0 holds the run date (first row)
# or a blank cell (second row); column i (i >= 1) belongs to
# case_id_list[i - 1].
num_lst = []
price_lst = []
_time_ = time.strftime("%Y.%m.%d", time.localtime())  # current local date
num_lst.append(_time_)
price_lst.append(' ')
index = 0
# 1-based column positions whose first fetch failed and must be retried.
retry_index_list = []


def format_case_count(raw_count):
    """Convert an on-sale count to units of ten thousand, as a string.

    Larger counts keep fewer decimals: 1 decimal above 1,000,000, 2 above
    10,000, 3 above 1,000, and 4 otherwise. Rounding the last branch to
    four decimals keeps the exact value (count / 10000) while dropping
    binary floating-point noise such as ``0.030000000000000002``.

    Args:
        raw_count: The count as an integer or a string of digits.

    Returns:
        The scaled count formatted with ``str``.

    Raises:
        ValueError: If ``raw_count`` cannot be converted with ``int``.
    """
    count = int(raw_count)
    scaled = count * 0.0001
    if count > 1000000:
        digits = 1
    elif count > 10000:
        digits = 2
    elif count > 1000:
        digits = 3
    else:
        digits = 4
    return str(round(scaled, digits))


def fetch_case_row(case_id):
    """Fetch and parse one case.

    Returns:
        A ``(count_text, price)`` tuple, or ``None`` when the request
        failed or the response could not be parsed.
    """
    page = get_page(case_id)
    if not page:
        # Covers both failure markers get_page may hand back (0 or None).
        return None
    try:
        data = parse_page(page)
        if not data:
            return None
        return format_case_count(data[0]), data[1]
    except (AttributeError, IndexError, TypeError, ValueError) as e:
        # A malformed response for one case must not abort the run and
        # lose the rows already collected.
        print('error: ', e.args)
        return None


if __name__ == '__main__':
    # First pass: one request per case, leaving a blank cell on failure.
    for index, case_id in enumerate(case_id_list, start=1):
        row = fetch_case_row(case_id)
        if row is None:
            num_lst.append(' ')
            price_lst.append(' ')
            retry_index_list.append(index)
        else:
            num_lst.append(row[0])
            price_lst.append(row[1])
        # Random pause between requests to avoid being rate-limited.
        time.sleep(random.randint(2, 6))

    # Second pass: retry each failed case once. The case ID is looked up
    # from its column position, so the retry never iterates a list that
    # get_page appends to, and a second failure just leaves the cell blank.
    for retry_index in retry_index_list:
        print('retrying')
        row = fetch_case_row(case_id_list[retry_index - 1])
        if row is not None:
            num_lst[retry_index] = row[0]
            price_lst[retry_index] = row[1]
        time.sleep(random.randint(2, 6))

    # Append both rows to the CSV file. newline='' lets the csv module
    # control line endings (avoids blank lines between rows on Windows).
    with open('case_data.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(num_lst)
        writer.writerow(price_lst)