代码仅供学习交流,请勿用于非法用途
-- Rebuild the `shop` schema from scratch; an existing database of that name
-- (and everything in it) is dropped first.
DROP DATABASE IF EXISTS shop;
CREATE DATABASE shop DEFAULT CHARACTER SET utf8;
USE shop;
-- One row per store; a given (area_id, store_id) pair may be stored only once.
CREATE TABLE `store` (
    `id`       INT         NOT NULL AUTO_INCREMENT,
    `store_id` VARCHAR(18) NOT NULL COMMENT 'store_id',
    `area_id`  VARCHAR(18) NOT NULL COMMENT 'area_id',
    PRIMARY KEY (`id`),
    UNIQUE KEY `area_id` (`area_id`, `store_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- One row per product id; the unique index rejects a goods_id that is
-- already present.
CREATE TABLE `goods` (
    `id`       INT         NOT NULL AUTO_INCREMENT,
    `goods_id` VARCHAR(18) NOT NULL COMMENT 'goods_id',
    PRIMARY KEY (`id`),
    UNIQUE KEY `goods_id` (`goods_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
import requests
import json
from queue import Queue
import threading
import time
import xlrd
import xlwt
from xlutils.copy import copy
import MySQLdb
import datetime
'''
@Author :王磊
@Date :2019/9/20
@Description:优选微信小程序全国店铺商品数据爬取
'''
# -----------------------------------------------------------
# User-editable settings.
# Number of spider threads started by main().
threadNum = 1
# Directory that receives the generated .xls workbooks.
excelPath = "c:/users/it1002/Desktop/data/excel"
# Directory that receives the downloaded product images.
imgPath = "c:/users/it1002/Desktop/data/img"
# Database account, password and database name.
mysql_user, mysql_password, mysql_database = "root", "root", "shop"
# -----------------------------------------------------------
# User-Agent string shared by both header sets below.
_userAgent = "Mozilla/5.0 (Linux; Android 5.1.1; OPPO R11 Build/NMF26X; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/74.0.3729.136 Mobile Safari/537.36 MicroMessenger/7.0.6.1460(0x27000634) Process/appbrand0 NetType/WIFI Language/zh_CN"
# Headers sent with form-encoded POST requests (see postHtml).
headers = dict([
    ("content-type", "application/x-www-form-urlencoded"),
    ("User-Agent", _userAgent),
    ("Host", "mall-store.xsyxsc.com"),
])
# Headers sent with GET requests (see getHtml).
headers_ = dict([("User-Agent", _userAgent)])
# Key appended to every API request as the `userKey` parameter.
userKey = "8be3d5dd-23e1-4b04-bf12-efca98d69d12"
# Area ids encountered so far; module-level, so shared by all spider threads.
areaList = list()
class xsyxSpider(threading.Thread):
    """Worker thread that crawls stores and goods for queued locations.

    Each queue item is ``[area, mapX, mapY]``. For every item the thread
    creates a dated Excel workbook, fetches the nearby stores, records each
    store in MySQL and, for stores not recorded before, switches the account
    to that store and appends one workbook row per product that was not
    recorded before.

    The class relies on module-level configuration (``excelPath``,
    ``imgPath``, ``userKey``, ``mysql_user``, ``mysql_password``,
    ``mysql_database``, ``areaList``) and on the ``postHtml`` / ``getHtml``
    request helpers.
    """

    # Workbook column headers. The same names are the keys read, in this
    # order, from a product-detail payload ('primaryUrls' is replaced by the
    # local path of the downloaded image).
    _EXCEL_TITLE = (
        'prId', 'acId', 'preId', 'sku', 'dailySaleTime', 'tmBuyStart',
        'tmBuyEnd', 'tmShowStart', 'tmShowEnd', 'adUrl', 'prName',
        'tmPickUp', 'limitQty', 'ulimitQty', 'marketAmt', 'saleAmt',
        'prType', 'areaId', 'shelfLife', 'folQty', 'daySaleQty', 'saleQty',
        'vesName', 'attrs', 'prDetail', 'shTitle', 'prBrief', 'primaryUrls',
        'detailUrls', 'consumerNum', 'hasImgTxt', 'prTitle', 'yieldly',
        'brName', 'specialSale', 'brId', 'status',
    )

    def __init__(self, loaQueue, *args, **kwargs):
        """Create a spider that consumes ``[area, mapX, mapY]`` items from ``loaQueue``."""
        super(xsyxSpider, self).__init__(*args, **kwargs)
        # Path of the workbook currently being filled; set by initExcel().
        self.excelPath = ""
        self.loaQueue = loaQueue
        self.excelTitle = list(self._EXCEL_TITLE)

    def getDate(self):
        """Return today's date as a ``YYYY-MM-DD`` string."""
        return str(datetime.date.today())

    def initExcel(self, areaName):
        """Create ``<excelPath>/<date>-<areaName>.xls`` containing only the header row."""
        self.excelPath = "{}/{}-{}.xls".format(excelPath, self.getDate(), areaName)
        book = xlwt.Workbook()
        sheet = book.add_sheet(u'double', cell_overwrite_ok=True)
        for col, title in enumerate(self.excelTitle):
            sheet.write(0, col, title)
        book.save(self.excelPath)

    def writeExcel(self, data):
        """Append ``data`` (a sequence of cell values) as a new row of the current workbook.

        Does nothing when ``data`` is empty or None. The workbook is re-read
        and re-saved on every call.
        """
        if not data:
            return
        workbook = xlrd.open_workbook(self.excelPath)
        firstSheet = workbook.sheet_by_name(workbook.sheet_names()[0])
        nextRow = firstSheet.nrows
        writable = copy(workbook)
        target = writable.get_sheet(0)
        for col, value in enumerate(data):
            try:
                target.write(nextRow, col, value)
            except Exception:
                # Best effort: a cell the writer rejects is left blank so
                # that the rest of the row is still saved.
                continue
        writable.save(self.excelPath)

    def getShopList(self, mapX, mapY):
        """Return the ``data`` field of the near-store response, or None if it is absent."""
        url = "https://mall-store.xsyxsc.com/mall-store/store/getNearStoreList"
        payload = {
            "mapX": str(mapX),
            "mapY": str(mapY),
            "userKey": userKey,
        }
        resp = postHtml(url, payload)
        try:
            return resp['data']
        except (KeyError, TypeError):
            return None

    def updateStore(self, storeId):
        """Ask the service to make ``storeId`` the current store; return True on 'success'."""
        url = ("https://user.xsyxsc.com/api/user/user/updateCurrStoreId?userKey={}"
               "&storeId={}").format(userKey, storeId)
        resp = getHtml(url)
        print("切换店铺响应:" + str(resp))
        try:
            return resp['rspCode'] == 'success'
        except (KeyError, TypeError):
            return False

    def getTs(self):
        """Return the current Unix time in milliseconds as a digit string."""
        # Computed arithmetically rather than by slicing repr(time.time()),
        # whose number of decimals is not fixed.
        return str(int(time.time() * 1000))

    def getGoodsList(self, storeId, areaId):
        """Return ``data.products`` of the index response, or None if it is absent."""
        url = ("https://mall.xsyxsc.com/user/product/indexData?storeId={}"
               "&areaId={}&ts={}&userKey={}").format(storeId, areaId, self.getTs(), userKey)
        resp = getHtml(url)
        try:
            return resp['data']['products']
        except (KeyError, TypeError):
            return None

    def saveImg(self, url, productId):
        """Download ``url`` to ``<imgPath>/<productId>.jpg``.

        Returns the saved path, or None when the download or the write fails.
        """
        try:
            img = requests.get(url, timeout=10)
            # Do not store an HTTP error page under a .jpg name.
            img.raise_for_status()
            path = imgPath + "/" + str(productId) + ".jpg"
            with open(path, 'wb') as f:
                f.write(img.content)
            return path
        except (requests.RequestException, OSError):
            return None

    def _detailToRow(self, detail, imagePath):
        """Flatten a product-detail payload into one workbook row.

        Every column is ``str(detail[column])`` or "" when the key cannot be
        read; the 'primaryUrls' column holds ``imagePath`` instead.
        """
        row = []
        for column in self._EXCEL_TITLE:
            if column == 'primaryUrls':
                row.append(imagePath)
                continue
            try:
                row.append(str(detail[column]))
            except (KeyError, TypeError, IndexError):
                row.append("")
        return row

    def getGoodsDetail(self, productId, activityId, storeId, areaId):
        """Fetch one product's detail and return it as a workbook row.

        The first entry of ``primaryUrls`` is downloaded through saveImg()
        and its local path (or "" when unavailable) is stored in that column.
        Returns None when the response has no ``data`` field.
        """
        url = ("https://mall.xsyxsc.com/user/product/productInfo?productId={}"
               "&activityId={}&storeId={}&productType=CHOICE&areaId={}"
               "&userKey={}").format(productId, activityId, storeId, areaId, userKey)
        resp = getHtml(url)
        try:
            detail = resp['data']
        except (KeyError, TypeError):
            return None
        try:
            imagePath = self.saveImg(detail['primaryUrls'][0], productId) or ""
        except (KeyError, IndexError, TypeError):
            imagePath = ""
        return self._detailToRow(detail, imagePath)

    def _executeSql(self, statements):
        """Run ``(sql, params)`` pairs on a fresh connection, committing after each.

        Returns True when every statement succeeded and False otherwise. The
        connection is always closed.
        """
        conn = None
        try:
            conn = MySQLdb.connect(user=mysql_user, password=mysql_password,
                                   database=mysql_database, charset='utf8')
            cursor = conn.cursor()
            for sql, params in statements:
                cursor.execute(sql, params)
                conn.commit()
            return True
        except MySQLdb.IntegrityError:
            # Expected for rows that violate a unique key; callers use the
            # False result to skip stores/goods that are already recorded.
            return False
        except MySQLdb.Error as e:
            print("MySQL error: " + str(e))
            return False
        finally:
            if conn is not None:
                conn.close()

    def addStore(self, store):
        """Insert ``store`` (keys 'store_id', 'area_id'); return False if the insert fails."""
        try:
            params = (store['store_id'], store['area_id'])
        except (KeyError, TypeError):
            return False
        # Values are passed as parameters instead of being formatted into the SQL.
        return self._executeSql([
            ("insert into store(store_id, area_id) values(%s, %s)", params),
        ])

    def addGoods(self, goods):
        """Insert ``goods`` (key 'goods_id'); return False if the insert fails."""
        try:
            params = (goods['goods_id'],)
        except (KeyError, TypeError):
            return False
        return self._executeSql([
            ("insert into goods(goods_id) values(%s)", params),
        ])

    def initDb(self):
        """Delete every row of ``goods`` and ``store``; return True on success."""
        return self._executeSql([
            ("delete from goods", None),
            ("delete from store", None),
        ])

    def _crawlStore(self, shop):
        """Record one store and, if it was not recorded before, export its new goods."""
        storeId = shop['storeId']
        areaId = shop['areaId']
        # NOTE(review): areaList only remembers the area ids seen so far; the
        # source's indentation was lost, so confirm whether the steps below
        # were meant to run only for the first store of each area.
        if areaId not in areaList:
            areaList.append(areaId)
        if not self.addStore({'store_id': storeId, 'area_id': areaId}):
            return
        print("当前区域id:" + str(areaId))
        if not self.updateStore(storeId):
            return
        goodsList = self.getGoodsList(storeId, areaId)
        if not goodsList:
            return
        for goods in goodsList:
            productId = goods['prId']
            activityId = goods['acId']
            if not self.addGoods({'goods_id': productId}):
                continue
            row = self.getGoodsDetail(productId, activityId, storeId, areaId)
            if row is not None:
                self.writeExcel(row)
            # Pause between product-detail requests.
            time.sleep(1)

    def run(self):
        """Clear the tables, then process queued locations until the queue is empty."""
        # Local import: the file's import block only brings in Queue.
        from queue import Empty

        # NOTE(review): every thread clears the tables when it starts, so
        # with threadNum > 1 a later thread would wipe rows added by an
        # earlier one.
        if not self.initDb():
            return
        while True:
            try:
                # Non-blocking get: checking empty() and then calling get()
                # can block forever if another thread takes the last item.
                loa = self.loaQueue.get_nowait()
            except Empty:
                break
            area, mapX, mapY = loa[0], loa[1], loa[2]
            self.initExcel(area)
            print("当前选中区域:" + area + ", 经纬度:mapX : " + str(mapX) + ", mapY : " + str(mapY))
            shopList = self.getShopList(mapX, mapY)
            if not shopList:
                print("该区域没有任何数据")
                continue
            for shop in shopList:
                self._crawlStore(shop)
def getLoaQueue(path="loas_.txt"):
    """Load crawl locations from a text file into a queue.

    Each useful line has the form ``area,lng,lat``. Spaces are removed from
    the whole line before it is split on commas. Lines with fewer than three
    fields (for example blank lines) are skipped; fields after the third are
    ignored.

    :param path: UTF-8 text file to read; defaults to ``loas_.txt`` in the
        current working directory.
    :return: an unbounded ``Queue`` of ``[area, lng, lat]`` string lists in
        file order.
    """
    loaQueue = Queue(0)
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            fields = line.replace("\n", "").replace(" ", "").split(",")
            if len(fields) < 3:
                # Blank or incomplete line: there is nothing to crawl for it.
                continue
            loaQueue.put(fields[:3])
    return loaQueue
def postHtml(url, data, maxRetries=None, retryDelay=1):
    """POST form ``data`` to ``url`` and return the JSON-decoded response body.

    The request is retried when it fails or when the body is not valid UTF-8
    JSON, with a pause between attempts so a failing endpoint is not hit in
    a tight loop.

    :param url: target URL.
    :param data: form fields passed to ``requests.post``.
    :param maxRetries: number of retries after the first attempt; ``None``
        (the default) retries indefinitely.
    :param retryDelay: seconds to wait before each retry.
    :return: the decoded JSON value, or None once ``maxRetries`` is exhausted.
    """
    failures = 0
    while True:
        try:
            resp = requests.post(url, data=data, headers=headers, timeout=10)
            return json.loads(resp.content.decode("utf-8"))
        except (requests.RequestException, ValueError) as e:
            # ValueError covers both invalid JSON and undecodable bytes.
            failures += 1
            print("postHtml attempt %d failed: %s" % (failures, e))
            if maxRetries is not None and failures > maxRetries:
                return None
            time.sleep(retryDelay)
def getHtml(url, maxRetries=None, retryDelay=1):
    """GET ``url`` and return the JSON-decoded response body.

    The request is retried when it fails or when the body is not valid UTF-8
    JSON, with a pause between attempts so a failing endpoint is not hit in
    a tight loop.

    :param url: target URL, including any query string.
    :param maxRetries: number of retries after the first attempt; ``None``
        (the default) retries indefinitely.
    :param retryDelay: seconds to wait before each retry.
    :return: the decoded JSON value, or None once ``maxRetries`` is exhausted.
    """
    failures = 0
    while True:
        try:
            resp = requests.get(url, headers=headers_, timeout=10)
            return json.loads(resp.content.decode("utf-8"))
        except (requests.RequestException, ValueError) as e:
            # ValueError covers both invalid JSON and undecodable bytes.
            failures += 1
            print("getHtml attempt %d failed: %s" % (failures, e))
            if maxRetries is not None and failures > maxRetries:
                return None
            time.sleep(retryDelay)
def main():
    """Build the location queue and start ``threadNum`` spider threads on it."""
    sharedQueue = getLoaQueue()
    for _ in range(threadNum):
        # Every worker consumes from the same queue.
        xsyxSpider(sharedQueue).start()


# Start crawling only when the file is executed as a script.
if __name__ == '__main__':
    main()