爬取评论信息问题 加上goldUser[i]和Days[i]之后就报错 求指导


#导入需要的库
import requests
from bs4 import  BeautifulSoup as bs
import json
import csv
import re

#宏变量存储目标js的URL列表
COMMENT_PAGE_URL = []

#生成链接列表
def Get_Url(num):
    urlFront = 'https://rate.tmall.com/list_detail_rate.htm?itemId=579718698456&spuId=1089001014&sellerId=2838892713&order=3¤tPage='
    urlRear ='&append=1&content=1&tagId=&posi=&picture=0&groupId=&ua=098%23E1hv1pvov7hvUvCkvvvvvjiPRLFvgjlHP2q9gj1VPmPp6j1PRFSvAjnjPFFWgjimdphvmpvhRQn3OpmvH2yCvvpvvvvvdphvmpvU6vnFOQvpqgwCvvpvCvvvRphvCvvvvvvPvpvhvv2MMQhCvvOvCvvvphvEvpCWvVDGvva4%2BExr1EuKNpRxfwLWd3ODN%2BLWaNoAdcHjaXkxfXk4jLVxfwLZdig%2F70zhsjZ7%2B3%2Busjc6LLIt8ZJaKE5ErqO24gcE2f6A%2BuyCvv9vvUm6DDs7jgyCvvOUvvVva6JtvpvIvvvvvhCvvvvvvUUdphvUApvv9krvpvQvvvmm86CvmVWvvUUdphvUOTwCvvpvvUmmdphvV2QUwU8h3vvybGeQRkxB3XzpzEkSwZPh346CvCh92vomlYwvBRyidAI3pQeHAOhCvvswjWn9ZKMwzn1JQlurvpvEvvojvzxlvbLA9phv8cMGc02QzYswzREi7kdoE93ukbj2J1%2BhTKuUAbt39OEkgKRpy4V3OI8mRphvCvvvvvv%3D&itemPropertyId=&itemPropertyIndex=&userPropertyId=&userPropertyIndex=&rateQuery=&location=&needFold=0&_ksTS=1554086815326_1085&callback=jsonp1086'
    #urlRear = '&append=0&content=1&tagId=&posi=&picture=&groupId=&ua=098%23E1hvHQvRvpQvUpCkvvvvvjiPRLqp0jlbn2q96jD2PmPWsjn2RL5wQjnhn2cysjnhR86CvC8h98KKXvvveSQDj60x0foAKqytvpvhvvCvp86Cvvyv9PPQt9vvHI4rvpvEvUmkIb%2BvvvRCiQhvCvvvpZptvpvhvvCvpUyCvvOCvhE20WAivpvUvvCC8n5y6J0tvpvIvvCvpvvvvvvvvhZLvvvvtQvvBBWvvUhvvvCHhQvvv7QvvhZLvvvCfvyCvhAC03yXjNpfVE%2BffCuYiLUpVE6Fp%2B0xhCeOjLEc6aZtn1mAVAdZaXTAdXQaWg03%2B2e3rABCCahZ%2Bu0OJooy%2Bb8reEyaUExreEKKD5HavphvC9vhphvvvvGCvvpvvPMM3QhvCvmvphmCvpvZzPQvcrfNznswOiaftlSwvnQ%2B7e9%3D&needFold=0&_ksTS=1552466697082_2019&callback=jsonp2020'
    for i in range(0,num):
        COMMENT_PAGE_URL.append(urlFront+str(1+i)+urlRear)

#获取评论数据
def GetInfo(num):
    #定义需要的字段
    nickname = []
    auctionSku = []
    ratecontent = []
    ratedate = []
    content_add = []
    goldUser = []
    Days = []
    #循环获取每一页评论
    for i in range(num):
        #头文件,没有头文件会返回错误的js
        #headers = {
           # 'cookie':'cna=qMU/EQh0JGoCAW5QEUJ1/zZm; enc=DUb9Egln3%2Fi4NrDfzfMsGHcMim6HWdN%2Bb4ljtnJs6MOO3H3xZsVcAs0nFao0I2uau%2FbmB031ZJRvrul7DmICSw%3D%3D; lid=%E5%90%91%E6%97%A5%E8%91%B5%E7%9B%9B%E5%BC%80%E7%9A%84%E5%A4%8F%E5%A4%A9941020; otherx=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; hng=CN%7Czh-CN%7CCNY%7C156; x=__ll%3D-1%26_ato%3D0; t=2c579f9538646ca269e2128bced5672a; _m_h5_tk=86d64a702eea3035e5d5a6024012bd40_1551170172203; _m_h5_tk_enc=c10fd504aded0dc94f111b0e77781314; uc1=cookie16=V32FPkk%2FxXMk5UvIbNtImtMfJQ%3D%3D&cookie21=U%2BGCWk%2F7p4mBoUyS4E9C&cookie15=UtASsssmOIJ0bQ%3D%3D&existShop=false&pas=0&cookie14=UoTZ5bI3949Xhg%3D%3D&tag=8&lng=zh_CN; uc3=vt3=F8dByEzZ1MVSremcx%2BQ%3D&id2=UNcPuUTqrGd03w%3D%3D&nk2=F5RAQ19thpZO8A%3D%3D&lg2=U%2BGCWk%2F75gdr5Q%3D%3D; tracknick=tb51552614; _l_g_=Ug%3D%3D; ck1=""; unb=3778730506; lgc=tb51552614; cookie1=UUBZRT7oNe6%2BVDtyYKPVM4xfPcfYgF87KLfWMNP70Sc%3D; login=true; cookie17=UNcPuUTqrGd03w%3D%3D; cookie2=1843a4afaaa91d93ab0ab37c3b769be9; _nk_=tb51552614; uss=""; csg=b1ecc171; skt=503cb41f4134d19c; _tb_token_=e13935353f76e; x5sec=7b22726174656d616e616765723b32223a22393031623565643538663331616465613937336130636238633935313935363043493362302b4d46454e76646c7243692b34364c54426f4d4d7a63334f44637a4d4455774e6a7378227d; l=bBIHrB-nvFBuM0pFBOCNVQhjb_QOSIRYjuSJco3Wi_5Bp1T1Zv7OlzBs4e96Vj5R_xYB4KzBhYe9-etui; isg=BDY2WCV-dvURoAZdBw3uwj0Oh2yUQwE5YzQQ9qAfIpm149Z9COfKoZwV-_8q0HKp',
           # 'user-agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
            #'referer': 'https://detail.tmall.com/item.htm?spm=a1z10.5-b-s.w4011-17205939323.51.30156440Aer569&id=41212119204&rn=06f66c024f3726f8520bb678398053d8&abbucket=19&on_comment=1&sku_properties=134942334:3226348',
           # 'accept': '*/*',
           # 'accept-encoding':'gzip, deflate, br',
           # 'accept-language': 'zh-CN,zh;q=0.9' }
        headers = {
            'cookie':'cna=ur2OFGu3zAECAbfAZ87Xj9f0; t=28985a1bc11f6249071229dd7ebe596f; _tb_token_=feaeb6ea76785; cookie2=186484bd98cb781f361488d9ccd2a72b; _m_h5_tk=0aeb5e722ff95c9d5ebd14c6c2a388e0_1554086456180; _m_h5_tk_enc=42cc6a4aa5ad1a10b3c854e267bef2fd; hng=""; uc1=cookie16=UIHiLt3xCS3yM2h4eKHS9lpEOw%3D%3D&cookie21=W5iHLLyFeYZ1WM9hVLhR&cookie15=WqG3DMC9VAQiUQ%3D%3D&existShop=false&pas=0&cookie14=UoTZ4M%2BrWDZvbw%3D%3D&tag=8&lng=zh_CN; uc3=vt3=F8dByEnaSOX2hI0x%2Flc%3D&id2=UUjYEhxYRc%2Bb0g%3D%3D&nk2=AKmWMKrN5g%3D%3D&lg2=UIHiLt3xD8xYTw%3D%3D; tracknick=c%5Cu7A0B%5Cu658C11; _l_g_=Ug%3D%3D; ck1=""; unb=2095757505; lgc=c%5Cu7A0B%5Cu658C11; cookie1=VTk8ZckjyJj%2BPMzo3jhAzO%2FrrE4YUEkrho3lLZITCbw%3D; login=true; cookie17=UUjYEhxYRc%2Bb0g%3D%3D; _nk_=c%5Cu7A0B%5Cu658C11; uss=""; csg=b92c695d; skt=93af3508c370502e; enc=J%2Bc9V95EfehZCN%2FodOw9yZLzFzR8Z5LwffPmyyWDvp6seyV2cwyxOatqTP3PX8mizlsqdaVRUJsbhSUA9haa2Q%3D%3D; otherx=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0; x=__ll%3D-1%26_ato%3D0; x5sec=7b22726174656d616e616765723b32223a226337336239666531396163653138636332383865363231613739393631393632434c2f3168655546454c586a352f6d2f3749443847686f4d4d6a41354e5463314e7a55774e547378227d; whl=-1%260%260%260; l=bBSUwCDrv4AO5OiwBOfGNZi4LTbOBIOfGsPPhqdi1ICPOgCX5YxRWZsXQx8WC3GVa6FkR37siLhaBP83Iy4Eh; isg=BOXl3mwn1UUoGTE0lEaaml7N9KEVWph-J2y-4efL35wu_gFwr3OshU7YiCItfrFs',
            'user-agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36',
            'accept': '*/*',
            'accept-encoding': 'gzip, deflate, br',
             'accept-language': 'zh-CN,zh;q=0.9' }
        #解析JS文件内容
        content = requests.get(COMMENT_PAGE_URL[i],headers=headers).text
        nk = re.findall('"displayUserNick":"(.*?)"', content)
        nickname.extend(nk)
        print(nk)
        auctionSku.extend(re.findall('"auctionSku":"(.*?)"', content))
        ratecontent.extend(re.findall('"rateContent":"(.*?)"', content))
        ratedate.extend(re.findall('"rateDate":"(.*?)"', content))
        content_add.extend(re.findall('"content":"(.*?)"', content))
        goldUser.extend(re.findall('"goldUser":"(.*?)"', content))
        Days.extend(re.findall('"days":"(.*?)"', content))
    #将数据写入TEXT文件中
    for i in list(range(0, len(nickname))):
        text = '>'.join((nickname[i],goldUser[i], ratedate[i], auctionSku[i], ratecontent[i], Days[i], content_add[i])) + '\n\n'
        with open(r"E:\TmallContent.txt", 'a+',encoding='UTF-8') as file:
            file.write(text + ' ')
            print(i+1,":写入成功")

#主函数
if __name__ == "__main__":
    Page_Num = 1
    Get_Url(Page_Num)
    GetInfo(1)

爬取评论信息问题 加上goldUser[i]和Days[i]之后就报错 求指导_第1张图片

 

本来好好地  但是加上goldUser[i]和Days[i]之后 就报错,如下:


Traceback (most recent call last):
  File "F:/PYTHON CONTANT/草稿.py", line 68, in
    GetInfo(1)
  File "F:/PYTHON CONTANT/草稿.py", line 59, in GetInfo
    text = '>'.join((nickname[i],goldUser[i], ratedate[i], auctionSku[i], ratecontent[i], Days[i], content_add[i])) + '\n\n'
IndexError: list index out of range

 

小白求指导 感谢感谢感谢

你可能感兴趣的:(爬取评论信息问题 加上goldUser[i]和Days[i]之后就报错 求指导)