用selenium爬取京东某商品的差评100条

# Standard library
import csv
import time

# Third-party: Selenium browser automation
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains

# Module-level Chrome options with the "headless" argument set.
# NOTE: the __main__ section further down rebinds `option` to a fresh
# ChromeOptions object, so this instance is not the one given to the driver.
option = webdriver.ChromeOptions()
option.add_argument('headless')
def jddata(pages=10):
    """Scrape review pages from the page currently open in ``wd`` and save them.

    Relies on the module-level ``wd`` WebDriver, which must already be showing
    the review list to scrape (the ``__main__`` section selects the
    negative-review tab before calling this function).

    Args:
        pages: Maximum number of review pages to read. Defaults to 10.

    Returns:
        The list of ``[name, time, content]`` rows returned by ``analyse``,
        accumulated over every page that was read. The same rows are passed
        to ``save_csv`` before returning.
    """
    # Positional XPath of the "next page" link inside the review pager.
    next_page_xpath = '// *[ @ id = "comment-6"] / div[12] / div / div / a[7]'

    commts = []
    for page_no in range(1, pages + 1):
        cmts = wd.find_elements_by_css_selector('#comment-6 .comment-item')
        commts.extend(analyse(cmts))
        print("翻第", page_no, "页")

        if page_no == pages:
            # Last requested page has been read: nothing further to load.
            break

        # The plural lookup returns an empty list instead of raising when the
        # link is absent, so running out of pages simply ends the loop and the
        # rows collected so far still get saved.
        next_links = wd.find_elements_by_xpath(next_page_xpath)
        if not next_links:
            break

        time.sleep(1)
        # Click through JavaScript so the link does not need to be inside the
        # viewport or unobstructed.
        wd.execute_script("arguments[0].click();", next_links[0])
        time.sleep(3)  # fixed pause before reading the next page

    save_csv(commts)
    return commts

def analyse(cmts):
    """Turn review elements into ``[name, time, content]`` rows.

    For each element in ``cmts`` the text of its ``user-info`` and
    ``comment-con`` children is read, along with the text of the last
    ``span`` found inside its ``order-info`` child. A ``name,time`` line is
    printed for every element processed.

    Args:
        cmts: Iterable of elements exposing ``find_element_by_class_name``.

    Returns:
        A list holding one ``[name, time, content]`` list per element, in
        input order.
    """
    rows = []
    for item in cmts:
        author = item.find_element_by_class_name("user-info").text
        body = item.find_element_by_class_name("comment-con").text
        order_info = item.find_element_by_class_name("order-info")
        spans = order_info.find_elements_by_tag_name("span")
        posted_at = spans[-1].text  # the last span is used as the timestamp
        print(author + "," + posted_at)
        rows.append([author, posted_at, body])
    return rows


def save_csv(commts, path=r'D:\Python\京东差评.csv'):
    """Write review rows to a CSV file, preceded by a header line.

    The file is opened in write mode, so existing content is replaced.

    Args:
        commts: Iterable of rows; the first three items of each row are
            written as author, publish time and content.
        path: Destination file path.
    """
    # utf-8-sig prepends a BOM so spreadsheet tools can detect the encoding of
    # the Chinese text. newline='' lets the csv module emit its own line
    # endings; without it Windows output gets a blank line after every row.
    with open(path, 'w', encoding='utf-8-sig', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["发布者", "发布时间", "发布内容"])
        writer.writerows(row[:3] for row in commts)
    print("文件存储完毕")


if __name__ == "__main__":

    option = webdriver.ChromeOptions()
    # Headless mode is left off here; call option.add_argument('headless')
    # to run without a visible browser window.
    wd = webdriver.Chrome(r'E:\360浏览器下载\chromedriver_win32\chromedriver.exe', options=option)
    try:
        wd.get('https://item.jd.com/883575.html')  # navigate to the product page
        print("进入网页")
        actions = ActionChains(wd)
        wd.implicitly_wait(10)
        time.sleep(15)

        # Open the product-reviews tab.
        button = wd.find_element_by_xpath("//li[@clstag='shangpin|keycount|product|shangpinpingjia_1']")
        button.click()
        time.sleep(15)

        # Switch the review list to negative reviews.
        wd.find_element_by_xpath("//li[@clstag='shangpin|keycount|product|chaping']/a").click()
        time.sleep(15)

        jddata()
    finally:
        # Always shut down the browser and its chromedriver process, even when
        # one of the lookups or clicks above raises.
        wd.quit()

你可能感兴趣的:(爬虫)