Python爬取美团店铺评论

  • 爬取美团一家店所有评论数据,后续用来做正负情感分析
  • 运行代码时,可能会遇到chromedriver与Chrome浏览器版本不匹配的问题。
  • 你需要查看自己Chrome浏览器版本(在浏览器中输入chrome://version/查看版本)然后下载版本对应的chromedriver。
  • 下载chromedriver的地址:http://chromedriver.storage.googleapis.com/index.html

 

# -*- coding: utf-8 -*-
"""
Created on Wed Apr 22 10:03:28 2020

@author: Aaron
"""
from selenium import webdriver
from time import sleep
import pandas as pd

class restaurant(object):
    """Scraper that collects the review texts of one Meituan restaurant page.

    The page URL and the chromedriver location are hard-coded in
    ``init_driver``. ``get_review`` walks the paginated comment list and
    appends every non-empty review text to a CSV file with a single
    ``text`` column.
    """

    def init_driver(self):
        """Store the page URL and chromedriver path, then start Chrome.

        Sets ``self.url``, ``self.path`` and ``self.driver``.
        """
        self.url = "https://meituan.com/meishi/1467844/"
        self.path = "C:/Program Files/Google/Chrome/Application/chromedriver"
        # NOTE(review): ``executable_path`` is how Selenium 3 locates the
        # driver binary; recent Selenium 4 releases expect a Service object
        # instead -- confirm against the installed Selenium version.
        self.driver = webdriver.Chrome(executable_path=self.path)

    def get_review(self, max_pages=10, csv_path="../data/res_comment.csv"):
        """Scrape the review texts and append them to the CSV at *csv_path*.

        Args:
            max_pages: Upper bound on the number of comment pages visited.
            csv_path: CSV file the reviews are appended to. A missing or
                empty file is treated as "no previous reviews" instead of
                failing after the scrape has already been done.

        Raises:
            selenium.common.exceptions.WebDriverException: if the comment
                container cannot be located on a page. The browser is
                still shut down in that case.
        """
        self.init_driver()
        try:
            self.driver.get(url=self.url)
            texts = self._collect_texts(max_pages)
        finally:
            # Always end the session, even when scraping fails, so neither
            # the browser window nor the chromedriver process is left behind.
            self.driver.quit()

        review_data = self._merge_with_existing(texts, csv_path)
        print(review_data)
        review_data.to_csv(csv_path, encoding='utf-8', index=False)

    def _collect_texts(self, max_pages):
        """Return the non-empty review texts from up to *max_pages* pages."""
        # Imported here so the module's top-level import block stays as-is.
        from selenium.common.exceptions import WebDriverException
        from selenium.webdriver.common.by import By

        texts = []
        for _ in range(max_pages):
            # Fixed pause before reading the page; presumably gives the
            # comment list time to render after a navigation or click.
            sleep(1)
            comment_div = self.driver.find_element(By.CLASS_NAME, 'comment')
            com_cont = comment_div.find_element(By.CLASS_NAME, 'com-cont')
            comment_list = com_cont.find_elements(
                By.CSS_SELECTOR, "[class='list clear']")
            for comment in comment_list:
                text = str(comment.find_element(By.CLASS_NAME, 'desc').text)
                print(text)
                if text:
                    texts.append(text)
            try:
                com_cont.find_element(
                    By.CSS_SELECTOR,
                    "[class='iconfont icon-btn_right']").click()
            except WebDriverException:
                # No usable "next page" button: treat as the last page.
                print("no page")
                break
        return texts

    @staticmethod
    def _merge_with_existing(texts, csv_path):
        """Return the rows already stored in *csv_path* followed by *texts*."""
        new_rows = pd.DataFrame({'text': texts}, dtype=object)
        try:
            existing = pd.read_csv(csv_path, encoding='utf-8')
        except (FileNotFoundError, pd.errors.EmptyDataError):
            # First run (or an empty file): nothing to merge with.
            return new_rows
        if new_rows.empty:
            return existing
        # One concat instead of a per-row DataFrame.append, which copied the
        # whole frame on every call and no longer exists in pandas 2.x.
        return pd.concat([existing, new_rows], ignore_index=True)


if __name__ == '__main__':
    # Script entry point: build the scraper and run one collection pass.
    restaurant().get_review()

 

你可能感兴趣的:(python,爬虫)