Selenium+PhantomJS实战美团

一,先看结果


Selenium+PhantomJS实战美团_第1张图片
image.png

二,思路

三,上源码

import re
import time

import pymysql
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

class Meituan(object):
    """Scrape Meituan listing pages with a Selenium-driven Firefox browser.

    The scraper opens the Meituan home page, navigates to a listing page,
    then walks the pagination and prints, for every listing card, the text
    of its title, evaluation info, address info and deal info elements.
    """

    def __init__(self):
        # NOTE: the original script sketched a MySQL connection here
        # (pymysql.Connect(host=..., user=..., password=..., port=...,
        # database=..., charset=...)) but left it disabled. Nothing is
        # persisted; results are only printed.
        pass

    @staticmethod
    def _text_or_default(node, default=''):
        """Return ``node.get_text()``, or ``default`` when ``node`` is None.

        ``BeautifulSoup``'s ``find`` returns None for a missing element, so
        this keeps one incomplete card from aborting the whole crawl.
        """
        if node is None:
            return default
        return node.get_text()

    @staticmethod
    def _open_listing(browser):
        """Leave the home page for a listing page, trying three routes in turn.

        Prints '1', '2' or '3' to show which route succeeded. A failure of
        the last route propagates as a ``WebDriverException``.
        """
        try:
            # Route 1: click the navigation link on the home page.
            browser.find_element(
                By.XPATH, "//span/span/a[@class='link nav-text']").click()
            time.sleep(4)
            print('1')
            return
        except WebDriverException:
            # Link missing or not clickable: fall back to the search box.
            pass

        search_box = browser.find_element(By.XPATH, "//input[@type='text']")
        search_box.send_keys('北京')
        try:
            # Route 2: the header search button.
            browser.find_element(
                By.XPATH, "//div/button[@class='header-search-btn']").click()
            print('2')
        except WebDriverException:
            # Route 3: the search control is sometimes rendered as an
            # <input> submit element instead of a <button>.
            browser.find_element(By.XPATH, "//input[@value='搜索']").click()
            print('3')

    @staticmethod
    def _scroll_page(browser):
        """Scroll the page down in five steps, pausing 3 s after each step.

        Presumably this gives lazily loaded cards time to render before the
        page source is read -- TODO confirm against the live page.
        """
        for offset in range(0, 5000, 1000):
            browser.execute_script('window.scrollBy(0,{})'.format(offset))
            time.sleep(3)

    def _print_items(self, html):
        """Parse ``html`` and print one comma-separated line per listing card."""
        soup = BeautifulSoup(html, 'lxml')
        items = soup.find_all('div', class_='default-list-item clearfix')
        print('items')
        for item in items:
            item_name = self._text_or_default(
                item.find('a', class_='link item-title'))
            item_fen = self._text_or_default(
                item.find('div', class_='item-eval-info clearfix'))
            item_location = self._text_or_default(
                item.find('div', class_='address-info clearfix'))

            price_node = item.find('div', class_='deal-info')
            if price_node is None:
                print('item_price error!!')
            # Always rebind the price so a card without one never reuses the
            # previous card's value (or hits an unbound name on the first card).
            item_price = self._text_or_default(price_node)

            print('{},{},{},{}'.format(
                item_name, item_fen, item_location, item_price))

            # NOTE: the original script sketched (but disabled) creating a
            # MySQL table named by user input and INSERTing one row per card,
            # committing once per page. If that is re-enabled, pass the values
            # as query parameters instead of %-formatting them into the SQL.

    def get_meituan(self):
        """Run the crawl: open the site, reach a listing page, print all pages.

        Paging stops, after printing 'error', at the first
        ``WebDriverException`` raised inside the paging loop -- for example
        when the "next page" link can no longer be found. The browser is
        always closed on exit.
        """
        # The "/s/" path segment can be changed to select another city.
        url = 'http://www.meituan.com/'

        browser = webdriver.Firefox()
        try:
            browser.set_window_size(900, 900)
            browser.get(url)
            time.sleep(3)

            self._open_listing(browser)

            footer = 1  # 1-based number of the page currently displayed
            try:
                while True:
                    self._scroll_page(browser)
                    html = browser.page_source
                    print('html')
                    self._print_items(html)

                    footer += 1
                    browser.find_element(
                        By.XPATH,
                        "//li[@class='pagination-item next-btn active']/a",
                    ).click()
                    print('现在是第%s页!' % footer)
            except WebDriverException:
                print('error')
        finally:
            # Release the Firefox process even when the crawl fails midway.
            browser.quit()

# Script entry point: build the scraper and start crawling immediately.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing
# this module also launches the crawl.
l = Meituan()
l.get_meituan()

你可能感兴趣的:(Selenium+PhantomJS实战美团)