Python 爬取“智能家居”相关数据

爬取“智能家居”相关数据

          • 爬取“智能门锁”
          • 简单处理一下数据

爬取“智能门锁”
from selenium import webdriver
# from selenium.webdriver.common.by import By
# from pyquery import PyQuery as pq
import time
import csv

# Gome search-results page for the query "智能门锁" (URL-encoded in `question`).
GOME_SEARCH_URL = 'http://search.gome.com.cn/search?intcmp=smart-1000066448-5&question=%E6%99%BA%E8%83%BD%E9%97%A8%E9%94%81&deliv=1&market=10&pzpq=0&pzin=v4'

# One shared Chrome session; spider() drives it through the result pages.
browser = webdriver.Chrome()
browser.get(GOME_SEARCH_URL)


def spider(page, max_pages=None):
    """Scrape Gome search results page by page into ``information.csv``.

    Starting from the page currently loaded in the module-level ``browser``,
    each page is scrolled down in steps, then the price, name and comment
    text of every product is appended to ``information.csv`` and echoed to
    stdout.  The page-number box (``#pNum``) and its submit button are then
    used to jump to the next page.

    Args:
        page: Number used to label the first scraped page; it is incremented
            for every following page and typed into the page-number box.
        max_pages: Optional upper bound on the number of pages to scrape.
            ``None`` (the default) keeps going until a stop condition below
            is met.

    The crawl stops when a page yields no products, when a page yields
    exactly the same rows as the previous one (the site did not advance),
    when the pager controls are missing, or when ``max_pages`` is reached.
    """
    # Imported locally so the block stays self-contained.
    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver.common.by import By

    previous_rows = None
    pages_done = 0

    # Iterate instead of recursing: one stack frame per page would
    # eventually hit the interpreter's recursion limit.
    while True:
        # Scroll down step by step, pausing after each step as the original
        # script did (presumably so lazily loaded items can render).
        # scrollTo takes (x, y), so x stays 0 and only y advances.
        for offset in range(1000, 7000, 1000):
            browser.execute_script('window.scrollTo(0, arguments[0])', offset)
            time.sleep(2)

        prices = browser.find_elements(By.CLASS_NAME, 'item-price')
        descriptions = browser.find_elements(By.CLASS_NAME, 'item-name')
        evaluations = browser.find_elements(By.CLASS_NAME, 'comment')

        # zip() stops at the shortest list, so a product missing one of the
        # fields can no longer trigger an IndexError.
        rows = [
            [price.text, description.text, evaluation.text]
            for price, description, evaluation
            in zip(prices, descriptions, evaluations)
        ]
        if not rows or rows == previous_rows:
            break

        print('正在爬取第', page, '页')
        # newline='' lets the csv module control line endings (otherwise
        # blank rows appear on Windows).
        with open('information.csv', 'a', encoding='utf-8', newline='') as f:
            writer = csv.writer(f)
            for i, row in enumerate(rows):
                writer.writerow(row)
                print(i)
                for field in row:
                    print(field)

        previous_rows = rows
        pages_done += 1
        if max_pages is not None and pages_done >= max_pages:
            break

        try:
            page_input = browser.find_element(By.ID, 'pNum')
            submit = browser.find_element(By.CLASS_NAME, 'btn')
        except NoSuchElementException:
            # No pager on this page: nothing further to navigate to.
            break

        page += 1
        page_input.clear()
        page_input.send_keys(str(page))
        submit.click()


# items = doc('.item-price-info').items()
# for item in items:
#     price = item.find('.item-price').text()
#     print(price)


# Always shut the browser down, even when the crawl aborts with an exception.
# quit() also ends the chromedriver process, which close() leaves running.
try:
    spider(1)
finally:
    browser.quit()
from selenium import webdriver
import time
import csv

# Baidu B2B search-results page for the query "智能门锁" (URL-encoded in `q`).
BAIDU_B2B_SEARCH_URL = 'https://b2b.baidu.com/s?q=%E6%99%BA%E8%83%BD%E9%97%A8%E9%94%81&from=search'

# One shared Chrome session; find() drives it through the result pages.
browser = webdriver.Chrome()
browser.get(BAIDU_B2B_SEARCH_URL)


def find():
    """Scrape every Baidu B2B result page into ``Doorlock.csv``.

    For the page currently loaded in the module-level ``browser``, the
    price, product name, company and address text of each result card is
    appended to ``Doorlock.csv`` and echoed to stdout.  The "next page"
    button is then clicked and the process repeats.  The crawl ends, and
    '爬取结束' is printed, once no "next page" button with the expected
    class can be found (presumably the state of the pager on the last
    page -- verify against the live site).

    Errors other than the missing button are not swallowed, so real
    failures are visible instead of being reported as a normal finish.
    """
    # Imported locally so the block stays self-contained.
    from selenium.common.exceptions import NoSuchElementException
    from selenium.webdriver.common.by import By

    # Iterate instead of recursing once per page.
    while True:
        # Locate the card fields on the current page.
        price_list = browser.find_elements(By.XPATH, '//div[@class="p-card-price"]')
        goods_list = browser.find_elements(By.XPATH, '//div[@class="p-card-name"]')
        company_list = browser.find_elements(By.XPATH, '//div[contains(@class,"p-card-company") or '
                                                       '@class = "p-card-company-vip"]')
        address_list = browser.find_elements(By.XPATH, '//div[@class="p-card-address"]')

        # newline='' lets the csv module control line endings (otherwise
        # blank rows appear on Windows).
        with open('Doorlock.csv', 'a', encoding='utf-8', newline='') as f:
            writer = csv.writer(f)
            # zip() stops at the shortest list, so a card missing one field
            # cannot raise IndexError.
            cards = zip(price_list, goods_list, company_list, address_list)
            for i, card in enumerate(cards):
                row = [element.text for element in card]
                writer.writerow(row)
                print(i)
                for field in row:
                    print(field)

        try:
            button = browser.find_element(
                By.XPATH, '//li[@class="ivu-page-next ivu-page-custom-text"]')
        except NoSuchElementException:
            break
        button.click()
        # Give the next page time to render before reading it.
        time.sleep(2)

    print('爬取结束')


# Always shut the browser down, even when the crawl aborts with an exception.
# quit() also ends the chromedriver process, which close() leaves running.
try:
    find()
finally:
    browser.quit()
简单处理一下数据
import pandas as pd
import matplotlib.pyplot as plt

column_name = ['价格', '产品', '公司', '地址']

# Doorlock.csv has no header row, so the column names are supplied here.
# The price column is forced to str so the .str accessors below work even
# if every price happens to look numeric.
data = pd.read_csv('Doorlock.csv', names=column_name, dtype={'价格': str})

# Strip the leading "¥" and trailing "元" from prices that carry them and
# normalise the remaining number through float().  na=False treats missing
# prices as "no match" instead of propagating NaN into the mask.
yuan = data['价格'].str.contains('元', na=False)
data.loc[yuan, '价格'] = data.loc[yuan, '价格'].map(
    lambda text: '{}'.format(float(text[1:-1]))
)

# Drop rows whose price is "面议" (price on request).  Without na=False a
# missing price would make the mask non-boolean and `~` would raise.
data = data[~data['价格'].str.contains('面议', na=False)]

print(data)

你可能感兴趣的:(Python 爬取“智能家居”相关数据)