chromedriver驱动下载方式参考:https://blog.csdn.net/muriyue6/article/details/101440353
通过微博绑定淘宝账号代码参考文档链接:https://blog.csdn.net/qq_45327272/article/details/99053395
视频教学(爬取数据部分)参考链接:https://www.bilibili.com/video/BV1qJ411y71J?p=9
import csv
import json
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from pyquery import PyQuery as pq
import re
chromedriver_path = "C:/Users/mqm/Desktop/chromedriver.exe"  # Change this to the location of your own chromedriver executable
class taobao_infos:
    """Selenium-driven Taobao scraper that logs in via Weibo and saves search results to CSV.

    The class reads module-level names that are defined outside of it:
    ``chromedriver_path`` (driver location), ``iphone`` (the search keyword,
    also used for the CSV file name), ``weibo_username`` and
    ``weibo_password`` (login credentials).
    """

    # Object initialisation
    def __init__(self):
        """Start Chrome and open ``<keyword>_data.csv`` with the header row written."""
        url = 'https://login.taobao.com/member/login.jhtml'
        self.url = url
        options = webdriver.ChromeOptions()
        # Drop the "enable-automation" switch; this is intended to keep sites
        # from detecting that the browser is driven by Selenium.
        options.add_experimental_option('excludeSwitches', ['enable-automation'])
        # NOTE(review): newer Selenium 4 releases no longer accept
        # ``executable_path`` (a Service object is used instead) -- confirm
        # against the installed Selenium version.
        self.browser = webdriver.Chrome(executable_path=chromedriver_path, options=options)
        self.wait = WebDriverWait(self.browser, 10)  # every explicit wait times out after 10 s
        # Output file for the scraped data. newline='' leaves line endings to
        # the csv module, which avoids blank lines between rows.
        self.f = open('%s_data.csv' % iphone, 'w', newline='', encoding='utf8')
        self.csv_write = csv.writer(self.f)
        # The first CSV row is the column header.
        self.csv_write.writerow(['手机广告词', '手机价格', '成交量', '手机图片地址', '商家', '发货地'])

    def _retry_on_timeout(self, action, max_retries):
        """Call ``action()`` and return its result, retrying when a wait times out.

        ``WebDriverWait.until`` raises Selenium's ``TimeoutException`` (not the
        built-in ``TimeoutError``), so that is the exception that triggers a
        retry; the built-in is still accepted for backward compatibility.

        :param action: zero-argument callable performing the browser steps.
        :param max_retries: number of additional attempts after the first one.
        :raises TimeoutException: when the last allowed attempt also times out.
        """
        # Imported here so the class keeps working without changes to the
        # file-level import block.
        from selenium.common.exceptions import TimeoutException

        retries_left = max(0, max_retries)
        while True:
            try:
                return action()
            except (TimeoutException, TimeoutError):
                if retries_left <= 0:
                    raise
                retries_left -= 1

    # Log in to Taobao
    def login(self):
        """Log in to Taobao through the Weibo third-party login and print the Taobao nickname."""
        # Open the login page.
        self.browser.get(self.url)
        # Wait for the "password login" tab and switch to it.
        password_login = self.wait.until(
            EC.presence_of_element_located((By.CSS_SELECTOR, '#login > div.login-content.nc-outer-box > div > div.login-blocks.login-switch-tab > a.password-login-tab-item')))
        password_login.click()
        # Wait for the "Weibo login" option and choose it.
        weibo_login = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.weibo-login')))
        weibo_login.click()
        # Wait for the Weibo account field and fill it in.
        weibo_user = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.username > .W_input')))
        weibo_user.send_keys(weibo_username)
        # Wait for the Weibo password field and fill it in.
        weibo_pwd = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.password > .W_input')))
        weibo_pwd.send_keys(weibo_password)
        # Wait for the login button and press it.
        submit = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '.btn_tip > a > span')))
        submit.click()
        # The login only counts as successful once the Taobao member nickname shows up.
        taobao_name = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR,
                                                                      'a.site-nav-login-info-nick ')))
        # Print the Taobao nickname.
        print(taobao_name.text)

    # Search
    def search(self, max_retries=3):
        """Search Taobao for the module-level keyword ``iphone``.

        :param max_retries: extra attempts made when a wait times out.
        :returns: text of the pager's "total" element on the result page.
        :raises TimeoutException: if every attempt times out.
        """
        def run_search():
            # Reloading the home page on every attempt gives an empty search box.
            self.browser.get('https://www.taobao.com')
            search_box = self.wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "#q"))
            )
            submit = self.wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, '#J_TSearchForm > div.search-button > button'))
            )
            search_box.send_keys(iphone)
            submit.click()
            total = self.wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.total'))
            )
            return total.text

        return self._retry_on_timeout(run_search, max_retries)

    # Page switching
    def next_page(self, page_number, max_retries=3):
        """Jump to result page ``page_number`` and scrape it with ``get_products``.

        :param page_number: 1-based page index typed into the pager form.
        :param max_retries: extra attempts made when a wait times out.
        :raises TimeoutException: if every attempt times out.
        """
        def go_to_page():
            page_box = self.wait.until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "#mainsrp-pager > div > div > div > div.form > input"))
            )
            submit2 = self.wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > div.form > span.btn.J_Submit'))
            )
            page_box.clear()
            page_box.send_keys(str(page_number))
            submit2.click()
            # The highlighted (active) pager item tells us the page really changed.
            self.wait.until(EC.text_to_be_present_in_element((By.CSS_SELECTOR, '#mainsrp-pager > div > div > div > ul > li.item.active > span'), str(page_number)))
            # Scrape the detail data of the page we just landed on.
            self.get_products(page_number)

        self._retry_on_timeout(go_to_page, max_retries)

    # Fetch item details
    def get_products(self, page_number):
        """Parse the currently displayed result page and append one CSV row per item.

        :param page_number: page index, used only in the progress message.
        """
        self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, '#mainsrp-itemlist .items .item')))
        html = self.browser.page_source
        doc = pq(html)
        items = doc('#mainsrp-itemlist .items .item').items()
        for i in items:
            product = {
                'image': i.find('.pic .img').attr('src'),
                'price': i.find('.price').text(),
                # Drops the last three characters of the deal count text
                # (presumably a fixed suffix after the number -- confirm against the page).
                'deal': i.find('.deal-cnt').text()[:-3],
                'title': i.find('.title').text(),
                'shop': i.find('.shop').text(),
                'location': i.find('.location').text()
            }
            # Write the row in the same column order as the header.
            self.csv_write.writerow([product['title'], product['price'], product['deal'], product['image'], product['shop'], product['location']])
        print('分页%s的数据已完成' % page_number)

    def close_csv(self):
        """Close the CSV output file."""
        self.f.close()
def parse_total_pages(total_text):
    """Return the first integer found in the pager's "total" text.

    :param total_text: text of the pager's "total" element, as returned by
        ``taobao_infos.search``.
    :returns: the page count as an ``int``.
    :raises ValueError: if the text contains no digits.
    """
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    match = re.search(r'(\d+)', total_text)
    if match is None:
        raise ValueError('no page count found in pager text: %r' % (total_text,))
    return int(match.group(1))


if __name__ == '__main__':
    weibo_username = "*****"  # change to your Weibo account
    weibo_password = "*****"  # change to your Weibo password
    iphone = input('请输入想要搜索的商品:')
    a = taobao_infos()
    try:
        a.login()  # log in
        total = parse_total_pages(a.search())
        print(total)  # number of result pages
        # search() leaves the browser on page 1, which next_page() never
        # visits, so scrape it here before paging through the rest.
        a.get_products(1)
        for i in range(2, total + 1):
            a.next_page(i)
    finally:
        # Release the file and the browser even when a step above fails.
        a.close_csv()
        a.browser.quit()
直接运行代码前,需要事先下载好chromedriver驱动,并修改两处:1. 在代码中把chromedriver_path配置为驱动的实际路径(我的驱动放在了桌面);2. 在代码的main部分,把微博账号和密码改成自己的(该微博账号必须事先授权绑定并登录过淘宝)。
爬取的数据保存为CSV文件,文件名为“搜索关键词_data.csv”。