絮叨两句:
博主是一名软件工程系的在校生,利用博客记录自己所学的知识,也希望能帮助到正在学习的同学们
人的一生中会遇到各种各样的困难和折磨,逃避是解决不了问题的,唯有以乐观的精神去迎接生活的挑战
少年易老学难成,一寸光阴不可轻。
最喜欢的一句话:今日事,今日毕
Selenium系列:
对Selenium进行练习操作
提示:以下是本篇文章正文内容,下面案例可供参考
机油地址:https://list.jd.com/list.html?cat=6728,6742,11849
from bs4 import BeautifulSoup
import requests
import time as ti
import uuid
from selenium import webdriver
def getHTML(url, timeout=30):
    """Download ``url`` with a desktop-browser User-Agent and return its text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled
            connection would freeze the whole crawl.

    Returns:
        The response body as text (``Response.text``).

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36 Edg/84.0.522.44'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text
def getJXHTML(html):
    """Visit every brand linked from the category page and scrape its listings.

    The brand links are read from ``div.sl-v-logos > ul.J_valueList`` in
    ``html``. Each brand page is opened in a Chrome window, scrolled with
    ``windows`` and handed to ``getProduct``; when the page shows a
    ``span.p-skip`` pager, the following pages are reached by clicking the
    ``pn-next`` element.

    Args:
        html: Markup of the category listing page.

    Raises:
        AttributeError: If ``html`` does not contain the brand list
            (``find`` returns ``None`` for the missing element).
    """
    # Local import keeps this block self-contained; selenium is already a
    # dependency of this file.
    from selenium.webdriver.common.by import By

    # Parse the argument that was passed in, not a module-level global.
    JY_soup = BeautifulSoup(html, 'html.parser')
    J_valueList_li_All = JY_soup.find('div', attrs={'class': 'sl-v-logos'}).find('ul', attrs={'class': 'J_valueList v-fixed'}).findAll('li')
    bowser = webdriver.Chrome()
    try:
        for li in J_valueList_li_All:
            anchor = li.find('a')
            brand_href = f"https://list.jd.com{anchor['href']}"
            brand_name = f"{anchor['title']}"
            print("品牌分类:----->", brand_name, brand_href)
            bowser.get(brand_href)
            windows(bowser)
            ca_Html = BeautifulSoup(bowser.page_source, 'html.parser')
            b_title = ca_Html.find('span', attrs={'class': 'p-skip'})
            if b_title is None:
                # No pager on the page: parse the products directly.
                getProduct(ca_Html, brand_name=brand_name)
                continue
            # NOTE(review): assumes the <b> inside span.p-skip holds the
            # total number of pages -- confirm against the live markup.
            total_pages = int(b_title.find('b').text)
            print("--------------------------------------第一页---------------------------------")
            # Products of the page that is already loaded.
            getProduct(ca_Html, brand_name=brand_name)
            # One "next" click per remaining page. The previous
            # range(1, total_pages - 1) made one click too few, so the last
            # page was never scraped.
            for _ in range(total_pages - 1):
                print("------------------------------下一页-------------------------------")
                # find_element(By.CLASS_NAME, ...) exists in Selenium 3 and 4;
                # find_element_by_class_name was removed in Selenium 4.3.
                bowser.find_element(By.CLASS_NAME, 'pn-next').click()
                windows(bowser)
                fy_httml = BeautifulSoup(bowser.page_source, 'html.parser')
                getProduct(fy_httml, brand_name=brand_name)
    finally:
        # Always close the browser, even when scraping fails part-way.
        bowser.quit()
def getProduct(barn_soup, brand_name):
    """Open every product listed in ``barn_soup`` and record its details.

    For each ``<li>`` under ``div#J_goodsList`` the product link and list
    price are extracted and passed to ``product_This`` together with a fresh
    record dict. Entries whose title carries a ``span.p-tag`` are skipped.

    Args:
        barn_soup: Parsed listing page (a BeautifulSoup document).
        brand_name: Brand the listing belongs to; stored in the ``brand``
            field of every record.
    """
    goods_box = barn_soup.find('div', attrs={'id': 'J_goodsList'})
    if goods_box is None:
        # Page without a goods list: nothing to scrape, and no reason to
        # start a browser.
        return

    bowser_there = webdriver.Chrome()
    try:
        for li in goods_box.findAll('li'):
            # Walk the title markup step by step: any <li> that is not a
            # product card (missing one of these elements) is skipped
            # instead of raising AttributeError on None.
            name_box = li.find('div', attrs={'class': 'p-name p-name-type-3'})
            name_link = name_box.find('a') if name_box is not None else None
            name_em = name_link.find('em') if name_link is not None else None
            if name_em is None:
                continue
            # NOTE(review): tagged titles were already skipped by the
            # original code; the meaning of the tag is not visible here.
            if name_em.find('span', attrs={'class': 'p-tag'}) is not None:
                continue

            link = li.find('a')
            li_href = link.get('href') if link is not None else None
            if not li_href:
                continue

            price_box = li.find('div', attrs={'class': 'p-price'})
            price_tag = price_box.find('i') if price_box is not None else None
            # An empty string (not None) marks a missing price so that it is
            # stored as NULL rather than as the text "None".
            li_price = price_tag.text if price_tag is not None else ''

            # Listing links are usually protocol-relative ("//item...").
            if "https://" not in str(li_href):
                https_li_href = f"https:{li_href}"
            else:
                https_li_href = li_href
            print("商品链接:", https_li_href)

            db = "INSERT INTO `xxuan_car_jd_mobil_product` VALUES (NULL,"
            sql = {
                'name': '',
                'brand': brand_name,
                'type': '',
                'originplace': '',
                'netweight': '',
                'price': '',
                'commodity_Name': '',
                'image': '',
                'viscosity': '',
                'volume': '',
            }
            product_This(db=db, sql=sql, https_li_href=https_li_href, li_price=li_price, bowser=bowser_there)
    finally:
        # One browser is started per call; close it so Chrome processes do
        # not pile up across listing pages.
        bowser_there.quit()
# Labels found in the product spec list, in matching order, mapped to the
# record key they fill. None marks a label that is recognised but not stored.
_PARAMETER_FIELDS = (
    ('商品名称:', 'name'),
    ('商品编号:', None),
    ('商品毛重:', 'netweight'),
    ('商品产地:', 'originplace'),
    ('粘度:', 'viscosity'),
    ('机油种类:', 'type'),
    ('容量:', 'volume'),
)


def _sql_quote(value):
    """Return ``value`` as a single-quoted SQL string literal."""
    # Scraped text may contain quotes or backslashes that would otherwise
    # terminate or corrupt the literal. The backtick-quoted table name
    # suggests MySQL, where backslash is an escape character by default.
    escaped = str(value).replace('\\', '\\\\').replace("'", "''")
    return f"'{escaped}'"


def product_This(db, sql, https_li_href, li_price, bowser,
                 output_path='D:\\数据\\京东\\sql\\xxuan_car_jd_mobil_product.txt'):
    """Scrape one product page and append an INSERT statement for it.

    Args:
        db: Statement prefix ending just before the first column value.
        sql: Record dict; filled in place. Its values are written in the
            dict's insertion order, so the order must match the table
            columns.
        https_li_href: Absolute URL of the product page.
        li_price: Price taken from the listing page.
        bowser: Selenium WebDriver used to load the page.
        output_path: File the statement is appended to (UTF-8).
    """
    bowser.get(https_li_href)
    windows(bowser)
    produc_soup = BeautifulSoup(bowser.page_source, 'html.parser')

    # Product title.
    sku_name_wrap = produc_soup.find('div', attrs={'class': 'itemInfo-wrap'})
    sku_name = None
    if sku_name_wrap is not None:
        sku_name = sku_name_wrap.find('div', attrs={'class': 'sku-name'})
    if sku_name is not None:
        sql['commodity_Name'] = str(sku_name.text).strip()

    sql['price'] = li_price

    # Product image; .get() avoids a KeyError when the attribute is absent.
    spec_img = produc_soup.find('img', attrs={'id': 'spec-img'})
    image_src = spec_img.get('data-origin') if spec_img is not None else None
    sql['image'] = f"https:{image_src}" if image_src else 'NULL'

    # Product specification list: the first label contained in an entry
    # decides which field receives the rest of the entry's text.
    parameter_list = produc_soup.find('ul', attrs={'class': 'parameter2 p-parameter-list'})
    if parameter_list is not None:
        for li in parameter_list.findAll('li'):
            text = str(li.text)
            for label, key in _PARAMETER_FIELDS:
                if label in text:
                    if key is not None:
                        sql[key] = text.replace(label, '')
                    break

    # Missing values are stored as the quoted text 'NULL', as before; None
    # is now treated as missing too instead of becoming the text 'None'.
    for key in sql:
        if sql[key] is None or str(sql[key]) == '':
            sql[key] = 'NULL'

    statement = db + ",".join(_sql_quote(value) for value in sql.values()) + ");"
    with open(output_path, 'a', encoding='utf-8') as w:
        # Terminate each statement with a newline; the previous bare '\r'
        # is not treated as a line break by most tools.
        w.write(statement + '\n')
def windows(browser, max_offset=10000, step=10):
    """Scroll the page down, back up, and down again in small steps.

    Presumably this makes the page load content that only appears while
    scrolling -- verify against the target site.

    Args:
        browser: Selenium WebDriver showing the page.
        max_offset: Largest vertical scroll offset to reach.
        step: Distance between two consecutive scroll positions; smaller
            values mean more ``execute_script`` round trips.

    Raises:
        ValueError: If ``step`` is not positive.
    """
    if step <= 0:
        raise ValueError("step must be a positive integer")
    for i in range(0, max_offset, step):
        windowBout(browser, i)
    for i in range(max_offset, 0, -step):
        windowTop(browser, i)
    for i in range(0, max_offset, step):
        windowBout(browser, i)
def windowBout(browser, i):
    """Scroll the page shown in ``browser`` to vertical offset ``i``.

    Used for the downward passes of ``windows``.
    """
    browser.execute_script(f"window.scrollTo(0,{i})")
def windowTop(browser, i):
    """Scroll the page shown in ``browser`` to vertical offset ``i``.

    Used for the upward pass of ``windows``.
    """
    scroll_command = "window.scrollTo(0," + str(i) + ")"
    browser.execute_script(scroll_command)
if __name__ == '__main__':
    # Script entry point: download the category listing page and scrape
    # every brand linked from it.
    url="https://list.jd.com/list.html?cat=6728,6742,11849"
    # JY_HTML becomes a module-level name when the file runs as a script;
    # keep the name stable in case functions in this file read it as a global.
    JY_HTML=getHTML(url)
    getJXHTML(JY_HTML)