Python + Selenium + WebDriver + requests 实现抓取页面指定内容并推送至企业微信

from selenium.webdriver import Chrome
import time
import pandas as pd
from selenium.webdriver.common.by import By
import requests
import json

# Launch Chrome and sign in to the target site.
LOGIN_URL = "需要抓取的网址"

browser = Chrome()
browser.get(LOGIN_URL)
time.sleep(1)  # fixed pause before the login form is used

username_box = browser.find_element(By.ID, 'username')
username_box.send_keys('账号')
password_box = browser.find_element(By.ID, 'password')
# The trailing "\n" is typed as part of the password keys; presumably it acts
# as Enter and submits the login form -- confirm against the real page.
password_box.send_keys('密码\n')
time.sleep(5)  # fixed pause after submitting the credentials

# Switch to the last window handle (the original comment calls it the new window).
latest_window = browser.window_handles[-1]
browser.switch_to.window(latest_window)

# Menu path: Order management / Purchase orders.
ORDER_MENU_XPATH = '/html/body/div[1]/div/div[1]/div/div[2]/div/div[1]/div/ul[12]/div/div/div'
PURCHASE_ORDER_XPATH = '//*[@id="446649214039097388"]/div/div'

# Click each menu entry in turn, pausing two seconds after every click.
for menu_xpath in (ORDER_MENU_XPATH, PURCHASE_ORDER_XPATH):
    browser.find_element(By.XPATH, menu_xpath).click()
    time.sleep(2)
# Scrape the order table page by page and export the rows to an Excel file.
#
# Each table row becomes exactly one output row.  Cell text is taken whole
# (stripped) instead of being split on whitespace: splitting produced lists of
# different lengths whenever a cell contained a space or was empty, which made
# the DataFrame constructor raise ValueError.  Empty cells are stored as None.

# Absolute XPath of the panel that contains both the result table and the pager.
PANEL_XPATH = (
    '/html/body/div[1]/div/div[2]/div[2]/div/div/div/div[2]/div[2]'
    '/div/div/div[1]/div[1]/div[2]'
)
# Pager button number {page} inside the panel.
PAGER_XPATH = PANEL_XPATH + '/div[5]/div[1]/div[2]/span[{page}]'
# Text container of column {col} in table row {row}.
CELL_XPATH = (
    PANEL_XPATH
    + '/div[4]/div/div[3]/div[1]/div/div[1]/div/table/tr[{row}]/td[{col}]/div/div'
)
# Output column name -> index of the <td> it is read from.
COLUMN_INDEXES = {'订单编号': 2, '渠道': 5, '备注': 6, '订单来源': 11}

records = []
try:
    for i in range(2, 10):  # pager buttons span[2] .. span[9]
        browser.find_element(By.XPATH, PAGER_XPATH.format(page=i)).click()
        time.sleep(5)  # fixed pause after clicking the pager button
        for j in range(2, 7):  # table rows tr[2] .. tr[6]
            record = {}
            for column, td_index in COLUMN_INDEXES.items():
                cell = browser.find_element(
                    By.XPATH, CELL_XPATH.format(row=j, col=td_index)
                )
                record[column] = cell.text.strip() or None
            records.append(record)
            print(*record.values())

    # Build the frame once instead of concatenating inside the loop.
    df = pd.DataFrame(records, columns=list(COLUMN_INDEXES))
    df.to_excel('cehsi.xlsx', index=False)
finally:
    # Always close the browser, even if a locator fails mid-scrape.
    browser.quit()


# Push the result to WeCom (企业微信).
# Replace with your own corp ID, application (agent) ID and application secret.
corpid = 'you-corpid'
agentid = 'you-agentid'
corpsecret = 'you-corpsecret'

# Fetch the access_token.
# Credentials go through `params` so they are URL-encoded, and the request
# has a timeout so a stalled connection cannot hang the script forever.
url = 'https://qyapi.weixin.qq.com/cgi-bin/gettoken'
response = requests.get(
    url,
    params={'corpid': corpid, 'corpsecret': corpsecret},
    timeout=10,
)
response.raise_for_status()
token_payload = response.json()
# Treat a non-zero "errcode" in the response body as a failure, so the real
# reason is reported instead of a bare KeyError on 'access_token'.
if token_payload.get('errcode', 0) != 0:
    raise RuntimeError(f'gettoken failed: {token_payload}')
access_token = token_payload['access_token']

# Upload the file to the WeCom media library to obtain a media_id.
url = 'https://qyapi.weixin.qq.com/cgi-bin/media/upload'
# Path of the file to upload; placeholder, replace it with the real path
# (presumably the workbook exported by the scraping step -- confirm).
file_path = 'you-file_path'
with open(file_path, 'rb') as f:
    response = requests.post(
        url,
        params={'access_token': access_token, 'type': 'file'},
        files={'media': f},
        timeout=30,
    )
response.raise_for_status()
upload_payload = response.json()
# Surface a non-zero "errcode" explicitly instead of failing with a
# KeyError on 'media_id'.
if upload_payload.get('errcode', 0) != 0:
    raise RuntimeError(f'media upload failed: {upload_payload}')
media_id = upload_payload['media_id']

# Send the uploaded file as an application message to every member ("@all").
url = 'https://qyapi.weixin.qq.com/cgi-bin/message/send'
data = {
    "touser": "@all",
    "msgtype": "file",
    "agentid": agentid,
    "file": {
        "media_id": media_id
    },
    "safe": 0
}
# `json=` serialises the payload and sets the JSON content type; the timeout
# keeps a stalled connection from hanging the script.
response = requests.post(
    url,
    params={'access_token': access_token},
    json=data,
    timeout=10,
)
print(response.text)
response.raise_for_status()
# Fail loudly on a non-zero "errcode" so a rejected message does not look
# like a successful run.
send_payload = response.json()
if send_payload.get('errcode', 0) != 0:
    raise RuntimeError(f'message send failed: {send_payload}')

你可能感兴趣的:(python+selenium+webdriver+requests实现抓取页面指定内容并推送至企业微信)