利用winapi 抓取网页保存

本文要实现的是:打开指定网页,通过 Ctrl+S 调出“另存为”对话框,将网页以文本文件类型保存为“指定名.txt”,后续再读取该 txt 文件进行处理。

好下面准备工具:

1.spyxx(安装有VS的同学可以直接在vs安装目录下找到:\Program Files (x86)\Microsoft Visual Studio 14.0\Common7\Tools\spyxx.exe)

        用 spyxx 查看窗口,找到“另存为”对话框的窗口句柄,并据此了解其下各级子窗口的句柄如何通过 win32api 逐级查找:

spyxx

2.确保已安装:selenium, win32gui, win32api, win32con(其中 win32gui、win32api、win32con 均由 pywin32 包提供,可通过 pip install selenium pywin32 安装)

3.下载geckodriver.exe (https://github.com/mozilla/geckodriver/releases/),下载完成后拷贝至Mozilla Firefox安装目录(C:\Program Files\Mozilla Firefox),并确保Mozilla Firefox安装目录在环境变量PATH中。

4. 代码:

from selenium import webdriver

import win32gui, win32api, win32con

def save_url(driver, URL_BASE, saved_file_name):
    """Load a page in the browser and save it through the native "Save As" dialog.

    The page is opened with ``driver.get``; a Ctrl+S keystroke is then
    synthesized with ``keybd_event`` and the resulting common file dialog
    (window class ``#32770``, title ``另存为``) is driven with window messages:
    the file name is typed into the edit box, the file-type combo box is
    switched to entry index 2, and the dialog is confirmed.

    Args:
        driver: A Selenium WebDriver instance; only ``driver.get`` is used.
        URL_BASE: Address of the page to open.
        saved_file_name: Text placed into the dialog's file-name edit box.

    Returns:
        None. The driver is left open; closing it is up to the caller.

    NOTE(review): the dialog is located after fixed 0.5 s sleeps, so a slow
    page or slow dialog will make the window lookups fail. The window title
    and the child-window class chain are hard-coded and presumably match a
    Chinese-locale Windows file dialog as inspected with spyxx — confirm on
    the target machine.
    """
    # Local import: the file's import block does not bring ``time`` into scope.
    import time

    driver.get(URL_BASE)

    # Synthesize Ctrl+S. The virtual-key code of a letter key equals the
    # ASCII code of its upper-case character.
    vk_ctrl = win32con.VK_CONTROL
    vk_s = ord('S')
    win32api.keybd_event(vk_ctrl, 0, 0, 0)
    win32api.keybd_event(vk_s, 0, 0, 0)
    win32api.keybd_event(vk_s, 0, win32con.KEYEVENTF_KEYUP, 0)
    win32api.keybd_event(vk_ctrl, 0, win32con.KEYEVENTF_KEYUP, 0)
    time.sleep(0.5)

    # Top-level "Save As" dialog.
    hld = win32gui.FindWindow("#32770", u"另存为")
    win32gui.SetForegroundWindow(hld)

    # Walk down the child-window chain to the file-name edit control.
    view = win32gui.FindWindowEx(hld, None, 'DUIViewWndClassName', None)
    direct_ui = win32gui.FindWindowEx(view, None, "DirectUIHWND", None)
    sink_first = win32gui.FindWindowEx(direct_ui, None, "FloatNotifySink", None)
    name_combo = win32gui.FindWindowEx(sink_first, None, "ComboBox", None)
    hwnd_filename = win32gui.FindWindowEx(name_combo, None, "Edit", None)

    # The file-type combo box lives under the third FloatNotifySink sibling:
    # each lookup starts searching after the previously found sibling.
    sink_second = win32gui.FindWindowEx(direct_ui, sink_first, "FloatNotifySink", None)
    sink_third = win32gui.FindWindowEx(direct_ui, sink_second, "FloatNotifySink", None)
    type_combo = win32gui.FindWindowEx(sink_third, None, "ComboBox", None)
    time.sleep(0.5)

    # Enter the file name.
    win32gui.SendMessage(hwnd_filename, win32con.WM_SETFOCUS, 0, 0)
    win32gui.SendMessage(hwnd_filename, win32con.WM_SETTEXT, None, saved_file_name)

    # Change the save type. CB_SETCURSEL takes a zero-based index, so 2 picks
    # the THIRD entry of the list (presumably the plain-text type — confirm
    # against the dialog's actual type list).
    win32api.SendMessage(type_combo, win32con.CB_SHOWDROPDOWN, 1, 0)
    win32api.SendMessage(type_combo, win32con.CB_SETCURSEL, 2, 0)
    win32gui.SendMessage(type_combo, win32con.WM_SETFOCUS, 0, 0)
    # A simulated click on the combo box follows the selection change.
    win32gui.SendMessage(type_combo, win32con.WM_LBUTTONDOWN, 0, 0)
    win32gui.SendMessage(type_combo, win32con.WM_LBUTTONUP, 0, 0)

    # Press Enter in the file-name box (posted, so it is handled asynchronously).
    win32gui.PostMessage(hwnd_filename, win32con.WM_KEYDOWN, win32con.VK_RETURN, 0)
    win32gui.PostMessage(hwnd_filename, win32con.WM_KEYUP, win32con.VK_RETURN, 0)
    time.sleep(0.5)

    # Click the first "Button" child of the dialog (presumably the Save button).
    hwnd_save = win32gui.FindWindowEx(hld, None, "Button", None)
    win32gui.PostMessage(hwnd_save, win32con.WM_LBUTTONDOWN, win32con.MK_LBUTTON, 0)
    win32gui.PostMessage(hwnd_save, win32con.WM_LBUTTONUP, win32con.MK_LBUTTON, 0)


下面就可以开始了:

# Create a Firefox WebDriver and save one page through the "Save As" dialog.
# NOTE(review): URL_BASE and saved_file_name are not assigned anywhere in the
# visible snippet — define them (page address and target file name) before
# running these two lines.
driver = webdriver.Firefox()
save_url(driver, URL_BASE, saved_file_name)


你可能感兴趣的:(利用winapi 抓取网页保存)