监控页面信息

需要监控某个页面是否更新了自己感兴趣的内容。参考了不少博客,目前只找到一个能用的方案,改造后的代码如下:

(为什么要改造呢?因为接口 URL、JSON 字段名以及要匹配的关键词,都必须根据具体页面来调整。以谷歌浏览器为例:按 F12(部分笔记本需要 Fn+F12)打开开发者工具,再按 F5 刷新页面,然后在 Network 面板里查看请求内容。)

监控页面信息_第1张图片

我需要的信息在上图中名为 1?_=1591864294778 的那条请求里:

监控页面信息_第2张图片

#coding:utf-8
import requests
import json
import time

#
def getList(timeout=30):
    """Fetch the article list from the dl.scs.gov.cn article API.

    Args:
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled connection would block the
            monitor forever.

    Returns:
        The value stored under the ``articleList`` key of the decoded JSON
        response.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
        ValueError: If the response body is not valid JSON.
        KeyError: If the decoded response has no ``articleList`` key.
    """
    # The "_" query value is the current time in milliseconds
    # (presumably a cache-busting value -- confirm against the page's own requests).
    url = 'http://dl.scs.gov.cn/api/article/articlelist/all/4028818d6cb2038f016cb8af1075001a/0000000062b7b2b60162bccf480c000a/1?_=' + str(round(time.time() * 1000))
    wbdata = requests.get(url, timeout=timeout).text
    data = json.loads(wbdata)
    return data['articleList']
#for n in getList():
#    print(n['articleTitle'])
# Poll forever: print every fetched title that contains both keywords,
# followed by three banner lines spaced 5 seconds apart, then wait 30 seconds.
while True:
    for article in getList():
        title = article['articleTitle']
        if '银' not in title or '险' not in title:
            continue
        print(title)
        for banner in (
            '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@##########',
            '##################################################################',
            '999999999999999999999999999999999999999999999999999999999999999999',
        ):
            time.sleep(5)
            print(banner)
    time.sleep(30)
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 11 10:08:27 2020

@author: zhang
"""

#coding:utf-8
import requests
import json
import time
import win32api,win32con
 
def getList(timeout=30):
    """Fetch the article list from the dl.scs.gov.cn article API with a GET request.

    Args:
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it a stalled connection would block the
            monitor forever.

    Returns:
        The value stored under the ``articleList`` key of the decoded JSON
        response.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
        ValueError: If the response body is not valid JSON.
        KeyError: If the decoded response has no ``articleList`` key.
    """
    # The "_" query value is the current time in milliseconds
    # (presumably a cache-busting value -- confirm against the page's own requests).
    url = 'http://dl.scs.gov.cn/api/article/articlelist/all/4028818d6cb2038f016cb8af1075001a/0000000062b7b2b60162bccf480c000a/1?_=' + str(round(time.time() * 1000))
    wbdata = requests.get(url, timeout=timeout).text
    data = json.loads(wbdata)
    return data['articleList']
# Poll forever. Each pass takes a snapshot of the titles, prints any title that
# contains both keywords, waits 30 seconds, takes a second snapshot and pops up
# a message box when the second snapshot holds titles missing from the first.
while True:
    titles_before = [article["articleTitle"] for article in getList()]
    for title in titles_before:
        if '银' not in title or '监' not in title:
            continue
        print(title)
        for banner in (
            '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@##########',
            '##################################################################',
            '999999999999999999999999999999999999999999999999999999999999999999',
        ):
            time.sleep(5)
            print(banner)
    time.sleep(30)
    titles_after = [article["articleTitle"] for article in getList()]
    added = [title for title in titles_after if title not in titles_before]
    if added:
        print(added)
        # OK-only reminder message box
        win32api.MessageBox(0, "这是一个测试提醒OK消息框", "提醒", win32con.MB_OK)

 

# -*- coding: utf-8 -*-
"""
Created on Thu Jun 11 10:08:27 2020

@author: zhang
"""

#coding:utf-8
import requests
import json
import time
import win32api,win32con

# Show an OK-only message box as soon as the script starts.
win32api.MessageBox(
    0,
    "这是一个测试提醒OK消息框",
    "提醒",
    win32con.MB_OK,
)
 
def getList(timeout=30, retry_delay=300):
    """Fetch the article list from the dl.scs.gov.cn article API, retrying until it succeeds.

    Any exception raised while requesting or decoding the response is printed
    and the attempt is repeated after ``retry_delay`` seconds, so this function
    does not return until one fetch succeeds.

    Args:
        timeout: Seconds to wait for the server before the attempt is treated
            as failed. Without it a stalled connection would block forever and
            the retry logic would never run.
        retry_delay: Seconds to sleep after a failed attempt before retrying.

    Returns:
        The value stored under the ``articleList`` key of the decoded JSON
        response.
    """
    base_url = 'http://dl.scs.gov.cn/api/article/articlelist/all/4028818d6cb2038f016cb8af1075001a/0000000062b7b2b60162bccf480c000a/1?_='
    while True:
        # Rebuild the millisecond timestamp for every attempt so a retry made
        # minutes later does not resend the stale value.
        url = base_url + str(round(time.time() * 1000))
        try:
            wbdata = requests.get(url, timeout=timeout).text
            news = json.loads(wbdata)['articleList']
        except Exception as e:
            # Deliberately broad: network errors, bad JSON and a missing key
            # are all handled by waiting and trying again.
            print(e)
            time.sleep(retry_delay)
        else:
            return news

# Poll forever. Each pass takes a snapshot of the titles, prints any title that
# contains both keywords, waits 30 seconds, takes a second snapshot and pops up
# a message box when the second snapshot holds titles missing from the first.
while True:
    first_snapshot = [entry["articleTitle"] for entry in getList()]
    for headline in first_snapshot:
        if '银' in headline and '监' in headline:
            print(headline)
            for banner in (
                '@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@##########',
                '##################################################################',
                '999999999999999999999999999999999999999999999999999999999999999999',
            ):
                time.sleep(5)
                print(banner)
    time.sleep(30)
    second_snapshot = [entry["articleTitle"] for entry in getList()]
    fresh = [headline for headline in second_snapshot if headline not in first_snapshot]
    if fresh:
        print(fresh)
        # OK-only reminder message box
        win32api.MessageBox(0, "这是一个测试提醒OK消息框", "提醒", win32con.MB_OK)
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 11 10:08:27 2020

@author: zhang
"""

#coding:utf-8
import requests
import json
import time
import win32api,win32con
from datetime import datetime

# Show an OK-only message box as soon as the script starts.
win32api.MessageBox(
    0,
    "start!!!",
    "@@##@@##",
    win32con.MB_OK,
)
 
def getList(timeout=30, retry_delay=300):
    """Fetch the document rows from the cbirc.gov.cn static JSON feed, retrying until it succeeds.

    Any exception raised while requesting or decoding the response is printed
    and the attempt is repeated after ``retry_delay`` seconds, so this function
    does not return until one fetch succeeds.

    Args:
        timeout: Seconds to wait for the server before the attempt is treated
            as failed. Without it a stalled connection would block forever and
            the retry logic would never run.
        retry_delay: Seconds to sleep after a failed attempt before retrying.

    Returns:
        The value stored under ``data`` -> ``rows`` in the decoded JSON response.
    """
    url = 'http://www.cbirc.gov.cn/cn/static/data/DocInfo/SelectDocByItemIdAndChild/data_itemId=925,pageIndex=1,pageSize=18.json'
    while True:
        try:
            wbdata = requests.get(url, timeout=timeout).text
            news = json.loads(wbdata)['data']['rows']
        except Exception as e:
            # Deliberately broad: network errors, bad JSON and missing keys
            # are all handled by waiting and trying again.
            print(e)
            time.sleep(retry_delay)
        else:
            return news

# Poll forever: take two snapshots of the subtitles 30 seconds apart and pop up
# a message box for every newly appeared subtitle containing both keywords.
while True:
    titles_before = [row['docSubtitle'] for row in getList()]
    time.sleep(30)
    titles_after = [row['docSubtitle'] for row in getList()]
    new_titles = [title for title in titles_after if title not in titles_before]

    for title in new_titles:
        # Test the new title itself. The previous code tested the leaked loop
        # variable (the last fetched row), so the keyword filter ignored which
        # titles were actually new.
        # The isinstance guard skips non-string subtitles (e.g. null in the
        # JSON) instead of crashing the monitor.
        if isinstance(title, str) and '知' in title and '面' in title:
            # OK-only reminder message box
            win32api.MessageBox(0, "####IRC####", "@@##@@##", win32con.MB_OK)
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 11 10:08:27 2020

@author: zhang
"""

#coding:utf-8
import requests
import json
import time
import win32api,win32con
import winsound

# Show an OK-only message box as soon as the script starts.
win32api.MessageBox(
    0,
    "这是一个测试提醒OK消息框",
    "提醒",
    win32con.MB_OK,
)
# One-element list used as a mutable flag: getList() sets wrong[0] to True
# when a fetch attempt fails, and the main loop resets it on every pass.
wrong = [False]
def getList(timeout=30, retry_delay=300):
    """Fetch the article list from the dl.scs.gov.cn article API, retrying until it succeeds.

    Any exception raised while requesting or decoding the response sets the
    module-level flag ``wrong[0]`` to True, is printed, and the attempt is
    repeated after ``retry_delay`` seconds. The function does not return until
    one fetch succeeds.

    Args:
        timeout: Seconds to wait for the server before the attempt is treated
            as failed. Without it a stalled connection would block forever and
            the retry logic would never run.
        retry_delay: Seconds to sleep after a failed attempt before retrying.

    Returns:
        The value stored under the ``articleList`` key of the decoded JSON
        response.
    """
    base_url = 'http://dl.scs.gov.cn/api/article/articlelist/all/4028818d6cb2038f016cb8af1075001a/0000000062b7b2b60162bccf480c000a/1?_='
    while True:
        # Rebuild the millisecond timestamp for every attempt so a retry made
        # minutes later does not resend the stale value.
        url = base_url + str(round(time.time() * 1000))
        try:
            wbdata = requests.get(url, timeout=timeout).text
            news = json.loads(wbdata)['articleList']
        except Exception as e:
            # Deliberately broad: network errors, bad JSON and a missing key
            # are all handled by flagging the failure and trying again.
            wrong[0] = True
            print(e)
            time.sleep(retry_delay)
        else:
            return news

def do_ring(times):
    """Sound an audible alert: one long beep, then the ring tone ``times`` times.

    Args:
        times: How many times to play the ring-tone WAV file after the beep.
    """
    duration = 3000  # millisecond
    freq = 440  # Hz
    winsound.Beep(freq, duration)
    # Raw string: the Windows path contains backslashes that are not valid
    # escape sequences, which newer Python versions warn about.
    sound_path = r'C:\Windows\Media\Ring01.wav'
    for _ in range(times):
        winsound.PlaySound(sound_path, winsound.SND_FILENAME)
# Title snapshots kept between passes of the polling loop.
current_titles = []
later_titles = []
previous_titles = []
# True only during the first pass, when there is no earlier snapshot to compare with.
first_pass = True
while True:
    # Remember the snapshot from the previous pass before taking a new one.
    previous_titles = list(current_titles)
    wrong[0] = False
    current_titles = [article["articleTitle"] for article in getList()]
    appeared = [title for title in current_titles if title not in previous_titles]
    # Only alert here when this fetch had to retry after a failure (wrong[0])
    # and this is not the very first pass.
    if appeared and not first_pass and wrong[0]:
        do_ring(5)
        print(appeared)
        # OK-only reminder message box
        win32api.MessageBox(0, "这是一个测试提醒OK消息框", "提醒", win32con.MB_OK)
    time.sleep(30)
    later_titles = [article["articleTitle"] for article in getList()]
    appeared = [title for title in later_titles if title not in current_titles]
    if appeared:
        do_ring(5)
        print(appeared)
        # OK-only reminder message box
        win32api.MessageBox(0, "这是一个测试提醒OK消息框", "提醒", win32con.MB_OK)
    if first_pass:
        first_pass = False
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 11 10:08:27 2020

@author: zhang
"""

#coding:utf-8
import requests
import json
import time
import win32api,win32con
import winsound

# Show an OK-only message box as soon as the script starts.
win32api.MessageBox(
    0,
    "start!!!",
    "@@##@@##",
    win32con.MB_OK,
)
# One-element list used as a mutable flag: getList() sets wrong[0] to True
# when a fetch attempt fails, and the main loop resets it on every pass.
wrong = [False]
def getList(timeout=30, retry_delay=300):
    """Fetch the document rows from the cbirc.gov.cn static JSON feed, retrying until it succeeds.

    Any exception raised while requesting or decoding the response sets the
    module-level flag ``wrong[0]`` to True, is printed, and the attempt is
    repeated after ``retry_delay`` seconds. The function does not return until
    one fetch succeeds.

    Args:
        timeout: Seconds to wait for the server before the attempt is treated
            as failed. Without it a stalled connection would block forever and
            the retry logic would never run.
        retry_delay: Seconds to sleep after a failed attempt before retrying.

    Returns:
        The value stored under ``data`` -> ``rows`` in the decoded JSON response.
    """
    url = 'http://www.cbirc.gov.cn/cn/static/data/DocInfo/SelectDocByItemIdAndChild/data_itemId=925,pageIndex=1,pageSize=18.json'
    while True:
        try:
            wbdata = requests.get(url, timeout=timeout).text
            news = json.loads(wbdata)['data']['rows']
        except Exception as e:
            # Deliberately broad: network errors, bad JSON and missing keys
            # are all handled by flagging the failure and trying again.
            wrong[0] = True
            print(e)
            time.sleep(retry_delay)
        else:
            return news
# Title snapshots shared with the polling loop; each name gets its own empty list.
news_list, news2_list, news_list_bak = [], [], []
# Starts as True; the polling loop tests it before raising its first alert.
first = True
def do_ring(times):
    """Sound an audible alert: one long beep, then the alarm tone ``times`` times.

    Args:
        times: How many times to play the alarm WAV file after the beep.
    """
    duration = 3000  # millisecond
    freq = 440  # Hz
    winsound.Beep(freq, duration)
    # Raw string: the Windows path contains backslashes that are not valid
    # escape sequences, which newer Python versions warn about.
    sound_path = r'C:\Windows\Media\Alarm01.wav'
    for _ in range(times):
        winsound.PlaySound(sound_path, winsound.SND_FILENAME)

# Poll forever. Each pass compares a fresh snapshot of the subtitles with the
# previous pass (alerting only after a failed-and-retried fetch), waits
# 30 seconds, then alerts on newly appeared subtitles containing both keywords.
while True:
    # Remember the snapshot from the previous pass before taking a new one.
    news_list_bak = news_list.copy()
    wrong[0] = False
    news_list = [row['docSubtitle'] for row in getList()]
    c = [x for x in news_list if x not in news_list_bak]
    if c and not first and wrong[0]:
        do_ring(5)
        print(c)
        # OK-only reminder message box
        win32api.MessageBox(0, "这是一个测试提醒OK消息框", "提醒", win32con.MB_OK)
    time.sleep(30)
    news2_list = [row['docSubtitle'] for row in getList()]
    c = [x for x in news2_list if x not in news_list]

    for d in c:
        # Test the new title itself. The previous code tested the leaked loop
        # variable (the last fetched row), so the keyword filter ignored which
        # titles were actually new.
        # The isinstance guard skips non-string subtitles (e.g. null in the
        # JSON) instead of crashing the monitor.
        if isinstance(d, str) and '告' in d and '面' in d:
            do_ring(5)
            # OK-only reminder message box
            win32api.MessageBox(0, "####IRC####", "@@##@@##", win32con.MB_OK)
    # Clear the first-pass flag; it was never reset before, which left the
    # recovery alert above unreachable.
    first = False

 

后台运行python的方法(不用开着Spyder或者cmd窗口了):

先把 Python 的安装目录加入环境变量 PATH,再在 cmd 下执行 pythonw fullpath.py(把 fullpath.py 换成脚本的完整路径)

执行命令:

监控页面信息_第3张图片

查看进程(其实代码中执行的MessageBox语句已经弹出来提示了):

监控页面信息_第4张图片

当然,也可以把下面的语句写入一个 .bat 文件,保存后双击执行:

pythonw.exe D:\python\untitled2.py

如果想让脚本开机后自动在后台运行(静默模式,不弹出 cmd 窗口),把下面的内容保存为 bat 文件,放到 C:\Users\zhang\AppData\Roaming\Microsoft\Windows\Start Menu\Programs\Startup 目录里。

rem When this file is re-invoked with the hide argument, skip the relaunch and jump to CmdBegin.
if "%1"=="hide" goto CmdBegin
rem First launch: use mshta and VBScript to run this same batch file again with the hide argument and window style 0, then exit this instance.
start mshta vbscript:createobject("wscript.shell").run("""%~0"" hide",0)(window.close)&&exit
:CmdBegin
rem Start the monitoring script with pythonw.exe.
pythonw.exe D:\python\untitled2.py

当然了,也可以加入声音提醒啥的(邮件提醒太麻烦了,声音就行啦)

def do_ring(times):
    """Sound an audible alert: one long beep, then the ring tone ``times`` times.

    Args:
        times: How many times to play the ring-tone WAV file after the beep.
    """
    duration = 3000  # millisecond
    freq = 440  # Hz
    winsound.Beep(freq, duration)
    # Raw string: the Windows path contains backslashes that are not valid
    # escape sequences, which newer Python versions warn about.
    sound_path = r'C:\Windows\Media\Ring01.wav'
    for _ in range(times):
        winsound.PlaySound(sound_path, winsound.SND_FILENAME)

参考资料

1.https://www.cnblogs.com/vhills/p/7288027.html

2.https://blog.csdn.net/weixin_39416561/article/details/84190336

3.https://www.cnblogs.com/sheng-247/p/10528160.html

4.https://blog.csdn.net/xieyan0811/article/details/102386873

5.https://blog.csdn.net/weixin_41822224/article/details/100167499

6.https://blog.csdn.net/IAlexanderI/article/details/88356415

你可能感兴趣的:(爬虫)