爬虫部分使用 bs4 + requests，
上传部分使用 wordpress_xmlrpc。
#coded by 伊玛目的门徒
#coding=utf-8
from wordpress_xmlrpc import Client, WordPressPost
from wordpress_xmlrpc.methods.posts import GetPosts, NewPost
from wordpress_xmlrpc.methods.users import GetUserInfo
import time
import requests
from bs4 import BeautifulSoup
# Request headers passed to requests.get(); the user-agent value is a
# desktop Chrome-style string.
header = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/63.0.XXXX.XXX Safari/537.36'
    ),
}
def getcontent():
    """Fetch the article page and return its body paragraphs as HTML.

    Downloads the hard-coded hexun.com article, decodes the response as GBK
    and concatenates every ``<p>`` element selected by
    ``div.art_contextBox p``.

    Returns:
        The concatenated paragraph markup (an empty string when the selector
        matches nothing), or ``None`` when the HTTP request itself fails.
    """
    url = 'https://futures.hexun.com/2019-12-17/199706366.html'
    try:
        # A timeout keeps the script from hanging forever on a stalled server.
        response = requests.get(url, headers=header, timeout=10)
    except requests.RequestException as exc:
        # Network failures stay non-fatal (the caller receives None), but they
        # are reported instead of being silently discarded.
        print('Failed to fetch {}: {}'.format(url, exc))
        return None

    # Force GBK decoding instead of relying on the charset requests detects.
    response.encoding = 'gbk'
    soup = BeautifulSoup(response.text, "lxml")
    paragraphs = soup.select('div.art_contextBox p')
    # Keep each paragraph's own markup so the uploaded post stays formatted.
    cont = ''.join(str(paragraph) for paragraph in paragraphs)
    print(cont)
    return cont
def wpsend(content):
    """Publish *content* as a new WordPress post over XML-RPC.

    The post is created with a fixed title, status ``publish``, the tag
    ``test`` and the category ``Tests``; a timestamped confirmation is
    printed after the upload call returns.

    Args:
        content: HTML string for the post body. ``None`` or an empty string
            means there is nothing to publish, so the upload is skipped.

    Returns:
        None.
    """
    if not content:
        # A failed scrape hands us None, which would otherwise raise
        # TypeError in the string concatenation below.
        print('No content to upload; skipping.')
        return

    # NOTE(review): credentials are hard-coded (redacted here) -- consider
    # reading them from the environment or a config file.
    # Username and password must be two separate arguments; without the comma
    # the adjacent literals merge into a single string.
    wp = Client('http://www.lianhanghao.xyz/xmlrpc.php', '***', '***')
    print(content)

    post = WordPressPost()
    post.title = '实例演示'
    post.content = " ''' " + content + " ''' "
    post.post_status = 'publish'
    post.terms_names = {
        'post_tag': ['test'],
        'category': ['Tests'],
    }
    wp.call(NewPost(post))

    localtime = time.localtime()
    print('文档已上传 {}'.format(time.strftime("%Y-%m-%d %H:%M:%S", localtime)))
# Scrape the article, then hand whatever the scraper returned to the uploader.
_scraped_html = getcontent()
wpsend(_scraped_html)
网站在 http://www.lianhanghao.xyz/tests/317362
演示视频: https://www.bilibili.com/video/av80056985/
Python 爬虫自动采集并上传更新网站