csdn自动把你手写的标签里的style属性给过滤掉了,而博客园就不会!这些内容本不会显示的。
回顾:单线程+多任务,速度会很快。
server.py
import time
from flask import Flask, render_template
# Flask application object; the @app.route decorators in this script register their views on it.
app = Flask(__name__)
@app.route('/index')
def index():
    """Render test.html after a two-second pause."""
    # The pause stands in for a slow endpoint so the crawler's concurrency is visible.
    time.sleep(2)
    page = render_template('test.html')
    return page
@app.route('/index1')
def index1():
    """Render test.html after a two-second pause (second demo endpoint)."""
    # Same artificial delay as the other demo routes.
    time.sleep(2)
    page = render_template('test.html')
    return page
@app.route('/index2')
def index2():
    """Render test.html after a two-second pause (third demo endpoint)."""
    # Same artificial delay as the other demo routes.
    time.sleep(2)
    page = render_template('test.html')
    return page
# Start the Flask server only when this file is executed as a script.
if __name__ == '__main__':
    app.run()
jiexi.py
import aiohttp
import asyncio
import time
from lxml import etree
# Wall-clock start time; the elapsed time is printed after all requests finish.
start = time.time()
# The three local endpoints to fetch (port 5000 on localhost).
urls = [
'http://127.0.0.1:5000/index',
'http://127.0.0.1:5000/index1',
'http://127.0.0.1:5000/index2',
]
# Coroutine function: sends the request and captures the response data.
# Detail 1: put `async` in front of every `with`.
# Detail 2: put `await` in front of every blocking operation.
async def get_request(url):
    """Fetch *url* with aiohttp and return the response body as text.

    A new ClientSession is opened for the call and closed when it returns.
    """
    async with aiohttp.ClientSession() as session:
        # Headers / proxy can be passed here, e.g.
        # session.get(url, headers=headers, proxy="http://ip:port")
        # `session.get()` is itself an async context manager, so no extra
        # `await` is needed in front of it.
        async with session.get(url) as response:
            # text() yields a str; read() would return bytes instead.
            page_text = await response.text()
            return page_text
# Done-callback attached to each request task.
def parse(task):
    """Parse the page text held by a finished task and print its <li> texts.

    *task* must expose ``result()`` returning the HTML text to parse.
    """
    html_tree = etree.HTML(task.result())
    print(html_tree.xpath('//li/text()'))
# Create and install the event loop explicitly before scheduling anything:
# letting asyncio create a loop implicitly (ensure_future / get_event_loop
# with no loop running) is deprecated in recent Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    tasks = []
    for url in urls:
        # Wrap each coroutine in a task bound to our loop and attach the
        # parsing callback, which runs once that task has finished.
        task = loop.create_task(get_request(url))
        task.add_done_callback(parse)
        tasks.append(task)
    # Block until every task (and therefore every request) has completed.
    loop.run_until_complete(asyncio.wait(tasks))
finally:
    # Release the loop's resources even if a request failed.
    loop.close()
# Total elapsed wall-clock time for all requests.
print(time.time() - start)
"""
['i am hero', 'i am superman', 'i am spider']
['i am hero', 'i am superman', 'i am spider']
['i am hero', 'i am superman', 'i am spider']
2.0393407344818115
"""
import time
from selenium import webdriver
# Drive Chrome through a JD search and dump the resulting page source.
bro = webdriver.Chrome(executable_path='chromedriver.exe')
try:
    bro.get('https://www.jd.com')
    time.sleep(1)
    # Locate the search box and type the query.
    search_input = bro.find_element_by_id('key')
    search_input.send_keys('mac pro')
    btn = bro.find_element_by_xpath('//*[@id="search"]/div/div[2]/button')
    btn.click()
    time.sleep(2)
    # Scroll to the bottom of the page by executing JavaScript:
    # window.scrollTo(0, document.body.scrollHeight)
    bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    time.sleep(2)
    # Grab the page source as currently held by the browser.
    page_text = bro.page_source
    print(page_text)
    time.sleep(2)
finally:
    # Always shut the browser down, even if a step above raised.
    bro.quit()
"""
爬取药监总局相关企业的详细信息:
http://scxk.nmpa.gov.cn:81/xk/
"""
from time import sleep
from selenium import webdriver
from lxml import etree
# Collect the first listing page plus the next three pages, then print
# the two title attributes found in every list entry.
bro = webdriver.Chrome(executable_path='chromedriver.exe')
try:
    bro.get('http://scxk.nmpa.gov.cn:81/xk/')
    sleep(1)
    page_text = bro.page_source
    page_text_list = [page_text]
    for _ in range(3):
        bro.find_element_by_id('pageIto_next').click()  # click "next page"
        sleep(1)  # give the next page time to load before reading it
        page_text_list.append(bro.page_source)
    # A separate loop variable keeps `page_text` (the first page) intact.
    for page_source in page_text_list:
        tree = etree.HTML(page_source)
        li_list = tree.xpath('//*[@id="gzlist"]/li')
        for li in li_list:
            title = li.xpath('./dl/@title')[0]
            num = li.xpath('./ol/@title')[0]
            print(title+':'+num)
    sleep(2)
finally:
    # Always shut the browser down, even if a step above raised.
    bro.quit()
动作链
一系列连续的动作
from selenium.webdriver import ActionChains
实现标签定位时,如果发现定位的标签是存在于iframe标签之中的,则在定位之前必须先执行一个固定的操作:bro.switch_to.frame('iframe_id')
拖动=点击+滑动
# Drag = press and hold on the element, move in small steps, then release.
# `bro` (the driver) and `div_tag` (the element to drag) must already exist.
action = ActionChains(bro)
action.click_and_hold(div_tag)
for _ in range(5):
    # perform() makes the queued actions execute immediately.
    action.move_by_offset(17, 5).perform()
# release() only queues the action; it must be performed too, otherwise
# the mouse button is never actually released.
action.release().perform()
模拟登录
"""
https://www.cnblogs.com/bobo-zhang/articles/10815914.html
模拟登录12306:
https://kyfw.12306.cn/otn/login/init
"""
from time import sleep
from selenium import webdriver
from chaojiying_Python.chaojiying import Chaojiying_Client
from selenium.webdriver import ActionChains
from PIL import Image
from lxml import etree
def get_text(imgPath, imgType):
    """Send the image at *imgPath* to Chaojiying and return the recognised text.

    Args:
        imgPath: path of the local image file to upload.
        imgType: Chaojiying captcha type code, passed through to PostPic.

    Returns:
        The 'pic_str' entry of the PostPic response.
    """
    # Account name, password and software ID (generated in the Chaojiying user centre).
    chaojiying = Chaojiying_Client('用户名', '密码', '911057')
    # Read the file named by the argument (not a file literally called
    # "imgPath") and close the handle as soon as the bytes are in memory.
    with open(imgPath, 'rb') as img_file:
        im = img_file.read()
    return chaojiying.PostPic(im, imgType)['pic_str']
# Open the 12306 login page, screenshot it, crop the captcha image out of
# the screenshot and send the crop to the recognition service.
bro = webdriver.Chrome(executable_path='chromedriver.exe')
bro.get('https://kyfw.12306.cn/otn/login/init')
sleep(1)
# Full-page screenshot.
bro.save_screenshot('main.png')
code_img_tag = bro.find_element_by_xpath('//*[@id="loginForm"]/div/ul[2]/li[4]/div/div/div[3]/img')
location = code_img_tag.location  # x/y, used below as the upper-left corner of the crop box
size = code_img_tag.size  # width and height of the captcha image
print(location, size)
# Crop box as (left, upper, right, lower).
rangle = (int(location['x']), int(location['y']), int(location['x']+size['width']), int(location['y']+size['height']))
code_img_name = 'code.png'
# Context manager closes the screenshot file once the crop is saved.
with Image.open('main.png') as i:
    frame = i.crop(rangle)
    frame.save(code_img_name)
# Recognise the file just written (same name, not a second literal).
result = get_text(code_img_name, 9004)
# The recognition result is a string of points such as
# '55,70|267,133', which becomes [[55, 70], [267, 133]].
# split('|') also covers the single-point case (no '|' present): it then
# yields a one-element list, so no separate branch is needed.
all_list = []
for pair in result.split('|'):
    x_str, y_str = pair.split(',')[:2]
    all_list.append([int(x_str), int(y_str)])
print(all_list)
# Click every recognised point on the captcha, then submit the login form.
for x, y in all_list:
    chain = ActionChains(bro)
    # Offsets are given relative to the captcha element, not the page.
    # NOTE(review): which point of the element the offset is measured from
    # may depend on the installed Selenium version - confirm.
    chain.move_to_element_with_offset(code_img_tag, x, y)
    chain.click()
    chain.perform()
    sleep(1)
username_box = bro.find_element_by_id('username')
username_box.send_keys('[email protected]')
password_box = bro.find_element_by_id('password')
password_box.send_keys('123456')
login_button = bro.find_element_by_id('loginSub')
login_button.click()
sleep(3)
# bro.quit() is left disabled here, so the browser is not closed by this script.