tags:
categories:
# Illustrative snippet: open a TCP socket, connect to port 80 and read once.
# NOTE(review): `host` is not defined anywhere in this excerpt — confirm.
client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
client.setblocking(False)
# Establishing the connection performs the TCP three-way handshake.
# NOTE(review): the original remark says this call blocks, yet the socket was
# switched to non-blocking mode on the line above — confirm which was meant.
client.connect((host, 80))
client.recv(1024)
import socket
from urllib.parse import urlparse
# 使用非阻塞io完成http请求
def get_url(url):
    """Fetch ``url`` over plain HTTP with a non-blocking socket and print the body.

    The socket is polled in busy loops (no selector), which is the point of the
    demonstration: the program has to keep asking the kernel whether the
    connection is ready and whether data has arrived.

    Args:
        url: Absolute ``http://`` URL. The request is always sent to port 80 of
            the URL's network location; an empty path is requested as ``/``.

    Raises:
        OSError: If the connection fails (anything other than "not ready yet").
        UnicodeDecodeError: If the response is not valid UTF-8.
    """
    import errno

    parts = urlparse(url)
    host = parts.netloc
    path = parts.path or "/"
    request = "GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(path, host).encode("utf8")

    # Open the socket in non-blocking mode.
    client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        client.setblocking(False)
        try:
            client.connect((host, 80))
        except BlockingIOError:
            # Expected for a non-blocking connect: the handshake is still in
            # progress and completes in the background.
            pass

        # Keep asking whether the connection is established. Real code could
        # do computation or start other connections here instead of spinning.
        while True:
            try:
                client.send(request)
                break
            except BlockingIOError:
                # Not connected yet (reported as EAGAIN on some platforms).
                continue
            except OSError as exc:
                # ENOTCONN also means "handshake not finished"; any other error
                # (e.g. connection refused) is final, so do not spin forever.
                if exc.errno != errno.ENOTCONN:
                    raise

        chunks = []
        while True:
            try:
                d = client.recv(1024)
            except BlockingIOError:
                # No data available yet; poll again.
                continue
            if not d:
                # Empty read: the server closed the connection.
                break
            chunks.append(d)

        data = b"".join(chunks).decode("utf8")
        # Split only at the FIRST blank line so that blank lines inside the
        # body are kept, and a response without a separator yields "".
        _headers, _sep, html_data = data.partition("\r\n\r\n")
        print(html_data)
    finally:
        # Release the socket even when connecting, sending or decoding fails.
        client.close()
# Script entry point: fetch one page as a demonstration.
if __name__ == "__main__":
    get_url("http://www.baidu.com")
import socket
from urllib.parse import urlparse
from selectors import DefaultSelector, EVENT_READ, EVENT_WRITE
# Selector shared by every Fetcher and by loop().
selector = DefaultSelector()
# Perform HTTP requests using select-style I/O multiplexing.
# URLs whose download is still in progress; Fetcher.readable removes entries.
urls = []
# Set to True by Fetcher.readable once `urls` becomes empty; loop() polls it.
stop = False
class Fetcher:
    """Download one URL with a non-blocking socket driven by selector callbacks.

    ``get_url`` starts the connection and registers ``connected`` for the
    write event; ``connected`` sends the request and registers ``readable``
    for the read event; ``readable`` accumulates the response and, when the
    peer closes the connection, prints the body and updates the module-level
    ``urls`` / ``stop`` bookkeeping.
    """

    def connected(self, key):
        """Send the HTTP request once the socket has become writable.

        Args:
            key: Selector key of the ready socket; ``key.fd`` is the value
                returned by ``self.client.fileno()``.
        """
        selector.unregister(key.fd)
        self.client.send("GET {} HTTP/1.1\r\nHost:{}\r\nConnection:close\r\n\r\n".format(self.path, self.host).encode("utf8"))
        # Now wait for the response: watch the same socket for readability.
        selector.register(self.client.fileno(), EVENT_READ, self.readable)

    def readable(self, key):
        """Read one chunk of the response; finish up when the peer closes.

        A ``while`` loop must not be used here: one read does not fetch the
        whole response, and a second immediate ``recv`` would fail because the
        kernel has no more data ready yet. The event loop calls this method
        again when more data arrives.

        Args:
            key: Selector key of the ready socket.
        """
        global stop

        d = self.client.recv(1024)
        if d:
            self.data += d
            return

        # Empty read: everything has been received, stop watching the socket.
        selector.unregister(key.fd)
        data = self.data.decode("utf8")
        # Split only at the FIRST blank line so that blank lines inside the
        # body are kept, and a response without a separator yields "".
        _headers, _sep, html_data = data.partition("\r\n\r\n")
        print(html_data)
        self.client.close()
        urls.remove(self.spider_url)
        if not urls:
            # Last pending URL finished: tell the event loop to exit.
            stop = True

    def get_url(self, url):
        """Start a non-blocking connection for ``url`` and register for write.

        Args:
            url: Absolute ``http://`` URL; the request goes to port 80 and an
                empty path is requested as ``/``.
        """
        self.spider_url = url
        url = urlparse(url)
        self.host = url.netloc
        self.path = url.path
        self.data = b""
        if self.path == "":
            self.path = "/"

        # Create the socket in non-blocking mode.
        self.client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self.client.setblocking(False)
        try:
            self.client.connect((self.host, 80))
        except BlockingIOError:
            # Expected: the handshake continues in the background.
            pass

        # Register the socket's file descriptor for the write event; when it
        # becomes writable the event loop calls self.connected. Sending right
        # here would mean waiting for the connection, hence the callback.
        selector.register(self.client.fileno(), EVENT_WRITE, self.connected)
def loop():
    """Run the event loop until the module-level ``stop`` flag becomes true.

    Each iteration waits for ready sockets and invokes the callback stored as
    the selector key's ``data``, passing the key itself.

    Notes:
        1. select itself has no register-style API; the selector adds it.
        2. Invoking callbacks after a socket state change is done by the
           programmer (here), not by the selector.
        3. Checking ``stop`` avoids calling select with nothing registered,
           which errors on Windows (where the default selector is select;
           on Linux it is epoll).
    """
    while not stop:
        for key, _mask in selector.select():
            handler = key.data
            handler(key)
# 回调+事件循环+select(poll\epoll)
# Script entry point: download 20 pages concurrently and print the elapsed time.
if __name__ == "__main__":
    import time

    start_time = time.time()
    for goods_id in range(20):
        url = "http://shop.projectsedu.com/goods/{}/".format(goods_id)
        # Track the URL so Fetcher.readable can tell when all are finished.
        urls.append(url)
        fetcher = Fetcher()
        fetcher.get_url(url)
    loop()
    print(time.time() - start_time)
def gen_func():
    """Yield a URL, print whatever is sent back, then yield 2 and 3.

    Returns:
        "bobby", carried as the value of the final StopIteration.
    """
    received = yield "http://projectsedu.com"
    print(received)
    for number in (2, 3):
        yield number
    return "bobby"
# Demo: step through the generator with next().
if __name__ == "__main__":
    gen = gen_func()
    print(next(gen))
    print(next(gen))
    print(next(gen))
    print(next(gen))  # raises an error
def gen_func():
    """Generator that both produces values and receives one from its caller.

    1. It can produce values (the URL, then 2 and 3).
    2. It can receive a value: whatever the caller passes to ``send()`` while
       the generator is paused at the first ``yield`` is printed.

    Returns:
        "bobby", carried as the value of the final StopIteration.
    """
    sent_value = yield "http://projectsedu.com"
    print(sent_value)
    for item in (2, 3):
        yield item
    return "bobby"
# Generators can not only produce values, they can also receive them.
if __name__ == "__main__":
    gen = gen_func()
    # Before send() may deliver a non-None value, the generator must be started
    # once so that it is paused at a yield.
    # Two ways to start it: 1. gen.send(None), 2. next(gen).
    # Same effect as next(gen): run to the first yield and return the yielded
    # value; `html` inside the generator has not been assigned at that point.
    url = gen.send(None)
    print(url)
    html = "bobby"
    # send() passes a value into the generator and also resumes it until the
    # next yield.
    # Send html; the value of the second yield is returned.
    print(gen.send(html))
    print(gen.send(html))
def gen_func():
    """Yield a URL, then 2 and 3, finishing cleanly if closed at the first yield.

    ``gen.close()`` throws GeneratorExit into the generator at the paused
    ``yield``. The generator must then finish; if it swallowed the exception
    and went on to ``yield 2``, ``close()`` would raise RuntimeError.

    Returns:
        "bobby" after the last yield; nothing when closed at the first yield.
    """
    try:
        yield "http://projectsedu.com"
    except GeneratorExit:
        # Finish with a plain return. Raising StopIteration by hand is wrong
        # here: inside a generator it is converted to RuntimeError (PEP 479).
        # GeneratorExit derives from BaseException, not Exception, so an
        # `except Exception` clause would not catch it.
        return
    yield 2
    yield 3
    return "bobby"
# Demo: close a generator that is paused at its first yield.
if __name__ == "__main__":
    gen = gen_func()
    print(next(gen))
    gen.close()
    print("bobby")
    # next(gen)  # would raise StopIteration
def gen_func():
    """Yield a URL, 2 and 3, reporting progress and handling a thrown error.

    An Exception thrown into the generator while it is paused at the first
    yield is caught and reported; execution then continues to the next yield.

    Returns:
        "bobby", carried as the value of the final StopIteration.
    """
    try:
        yield "http://projectsedu.com"
    except Exception:
        print("异常处理")
    print("位置1")
    yield 2
    print("位置2")
    yield 3
    return "bobby"
# Demo: throw exceptions into a paused generator.
if __name__ == "__main__":
    gen = gen_func()
    print(next(gen))
    # Although the first yield has already produced the URL, the exception
    # thrown below is raised at that `yield "http://projectsedu.com"`.
    # An exception instance is passed because the (type, value) call form of
    # throw() is deprecated.
    a = gen.throw(Exception("download error"))
    print(a)  # this is 2
    print(next(gen))  # this is 3
    # The generator is now paused at `yield 3`, so this exception is raised
    # there and comes back out of throw().
    gen.throw(Exception("download error"))
# python3.3新加了yield from语法
from itertools import chain
# Sample iterables used by the chain / my_chain demonstrations.
my_list = [1, 2, 3]
my_dict = {
    "bobby1": "http://projectsedu.com",
    "bobby2": "http://www.imooc.com",
}
# chain的用处是把所有可迭代对象循环获得值
# for value in chain(my_list, my_dict, range(5, 10)):
# print(value)
# 做个yield from的测试
# def g1(iterable):
# yield iterable
#
# def g2(iterable):
# yield from iterable
#
# # range(0, 10)
# for value in g1(range(10)):
# print(value)
# # 0, 1, ... 9
# for value in g2(range(10)):
# print(value)
# 自己实现一个chain的功能
def my_chain(*args, **kwargs):
    """Yield every item of each positional iterable, one iterable after another.

    A hand-written equivalent of ``itertools.chain``. Keyword arguments are
    accepted but not used.
    """
    for iterable in args:
        # Same effect as looping over `iterable` and yielding each value,
        # although `yield from` can do far more than that.
        yield from iterable
# Print every value produced by chaining a list, a dict and a range.
for value in my_chain(my_list, my_dict, range(5, 10)):
    print(value)
# main is the caller, g1 is the delegating generator, gen is the sub-generator.
# `yield from` sets up a two-way channel between the caller and the
# sub-generator: values sent by the caller go straight to the sub-generator gen.
def g1(gen):
    yield from gen
def main():
    """Caller side of the ``yield from`` channel: prime the delegating generator.

    ``g1`` requires a sub-generator argument, so a minimal one is supplied.
    """
    def sub_gen():
        # Minimal sub-generator: pauses once so the channel can be primed.
        yield

    g = g1(sub_gen())
    g.send(None)
# Filled by middle(): maps each key to the value returned by sales_sum(key).
final_result = {}
def sales_sum(pro_name):
    """Coroutine that accumulates sales figures sent to it.

    Prime it with ``send(None)`` / ``next()``, send one number per sale, then
    send ``None`` to finish. Only ``None`` terminates the accumulation, so a
    sale of ``0`` is recorded like any other figure.

    Args:
        pro_name: Product name, used as a prefix in the progress output.

    Returns:
        A ``(total, nums)`` tuple, delivered as the StopIteration value (or as
        the result of ``yield from``): the sum of the figures and their list.
    """
    total = 0
    nums = []
    while True:
        x = yield
        print(pro_name+"销量: ", x)
        if x is None:
            break
        total += x
        nums.append(x)
    return total, nums
def middle(key):
    """Delegating generator: forward sent values to ``sales_sum(key)`` forever.

    Each time the sub-generator finishes, ``yield from`` catches its
    StopIteration and hands back the return value, which is stored in
    ``final_result[key]`` before a fresh sub-generator is started.
    """
    while True:
        summary = yield from sales_sum(key)
        final_result[key] = summary
        print(key+"销量统计完成!!.")
def main():
    """Feed each product's sales figures through ``middle`` and print the totals."""
    data_sets = {
        "bobby牌面膜": [1200, 1500, 3000],
        "bobby牌手机": [28, 55, 98, 108],
        "bobby牌大衣": [280, 560, 778, 70],
    }
    for key, data_set in data_sets.items():
        print("start key:", key)
        m = middle(key)
        # Prime the middle coroutine so it is paused at its first yield.
        next(m)
        for value in data_set:
            # Through `yield from`, each value reaches the sub-generator
            # sales_sum directly.
            m.send(value)
        # None tells the sub-generator that this data set is complete.
        m.send(None)
    print("final_result:", final_result)
# Script entry point for the sales-summary demonstration.
if __name__ == '__main__':
    main()
# 如果直接调用需要自己加上异常捕获
# if __name__ == "__main__":
# my_gen = sales_sum("bobby牌手机")
# my_gen.send(None)
# my_gen.send(1200)
# my_gen.send(1500)
# my_gen.send(3000)
# try:
# my_gen.send(None)
# except StopIteration as e:
# result = e.value
# print(result)
#pep380
#1. RESULT = yield from EXPR可以简化成下面这样
#一些说明
"""
_i:子生成器,同时也是一个迭代器
_y:子生成器生产的值
_r:yield from 表达式最终的值
_s:调用方通过send()发送的值
_e:异常对象
"""
# Pseudo-code: simplified expansion of `RESULT = yield from EXPR`.
# EXPR is an iterable; _i is in fact the sub-generator.
_i = iter(EXPR)
try:
    # Prime the sub-generator and store the first value it yields in _y.
    _y = next(_i)
except StopIteration as _e:
    # If StopIteration is raised, save the exception's `value` attribute in _r;
    # this is the return value in the simplest case.
    _r = _e.value
else:
    # Run this loop; the delegating generator stays blocked meanwhile.
    while 1:
        # Yield the sub-generator's value and wait for the caller's `send()`;
        # the value sent in is stored in _s.
        _s = yield _y
        try:
            # Forward _s and try to continue.
            _y = _i.send(_s)
        except StopIteration as _e:
            # If the sub-generator raises StopIteration, store the exception's
            # `value` attribute in _r, leave the loop and resume the
            # delegating generator.
            _r = _e.value
            break
# _r is the value of the whole `yield from` expression.
RESULT = _r
"""
yield from 处理下面的情况代码
1. 子生成器可能只是一个迭代器,并不是一个作为协程的生成器,所以它不支持.throw()和.close()方法;
2. 如果子生成器支持.throw()和.close()方法,但是在子生成器内部,这两个方法都会抛出异常;
3. 调用方让子生成器自己抛出异常
4. 当调用方使用next()或者.send(None)时,都要在子生成器上调用next()函数,当调用方使用.send()发送非 None 值时,才调用子生成器的.send()方法;
"""
# Pseudo-code: fuller expansion of `RESULT = yield from EXPR`, including
# close(), throw() and the next()/send() distinction.
# NOTE(review): `sys` is used below but not imported in this excerpt.
_i = iter(EXPR)
try:
    # Prime the sub-generator.
    _y = next(_i)
except StopIteration as _e:
    _r = _e.value
else:
    while 1:
        try:
            _s = yield _y
        except GeneratorExit as _e:
            # The delegating generator is being closed: close the
            # sub-generator too if it has a close() method, then re-raise.
            try:
                _m = _i.close
            except AttributeError:
                pass
            else:
                _m()
            raise _e
        except BaseException as _e:
            # Any other exception thrown in by the caller is forwarded to the
            # sub-generator's throw() if it has one, otherwise re-raised here.
            _x = sys.exc_info()
            try:
                _m = _i.throw
            except AttributeError:
                raise _e
            else:
                try:
                    _y = _m(*_x)
                except StopIteration as _e:
                    _r = _e.value
                    break
        else:
            try:
                # None from the caller means plain next(); any other value is
                # forwarded with send().
                if _s is None:
                    _y = next(_i)
                else:
                    _y = _i.send(_s)
            except StopIteration as _e:
                _r = _e.value
                break
RESULT = _r
#生成器是可以暂停的函数
import inspect
def gen_func():
    """Single-yield generator used to observe generator states.

    The ``yield`` does two things: it hands ``1`` to the caller, and it
    receives whatever the caller passes back with ``send()``.

    Returns:
        "bobby", carried as the value of the final StopIteration.
    """
    value = yield 1
    return "bobby"
# Demo: print the generator's state before, during and after execution.
if __name__ == "__main__":
    gen = gen_func()
    print(inspect.getgeneratorstate(gen))  # generator state: GEN_CREATED
    next(gen)
    print(inspect.getgeneratorstate(gen))  # generator state: GEN_SUSPENDED
    try:
        next(gen)
    except StopIteration:
        pass
    print(inspect.getgeneratorstate(gen))  # generator state: GEN_CLOSED
# 1. 用同步的方式编写异步的代码, 在适当的时候暂停函数并在适当的时候启动函数
# 下面是伪代码
import socket
def get_socket_data():
    # Placeholder sub-generator for the pseudo-code: yields a fixed string.
    yield "bobby"
def downloader(url):
    # Pseudo-code sketch of a coroutine-style downloader; not runnable as is.
    client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    client.setblocking(False)
    try:
        # NOTE(review): `host` is not defined in this function — presumably it
        # should be derived from `url`; confirm.
        client.connect((host, 80))  # blocking does not consume CPU
    except BlockingIOError as e:
        pass
    # NOTE(review): `self` does not exist in a plain function, and `selector`
    # / EVENT_WRITE are not defined in this snippet — confirm intent.
    selector.register(self.client.fileno(), EVENT_WRITE, self.connected)
    # Read the data from the socket.
    source = yield from get_socket_data()
    data = source.decode("utf8")
    html_data = data.split("\r\n\r\n")[1]
    print(html_data)
def download_html(html):
    # Pseudo-code: delegate to downloader and keep its result.
    # NOTE(review): downloader is declared with a `url` parameter but is
    # called without arguments here — confirm.
    html = yield from downloader()
if __name__ == "__main__":
    # Coroutines are still scheduled by "event loop + coroutine"; coroutines
    # run in a single thread.
    pass
import types
# 实现了__await__魔法方法就可以await
# from collections import Awaitable
# 装饰之后实现了__await__
# @types.coroutine
# def downloader(url):
# yield "bobby"
async def downloader(url):
    # Native coroutine that completes immediately with a fixed string.
    return "bobby"
async def download_url(url):
    """Await ``downloader(url)`` and return its result.

    ``await`` may only appear inside an ``async def`` function.
    """
    # Any preparatory work would go here.
    return await downloader(url)
# Demo: drive a native coroutine by hand.
if __name__ == "__main__":
    coro = download_url("http://www.imooc.com")
    # next() cannot be used on a native coroutine; it raises an error.
    # Because "bobby" is returned directly and nothing is passed further up,
    # this send() reports StopIteration.
    coro.send(None)