使用协程爬取网页,计算网页数据大小

from gevent import monkey; monkey.patch_all()
import gevent
from urllib import request


def get_url(url):
    """Fetch ``url`` and return the length of its body decoded as UTF-8.

    The result counts decoded characters, not raw bytes, so for pages with
    multi-byte characters it is smaller than the size in bytes.

    Args:
        url: Address passed to ``urllib.request.urlopen``.

    Returns:
        Number of characters in the decoded response body.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Exceptions raised by ``urlopen`` propagate unchanged.
    """
    # The context manager closes the response even when reading fails;
    # without it the connection stays open until garbage collection.
    with request.urlopen(url) as ret:
        raw = ret.read()
    return len(raw.decode("UTF-8"))


# Start one greenlet per page; each runs get_url on its own URL.
g1, g2, g3 = (
    gevent.spawn(get_url, "http://www.baidu.com"),
    gevent.spawn(get_url, "http://www.jcgzsstudio.com"),
    gevent.spawn(get_url, "http://www.hao123.com"),
)

# Block until all three greenlets have finished.
gevent.joinall([g1, g2, g3])
# Print the three results, one per line.
print(g1.value, g2.value, g3.value, sep="\n")
  • 这里试了几个网址:现在很多网站都已改用 https,所以部分 http 地址请求不成功(请求失败的协程,其 value 为 None)。如何打开 https 网址,留到学习爬虫时再解决。

使用循环来实现:

from gevent import monkey; monkey.patch_all()
import gevent
from urllib import request


def get_url(url):
    """Fetch ``url`` and return the length of its body decoded as UTF-8.

    The result counts decoded characters, not raw bytes, so for pages with
    multi-byte characters it is smaller than the size in bytes.

    Args:
        url: Address passed to ``urllib.request.urlopen``.

    Returns:
        Number of characters in the decoded response body.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Exceptions raised by ``urlopen`` propagate unchanged.
    """
    # The context manager closes the response even when reading fails;
    # without it the connection stays open until garbage collection.
    with request.urlopen(url) as ret:
        raw = ret.read()
    return len(raw.decode("UTF-8"))


# Pages whose sizes are to be measured.
url_list = [
    "http://www.baidu.com",
    "http://www.jcgzsstudio.com",
    "http://www.hao123.com",
]

# One greenlet per URL, kept in the same order as url_list.
g_list = [gevent.spawn(get_url, url) for url in url_list]

# Block until every greenlet has finished, then print each result.
gevent.joinall(g_list)
for g in g_list:
    print(g.value)

你可能感兴趣的:(使用协程爬取网页,计算网页数据大小)