自从会了爬虫,妈妈再也不担心我不会植物分类啦

代码

# -*- coding: UTF-8 -*-
import urllib
import urllib2
import re
# Look up a plant species on FRPS (frps.iplant.cn) and print its taxonomy;
# when FRPS has no result page for the name, dump the Chinese Wikipedia
# article for it instead.
#
# NOTE(review): the published listing lost its HTML tag anchors inside several
# regex literals (the tags were stripped when the page was rendered). The
# affected patterns are marked below and must be re-derived from the live
# FRPS markup before the taxonomy extraction can be trusted.


def _fetch(address):
    """Return the raw body of a GET request to *address*."""
    response = urllib2.urlopen(urllib2.Request(address))
    try:
        return response.read()
    finally:
        # Release the connection even if read() raises.
        response.close()


name = str(raw_input('请输入物种名:'))
values = {'id': name}
data = urllib.urlencode(values)
url = "http://frps.iplant.cn/frps"
content = _fetch(url + "?" + data)

# The presence of this marker string selects the FRPS parsing branch;
# otherwise the script falls back to Wikipedia.
if "响应时间" in content:
    # Text following the "<n>. " prefix inside the 24px heading.
    pattern = re.compile(r'24px.*?b>.*?\. (.*?)<', re.S)
    species = re.findall(pattern, content)
    # NOTE(review): the second replace is a no-op as published; it presumably
    # normalised a different (e.g. full-width) comma originally - confirm.
    species = species[0].replace(' ', ',').replace(',', ',')

    # Two capture groups, so findall yields tuples that are joined with a space.
    # NOTE(review): the trailing group looks truncated by tag stripping.
    pattern = re.compile('16px.*?Arial.*?b>(.*?)<.*?(.*?)', re.S)
    name = re.findall(pattern, content)
    name = " ".join(name[0])

    # NOTE(review): placeholder - the enclosing tags of this pattern were
    # stripped from the listing; only '(.*?)' and a line break survived.
    pattern = re.compile('(.*?)\n', re.S)
    content = re.findall(pattern, content)

    # (?:...) is a non-capturing group, so only the outer group is returned.
    pattern = re.compile('(.*?(?:门|纲).*?)<', re.S)
    items = re.findall(pattern, content[0])

    # NOTE(review): placeholder - the tag anchors (and the closing quote) of
    # this pattern were lost in the listing; restore them from the page markup.
    pattern = re.compile('(.*?)', re.S)
    items += re.findall(pattern, content[0])

    items.append(species + " " + name)
    for item in items:
        print(item)
else:
    url = "https://zh.wikipedia.org/wiki/"
    # The article title goes into the path, so quote the bare name; the
    # urlencoded "id=..." query string would request a page literally
    # titled "id=<name>".
    geturl = url + urllib.quote(name)
    content = _fetch(geturl)
    print(content)

原始页面

自从会了爬虫妈妈再也不担心我不会植物分类啦_第1张图片

查询效果

自从会了爬虫妈妈再也不担心我不会植物分类啦_第2张图片

你可能感兴趣的:(爬虫)