# 0. Python - 简单的爬虫示例 (ip138)

import requests
import xml.etree.ElementTree as ET

from xml.parsers.expat import ParserCreate

class DefaultSaxHandler(object):
    """Expat callback holder that collects ``(title, href)`` pairs.

    For every start tag other than ``map`` that carries both a ``title`` and
    an ``href`` attribute, ``(title, href)`` is appended to the list that was
    passed to the constructor.
    """

    def __init__(self, provinces):
        """Store ``provinces``, the list that results are appended to.

        The list is kept by reference (not copied), so the caller sees the
        appended tuples in its own list object.
        """
        self.provinces = provinces

    def start_element(self, name, attrs):
        """Record the ``title``/``href`` attributes of a non-``map`` start tag.

        Args:
            name: Tag name of the element being opened.
            attrs: Mapping of attribute names to values for that element.
        """
        if name == 'map':
            return
        title = attrs.get('title')
        href = attrs.get('href')
        # Elements missing either attribute carry nothing to record; skip
        # them instead of raising KeyError and aborting the whole parse.
        if title is None or href is None:
            return
        self.provinces.append((title, href))

    def end_element(self, name):
        """Do nothing on end tags."""
        pass

    def char_data(self, text):
        """Do nothing with character data."""
        pass

def get_provinces(url):  # e.g. http://www.ip138.com/post/
    """Download ``url`` and return the ``(title, href)`` pairs of its map block.

    The page body is decoded as GB2312, the span from the opening ``<map`` tag
    through the closing ``</map>`` tag is cut out, printed, and fed to an
    expat parser whose callbacks come from ``DefaultSaxHandler``.

    Args:
        url: Address of the page to fetch.

    Returns:
        A list of ``(title, href)`` tuples collected by the handler, or an
        empty list when the page contains no ``<map ...> ... </map>`` span.

    Raises:
        xml.parsers.expat.ExpatError: If the extracted span is not
            well-formed XML.
    """
    # NOTE(review): both markers were empty strings in the original, which
    # made the slice below always empty. '<map' / '</map>' are inferred from
    # the handler's special-casing of the 'map' element -- confirm against
    # the actual page markup (inspect it with the browser dev tools).
    start_marker = '<map'
    end_marker = '</map>'

    # A timeout keeps a stalled server from hanging the script indefinitely.
    response = requests.get(url, timeout=10)
    content = response.content.decode('gb2312')

    start = content.find(start_marker)
    if start == -1:
        return []
    # Search for the closing tag only after the opening one.
    end = content.find(end_marker, start + len(start_marker))
    if end == -1:
        return []

    # strip() drops leading/trailing whitespace around the extracted span.
    content = content[start:end + len(end_marker)].strip()
    print(content)

    provinces = []
    handler = DefaultSaxHandler(provinces)
    parser = ParserCreate()
    parser.StartElementHandler = handler.start_element
    parser.EndElementHandler = handler.end_element
    parser.CharacterDataHandler = handler.char_data
    # isfinal=True tells expat this is the whole document, so a truncated
    # or unclosed span is reported instead of being silently accepted.
    parser.Parse(content, True)
    return provinces

def find_provinces(provinces,name):
    """Look up ``name`` among ``provinces`` and return its second field.

    ``provinces`` is an iterable of indexable entries; the first entry whose
    element 0 equals ``name`` is printed and its element 1 is returned.
    ``None`` is returned when no entry matches.
    """
    hit = next((entry for entry in provinces if entry[0] == name), None)
    if hit is None:
        return None
    print(hit)
    return hit[1]


# Script entry: fetch the page at import/run time, dump every collected
# (title, href) pair, then print the href recorded for "宁夏"
# (prints None when that title is not in the list).
provinces = get_provinces("http://www.ip138.com/post/")
print(provinces)
print(find_provinces(provinces,"宁夏"))

  
  

# 你可能感兴趣的: (python)