下面以获取IP地址的物理位置为例作简要记录,以备后用:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Print the text that ip138.com reports inside its <center> element."""
import re

try:  # Python 2 module names, which the rest of this script refers to
    import HTMLParser
    import urllib2
except ImportError:  # Python 3: same APIs under new names, aliased to the old ones
    import html.parser as HTMLParser
    import urllib.request as urllib2

# Charset declaration in raw, still-undecoded markup
# (``charset=gb2312`` as well as ``charset="utf-8"``).
_RAW_CHARSET_RE = re.compile(br"charset=[\"']?([A-Za-z0-9_-]+)", re.I)
# Charset declaration inside an attribute value such as
# ``text/html; charset=gb2312``.
_ATTR_CHARSET_RE = re.compile(r"charset=([A-Za-z0-9_-]+)", re.I)


class IPParser(HTMLParser.HTMLParser):
    """Fetch a page and print the text found directly inside ``<center>``.

    Constructing an instance performs the whole lookup: the page is
    downloaded, decoded, parsed, and the ``<center>`` text is printed.
    On any failure the single line ``So sorry!`` is printed instead.

    Attributes:
        flag: Name of the most recently opened tag, or ``None`` once a tag
            has been closed.
        code: Charset name used to decode the page (``DEFAULT_CHARSET``
            unless the page declares another one).
        info: Text of the last non-empty ``<center>`` element, or ``None``
            when none has been seen.
    """

    DEFAULT_URL = "http://iframe.ip138.com/ic.asp"
    DEFAULT_CHARSET = "GB2312"

    def __init__(self, url=DEFAULT_URL, timeout=5):
        """Download *url* (giving up after *timeout* seconds) and parse it."""
        # The base initialiser calls reset(), which also sets up our state.
        HTMLParser.HTMLParser.__init__(self)
        try:
            response = urllib2.urlopen(url, timeout=timeout)
            try:
                raw = response.read()
            finally:
                # Release the connection even when read() fails.
                response.close()
            declared = _RAW_CHARSET_RE.search(raw)
            if declared:
                self.code = declared.group(1).decode("ascii")
            self.feed(self._decode(raw))
        except Exception:
            # Deliberate best-effort boundary: the script reports failure
            # with one fixed line, but Ctrl-C / SystemExit still propagate.
            print("So sorry!")

    def reset(self):
        """Reset the parser and clear the lookup state."""
        HTMLParser.HTMLParser.reset(self)
        self.flag = None
        self.code = self.DEFAULT_CHARSET
        self.info = None
        self._chunks = []

    def _decode(self, raw):
        """Decode *raw* bytes with ``self.code``, falling back to the default."""
        try:
            return raw.decode(self.code, "replace")
        except LookupError:
            # The page declared a charset Python does not know.
            self.code = self.DEFAULT_CHARSET
            return raw.decode(self.code, "replace")

    def handle_starttag(self, tag, attrs):
        """Remember the open tag and pick up a charset from ``<meta>``."""
        self.flag = tag
        if tag == "center":
            self._chunks = []
        elif tag == "meta":
            for name, value in attrs:
                if not value:
                    continue
                if name == "charset":
                    self.code = value
                else:
                    declared = _ATTR_CHARSET_RE.search(value)
                    if declared:
                        self.code = declared.group(1)

    def handle_data(self, data):
        """Collect text that appears directly inside ``<center>``."""
        if self.flag == "center":
            # The parser may deliver one text run in several pieces.
            self._chunks.append(data)

    def handle_endtag(self, tag):
        """Print the collected text when ``</center>`` is reached."""
        # Text following a closing tag no longer belongs to that tag.
        self.flag = None
        if tag != "center" or not self._chunks:
            return
        text = "".join(self._chunks)
        self._chunks = []
        if isinstance(text, bytes):
            # Only reachable on Python 2 when undecoded bytes were fed in.
            text = self._decode(text)
        self.info = text
        if str is bytes:
            # Python 2: emit the same UTF-8 bytes the script always printed.
            text = text.encode("UTF-8")
        print(text)


if __name__ == "__main__":
    IPParser()
上面的获取方式还可以用正则表达式简化为:
# Shorter alternative: fetch the page and pull the <center> text out with a
# regular expression. Expects ``urllib2`` and ``re`` to be imported already.
response = urllib2.urlopen("http://iframe.ip138.com/ic.asp", timeout=5)
try:
    # "replace" keeps a stray undecodable byte from aborting the lookup.
    html = response.read().decode("GB2312", "replace")
finally:
    # Release the connection even when read() or decode() fails.
    response.close()

# Non-greedy so two <center> elements are not merged into one match;
# re.S lets the element's text span several lines.
matches = re.findall("<center>(.*?)</center>", html, re.S)
if not matches:
    # Exit with a readable message instead of a bare IndexError.
    raise SystemExit("no <center> element found in the response")

text = matches[0]
if str is bytes:
    # Python 2: print UTF-8 bytes, as this snippet always did.
    text = text.encode("UTF-8")
print(text)