调用网站的API来判断
按网站名查询和按IP地址查询的接口不太一样：
http://ip.cn/index.php?ip=www.hao123.com
http://www.ip.cn/110.84.0.129
源码
class location_ipdotcn(object):
    '''
    Build the mapping of an IP address (or host name) and its location.

    The geo info is scraped from www.ip.cn. PhantomJS is used to open the
    URL so that the page's JavaScript is rendered before the result is read.

    NOTE(review): `webdriver` and `DesiredCapabilities` are used below but
    are not imported in the visible part of this file; presumably they come
    from selenium -- confirm the surrounding module imports them.
    '''

    def __init__(self, ip):
        '''
        Construction of location_ipdotcn class.

        ip: the IP address or host name to look up; it is substituted
            unchanged into the query URL.
        '''
        self.ip = ip
        self.api_url = 'http://www.ip.cn/index.php?ip=%s' % ip

    def get_geoinfo(self):
        '''
        Render the query page and return the third line of the text of
        the element with id "result", split on single spaces.

        A new PhantomJS process is started on every call and is always
        shut down again, even when loading or reading the page fails.

        Raises IndexError when the result text has fewer than three lines;
        errors raised by the web driver propagate unchanged.
        '''
        dcap = dict(DesiredCapabilities.PHANTOMJS)
        dcap["phantomjs.page.settings.userAgent"] = (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:25.0) Gecko/20100101 Firefox/29.0 ")
        driver = webdriver.PhantomJS(
            executable_path='/usr/local/bin/phantomjs',
            desired_capabilities=dcap)
        try:
            driver.get(self.api_url)
            text = driver.find_element_by_xpath('//div[@id="result"]').text
        finally:
            # Without this, any failure above would leave the PhantomJS
            # process running.
            driver.quit()
        return text.split('\n')[2].split(' ')

    def get_region(self):
        '''
        Return field 3 of get_geoinfo() -- presumably the region; verify
        against the current page layout.
        '''
        return self.get_geoinfo()[3]

    def get_isp(self):
        '''
        Return field 2 of get_geoinfo() -- presumably the ISP; verify
        against the current page layout.
        '''
        return self.get_geoinfo()[2]
if __name__ == '__main__':
    # Demo run: query one host name and print the get_region() result
    # followed by the get_isp() result (each call renders the page anew).
    ip = 'www.hao123.com'
    iploc = location_ipdotcn(ip)
    for lookup in (iploc.get_region, iploc.get_isp):
        print(lookup())
用官方网站公布的IP范围来判断是否为中国区域的IP
csv文件只保存了每段的起始IP地址和该段包含的地址个数
#!/usr/bin/python
#coding: utf-8
import csv
import socket
# 256 x 256 table of independent lists, indexed as
# global_ip_dict[first_octet][second_octet]; each list holds the address
# ranges recorded for that /16 network.
global_ip_dict = [
    [[] for _second_octet in range(256)]
    for _first_octet in range(256)
]
def analyseLine(ip_line):
    '''
    Parse one csv row into the four octets of its start address and its
    address count.

    ip_line: a sequence whose first item is a dotted IPv4 start address
        and whose second item is the number of addresses, e.g.
        ['112.0.0.0', '4194304'].

    Returns (addr1, addr2, addr3, addr4, ip_num) as ints. Returns
    (0, 0, 0, 0, 0) when the row cannot be parsed: fewer than two columns
    (such as a blank line), fewer than four dot-separated address parts,
    or a non-numeric octet or count (such as a header row).
    '''
    invalid = (0, 0, 0, 0, 0)
    if len(ip_line) < 2:
        return invalid
    ip_addr = ip_line[0].split('.')
    if len(ip_addr) < 4:
        return invalid
    try:
        # Only the first four parts are used; any extra parts are ignored.
        addr1, addr2, addr3, addr4 = [int(part) for part in ip_addr[:4]]
        ip_num = int(ip_line[1])
    except ValueError:
        return invalid
    return addr1, addr2, addr3, addr4, ip_num
def init_ip_dict(csv_path='cn.csv'):
    '''
    Load the address ranges from a csv file into global_ip_dict.

    csv_path: path of the csv file; every row is handed to analyseLine(),
        which expects a start address and an address count. Defaults to
        'cn.csv' in the current directory.

    Every range is cut at /16 boundaries. Each piece is appended to
    global_ip_dict[first_octet][second_octet] as a dict with keys
    'start' and 'end' (inclusive offsets inside that /16, i.e.
    third_octet * 256 + fourth_octet) and 'number' (addresses in the
    piece). Rows that analyseLine() rejects, rows with a non-positive
    count and rows with an octet outside 0..255 are skipped.

    Calling this more than once appends the same ranges again.

    Returns True. Errors from opening or reading the file propagate.
    '''
    # 'rb' is the mode the Python 2 csv module expects for its input file.
    with open(csv_path, 'rb') as f:
        for row in csv.reader(f):
            addr1, addr2, addr3, addr4, number_sum = analyseLine(row)
            if number_sum <= 0:
                continue
            if not all(0 <= octet <= 255
                       for octet in (addr1, addr2, addr3, addr4)):
                continue
            # Work on the address as one 32-bit number so that a range may
            # start anywhere inside a /16 and may run on into the following
            # /16 networks, including across a first-octet boundary.
            first = ((addr1 * 256 + addr2) * 256 + addr3) * 256 + addr4
            last = min(first + number_sum - 1, 0xFFFFFFFF)
            cursor = first
            while cursor <= last:
                # Last address of the /16 containing `cursor`, capped at
                # the end of the range.
                piece_end = min(cursor | 0xFFFF, last)
                global_ip_dict[cursor >> 24][(cursor >> 16) & 0xFF].append({
                    'start': cursor & 0xFFFF,
                    'end': piece_end & 0xFFFF,
                    'number': piece_end - cursor + 1,
                })
                cursor = piece_end + 1
    return True
def determineChinaIP(ip):
    '''
    Tell whether an address falls inside a range stored in global_ip_dict.

    ip: a dotted-quad IPv4 string ('a.b.c.d' with every octet in 0..255)
        or a host name. Anything that is not a complete dotted quad --
        including names whose first label is numeric, such as '163.com' --
        is resolved with socket.gethostbyname(), and the resolved address
        is printed.

    Returns True when the address lies inside one of the ranges recorded
    for its first two octets, otherwise False.

    Errors from socket.gethostbyname() propagate when resolution fails.
    '''
    ip_addr = ip.split('.')
    is_dotted_quad = len(ip_addr) == 4 and all(
        part.isdigit() and int(part) <= 255 for part in ip_addr)
    if not is_dotted_quad:
        ip_address = socket.gethostbyname(ip)
        print('gethostbyname is')
        print(ip_address)
        ip_addr = ip_address.split('.')
    addr1, addr2, addr3, addr4 = [int(part) for part in ip_addr]
    # Ranges are stored as offsets inside the /16 chosen by addr1.addr2.
    addr_value = addr3 * 256 + addr4
    return any(item['start'] <= addr_value <= item['end']
               for item in global_ip_dict[addr1][addr2])
if __name__ == "__main__":
    # Demo run: load the table, then classify x.y.5.5 for every x and y
    # in 100..255 and print one line per address.
    init_ip_dict()
    for first_octet in range(100, 256):
        for second_octet in range(100, 256):
            ip_str = '%s.%s.5.5' % (first_octet, second_octet)
            template = ('%s :china ip' if determineChinaIP(ip_str)
                        else '%s :not china ip')
            print(template % ip_str)