import requests
import re
import csv
import threading
from threading import Thread
# Single lock shared by all worker threads; it serialises appends to the
# output CSV files.
glock = threading.Lock()

# Browser-like request headers sent with every lookup to www.ip138.com.
# The Cookie value is a hard-coded session captured from a browser.
headers = {
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/webp,image/apng,*/*;q=0.8'
    ),
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Cookie': (
        'pgv_pvi=6130555904; pgv_si=s1252968448; '
        'ASPSESSIONIDACASSTQA=LLBPLFKBIGBEMENEMPBJJJFI'
    ),
    'Host': 'www.ip138.com',
    'Referer': 'http://www.ip138.com/',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/70.0.3538.110 Safari/537.36'
    ),
}
# Collect every IP row from the CSV; the script stores the result in iplistall.
def iplist(ipAddress):
    """Read ``ip,count`` pairs from a CSV file, skipping its first row.

    Args:
        ipAddress: Path of the CSV file. The first row is treated as a
            header and dropped; in every later row column 0 is the IP and
            column 1 is its count. Extra columns are ignored.

    Returns:
        A list of ``"<ip>,<count>"`` strings, one per data row, in file
        order. Rows with fewer than two columns (blank lines included)
        are skipped instead of raising ``IndexError``.
    """
    rows = []
    # The ``with`` block closes the file even if parsing fails; newline=''
    # is the mode the csv module documents for files handed to csv.reader.
    with open(ipAddress, 'r', newline='') as handle:
        reader = csv.reader(handle)
        next(reader, None)  # drop the header row (no-op on an empty file)
        for line in reader:
            if len(line) < 2:
                continue
            rows.append(line[0] + ',' + line[1])
    return rows
def _append_csv_row(path, row):
    """Append *row* to the CSV file at *path* while holding ``glock``.

    Both the lock and the file are released by ``with`` blocks, so an
    exception raised while writing can never leave the lock held or the
    file open.
    """
    with glock:
        with open(path, 'a', newline='') as out:
            csv.writer(out, dialect='excel').writerow(row)


# Request function: look up the location (归属地) of each IP.
def req1(reqlist1):
    """Query ip138.com for every entry and record the outcome in CSV files.

    Args:
        reqlist1: Iterable of ``"<ip>,<count>"`` strings, as produced by
            ``iplist``.

    Side effects:
        * Success: appends ``(ip, location, count)`` to ``ip归属地查询.csv``
          and prints the result.
        * Location missing from the page, or the result row cannot be
          written: appends ``(ip, count, '查询失败')`` to ``请求失败ip.csv``.
        * HTTP request fails: appends ``(ip, count, '请求失败')`` to
          ``请求失败ip.csv``.
    """
    for line in reqlist1:
        fields = line.split(',')
        realip = fields[0]
        count = fields[1]
        url = 'http://www.ip138.com/ips138.asp?ip=%s&action=2' % realip

        try:
            response = requests.get(url=url, headers=headers, timeout=5)
        except requests.RequestException:
            print('请求异常~' + realip)
            _append_csv_row(r'请求失败ip.csv', (realip, count, '请求失败'))
            continue

        response.encoding = "GBK"
        # NOTE(review): in the original this pattern was split across three
        # physical lines (a syntax error), most likely because HTML tags
        # were lost when the code was pasted. The line breaks are kept here
        # as ``\n``; verify the pattern against the live page markup.
        ip = re.findall(r'- 本站数据:(.*?)\n.*?\n', response.text)

        try:
            location = ip[0]  # IndexError when the page had no match
            _append_csv_row(r'ip归属地查询.csv', (realip, location, count))
            print("ip :" + line + ",归属地:" + location)
        except (IndexError, OSError, UnicodeError):
            # The helper has already released the lock by the time we get
            # here, so taking it again for the failure row cannot deadlock.
            print(realip + '查询失败~~~')
            _append_csv_row(r'请求失败ip.csv', (realip, count, '查询失败'))
def split_into_chunks(items, parts):
    """Split *items* into *parts* contiguous lists of near-equal size.

    Every element lands in exactly one chunk, order is preserved, and chunk
    sizes differ by at most one (earlier chunks take the remainder). When
    there are fewer items than parts, the trailing chunks are empty.

    Args:
        items: Sequence to split (must support ``len`` and slicing).
        parts: Number of chunks to produce; must be a positive integer.

    Returns:
        A list containing exactly *parts* lists.
    """
    base, extra = divmod(len(items), parts)
    chunks = []
    start = 0
    for index in range(parts):
        end = start + base + (1 if index < extra else 0)
        chunks.append(items[start:end])
        start = end
    return chunks


# Main entry point
if __name__ == '__main__':
    # Local CSV file of IPs. Mine looks like this: the first column is the IP
    # and the second is a hit count, e.g.
    # [113.96.231.120] [33] [2019/5/23 9:04:24]
    ipaddress = '**本地ip的csv文件**'
    iplistall = iplist(ipaddress)

    # One worker thread per chunk. Six threads are always started so the
    # console output is the same for any input size; a worker handed an
    # empty chunk simply returns.
    workers = []
    for number, chunk in enumerate(split_into_chunks(iplistall, 6), start=1):
        worker = Thread(target=req1, args=(chunk,))
        worker.start()
        print('t%d线程开始!' % number)
        workers.append(worker)

    # Wait for every lookup to finish before the script exits.
    for worker in workers:
        worker.join()
将本地csv文件中的ip分成了6组,这里没有加判断,只适用于ip数量大于6个的情况。然后把这些ip分别放在6个列表中,请求函数对各自的列表进行遍历。当然,对csv进行读写的时候要加一个锁。
这是一个比较简单的多线程,也是我工作遇到问题的时候写的。