Python3:根据本地的ip列表,联网查询每个ip的具体归属地

import requests
import re
import csv
import threading
from threading import Thread
# Module-wide lock; req1 acquires it around its appends to the output CSV
# files so that rows written by different worker threads do not interleave.
glock = threading.Lock()

# Browser-like HTTP headers passed along with each request to www.ip138.com.
# Long values are split with implicit string concatenation for readability;
# the resulting strings and the key order are unchanged.
headers = {
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8"
    ),
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "max-age=0",
    "Connection": "keep-alive",
    "Cookie": (
        "pgv_pvi=6130555904; pgv_si=s1252968448; "
        "ASPSESSIONIDACASSTQA=LLBPLFKBIGBEMENEMPBJJJFI"
    ),
    "Host": "www.ip138.com",
    "Referer": "http://www.ip138.com/",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/70.0.3538.110 Safari/537.36"
    ),
}


# Load every data row of the IP CSV into one list.
def iplist(ipAddress):
    """Read a CSV file and return its data rows as ``"col0,col1"`` strings.

    The first row is treated as a header and skipped. For each remaining
    row, the first two columns are joined with a comma; any further columns
    are ignored.

    Args:
        ipAddress: Path of the CSV file to read.

    Returns:
        A list of strings, one per usable data row, in file order. An empty
        or header-only file yields an empty list.
    """
    # The context manager closes the file even if parsing fails;
    # newline='' is what the csv module expects for files it reads.
    with open(ipAddress, 'r', newline='') as csv_file:
        rows = csv.reader(csv_file)
        next(rows, None)  # drop the header row (no-op on an empty file)
        # Blank lines come back from csv.reader as [] and short rows lack a
        # second column; skip both instead of failing with IndexError.
        return [row[0] + ',' + row[1] for row in rows if len(row) >= 2]


# Pattern that pulls the location text following "本站数据:" out of the
# ip138 result page.
# NOTE(review): the pattern literal in the original source was garbled (its
# HTML tags were stripped, leaving only list bullets). This is a best-effort
# reconstruction that assumes the values sit in <li> elements -- confirm
# against the live page markup before relying on it.
_LOCATION_RE = re.compile(r'<li>本站数据:(.*?)</li>')

# Output files: successful lookups and failed lookups respectively.
_RESULT_CSV = r'ip归属地查询.csv'
_FAILED_CSV = r'请求失败ip.csv'


def _append_row(path, row):
    """Append one row to the CSV file at *path* while holding ``glock``."""
    # ``with`` releases the lock and closes the file on every path, so a
    # failed write can no longer leave the lock held or the handle open.
    with glock, open(path, 'a', newline='') as out:
        csv.writer(out, dialect='excel').writerow(row)


def split_evenly(items, parts):
    """Split *items* into *parts* consecutive chunks of near-equal size.

    Always returns exactly *parts* lists; when there are fewer items than
    parts the trailing chunks are empty. Concatenating the chunks gives
    back *items* in order, with no element repeated or dropped.
    """
    base, extra = divmod(len(items), parts)
    chunks = []
    start = 0
    for index in range(parts):
        # The first ``extra`` chunks take one additional item each.
        end = start + base + (1 if index < extra else 0)
        chunks.append(items[start:end])
        start = end
    return chunks


# Worker: look up the location of every IP in one chunk.
def req1(reqlist1):
    """Query ip138 for each entry of *reqlist1* and record the outcome.

    Each entry is an ``"ip,count"`` string. A successful lookup appends
    ``(ip, location, count)`` to the result CSV. A request error or a page
    without a location match appends ``(ip, count, reason)`` to the failure
    CSV instead. All file appends are serialized through ``glock``.

    Args:
        reqlist1: Iterable of ``"ip,count"`` strings to process.

    Returns:
        None.
    """
    for line in reqlist1:
        fields = line.split(',')
        realip, count = fields[0], fields[1]
        url = 'http://www.ip138.com/ips138.asp?ip=%s&action=2' % realip
        try:
            response = requests.get(url=url, headers=headers, timeout=5)
        except requests.RequestException:
            print('请求异常~' + realip)
            _append_row(_FAILED_CSV, (realip, count, '请求失败'))
            continue

        response.encoding = "GBK"
        matches = _LOCATION_RE.findall(response.text)
        # Check for "no match" explicitly. The original indexed ip[0] while
        # already holding the lock and then re-acquired the same lock in its
        # except branch, which deadlocked the worker thread.
        if not matches:
            print(realip + '查询失败~~~')
            _append_row(_FAILED_CSV, (realip, count, '查询失败'))
            continue

        _append_row(_RESULT_CSV, (realip, matches[0], count))
        print("ip :" + line + ",归属地:" + matches[0])


# Script entry point.
if __name__ == '__main__':
    # Path of the local CSV of IPs. The author's file has the IP in the first
    # column and a hit count in the second, e.g.
    # 113.96.231.120, 33, 2019/5/23 9:04:24
    ipaddress = '**本地ip的csv文件**'
    iplistall = iplist(ipaddress)

    # Six worker threads, one per chunk. split_evenly copes with any number
    # of IPs (including fewer than six), so no entry is duplicated or lost.
    for number, chunk in enumerate(split_evenly(iplistall, 6), start=1):
        worker = Thread(target=req1, args=(chunk,))
        worker.start()
        print('t%d线程开始!' % number)

将本地csv文件中的ip分成了6组。这里没加判断,所以ip的数量必须大于6个。然后把这些ip分别放在6个列表中,每个线程的请求函数对各自的列表进行遍历。当然,对csv进行读写的时候要加一个锁。
这是一个比较简单的多线程,也是我工作遇到问题的时候写的。

你可能感兴趣的:(爬虫)