Python检测URL状态,并追加保存200的URL:
1.Requests
#! /usr/bin/env python
#coding=utf-8
import sys
import requests
def getHttpStatusCode(url, timeout=10):
    """Send a GET request to ``url`` and return the HTTP status code.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single unresponsive host can stall the whole run.

    Returns:
        The integer status code of the response, or the caught
        ``requests.exceptions.HTTPError`` instance if one was raised.

    Any other error raised by ``requests.get`` is not caught here and
    propagates to the caller.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.exceptions.HTTPError as e:
        # Existing contract: the exception object is handed back instead of
        # a status code, so callers comparing against 200 see a mismatch.
        return e
    return response.status_code
if __name__ == "__main__":
    # Check every URL listed in 1.txt (one per line) and append each one that
    # answers with HTTP 200 to 200.txt.
    with open('1.txt', 'r') as url_file:
        for line in url_file:
            # Remove the line ending so the URL is requested and stored clean.
            url = line.strip()
            if not url:
                # A blank line is not a URL; do not send a request for it.
                continue
            try:
                status = getHttpStatusCode(url)
                if status == 200:
                    # Distinct handle name so the input file being iterated
                    # is never shadowed by the output file.
                    with open('200.txt', 'a') as out_file:
                        # ``url`` carries no newline, so exactly one line
                        # ending is written per saved URL.
                        out_file.write(url + '\n')
                    print(url)
                else:
                    print('no 200 code')
            except Exception as e:
                # Best-effort batch run: report the failure and continue with
                # the next URL.
                print(e)
|
#! /usr/bin/env python
# -*- coding: utf-8 -*-

import sys

import requests


def request_status(line, timeout=10):
    """Request ``line`` and record it in url_200.txt when it answers 200.

    Args:
        line: URL to request (without a trailing newline).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        ``line`` if the response status code is 200, otherwise ``None``.

    Errors raised by ``requests.get`` are not caught here and propagate to
    the caller.
    """
    conn = requests.get(line, timeout=timeout)
    if conn.status_code == 200:
        with open('url_200.txt', 'a') as f:
            f.write(line + '\n')
        return line
    return None


if __name__ == '__main__':
    # NOTE(review): '/1.txt' is an absolute path at the filesystem root;
    # confirm this is intended rather than a relative '1.txt'.
    # Text mode so each line is a str and can be stripped and used as a URL.
    with open('/1.txt', 'r') as f:
        for line in f:
            url = line.strip()
            if not url:
                # Skip blank lines instead of requesting an empty URL.
                continue
            try:
                request_status(url)
            except Exception as e:
                # Keep going on failure, but report it on stderr rather than
                # discarding the error silently.
                sys.stderr.write('%s: %s\n' % (url, e))
2.Urllib
#! /usr/bin/env python
#coding:utf-8
import os,urllib,linecache
import sys
# Check every URL listed in 1.txt (one per line) and append each one that
# answers with HTTP 200 to 200urllib.txt.
with open('1.txt', 'r') as url_file:
    for x in url_file:
        # Strip the real line ending; replacing the text '/n' removes nothing.
        url = x.strip()
        if not url:
            # Skip blank lines instead of requesting an empty URL.
            continue
        try:
            # urllib.urlopen is the Python 2 API (Python 3 provides
            # urllib.request.urlopen instead).
            a = urllib.urlopen(url).getcode()
        except Exception as e:
            print(e)
            # No status code was obtained for this URL, so do not fall
            # through to the check below with a missing or stale value.
            continue
        if a == 200:
            # File modes: 'r' read-only, 'w' write (truncate), 'a' append.
            with open('200urllib.txt', 'a') as f:
                f.write(url + '\n')
        else:
            print('error')