背景:
采用 Python 2 标准库中的 httplib 模块(Python 3 中已更名为 http.client)下载网络数据。
代码:
#coding:utf-8
# Python 2 script: read ids from ids.txt, fetch each one over HTTP and save
# the response body to "<id>.txt".
import httplib

# Each line of ids.txt holds tab-separated fields: the id first and, when
# present, a hash second.
krcids = []
hashs = []
with open("ids.txt") as id_file:
    for line in id_file:
        line = line.rstrip('\r\n')  # drop the trailing line break
        if not line:
            continue  # skip blank lines instead of failing on them
        fields = line.split('\t')
        krcids.append(fields[0])
        # Keep hashs index-aligned with krcids even when the second column
        # is missing from a line.
        hashs.append(fields[1] if len(fields) > 1 else '')
# do something

# The timeout must be passed by keyword: in Python 2 the third positional
# parameter of HTTPConnection is `strict`, not `timeout`.
h1 = httplib.HTTPConnection('142.62.29.125', 8071, timeout=10)
try:
    for tempkrc in krcids:
        print(tempkrc)
        url = "/gettext?id=" + tempkrc
        h1.request('GET', url)
        response = h1.getresponse()
        print(response.status)
        print(response.reason)
        # Read the body exactly once; a second read() on the same response
        # returns an empty string.
        body = response.read()
        print(body)
        print(response.getheaders())  # response headers
        tempwrite = body.strip()  # trim leading/trailing whitespace
        print(tempwrite)
        outfile = tempkrc + ".txt"
        with open(outfile, 'w') as output:
            output.write(tempwrite)
finally:
    # Always release the socket, even if a request fails part-way through.
    h1.close()
注意:
Python 的不同版本(如 2 和 3)之间差别较大。上述代码只能在 Python 2 环境下运行;在 Python 3 中,httplib 已更名为 http.client,urlencode 位于 urllib.parse 中。下面是 Python 3 下的写法示例(此处发送的是一个 POST 请求):
# Python 3 script: send a signed, form-encoded POST request with http.client
# and print the response.
import hashlib
import http.client
import time
import urllib.parse

# NOTE(review): the token is hard-coded here; real code should load secrets
# from the environment or a config file rather than from source.
token = "ieK3keIO90^O39@bk2-kd"
clienttime1 = int(time.time())

# The request key is the hex MD5 digest of the token followed by the
# client timestamp.
acc_hash = (token + str(clienttime1)).encode('utf-8')
key1 = hashlib.md5(acc_hash).hexdigest()

param = {
    'source': 13,
    'sn': 41600628,
    'clienttime': clienttime1,
    'key': key1,
}
param = urllib.parse.urlencode(param)

# HTTPConnection takes only the host (and optional port); the path and query
# string belong in the request line, not in the host argument.
host = "data.media.kgidc.cn"
path = "/index.php?m=audio"
url1 = host + path
print(url1)

headers = {"Content-type": "application/x-www-form-urlencoded", "Accept": "text/plain"}
# A timeout keeps the script from hanging forever on an unresponsive server.
h2 = http.client.HTTPConnection(host, timeout=10)
try:
    h2.request("POST", path, param, headers)
    response = h2.getresponse()
    print(response.status, response.reason)
    tempwrite = response.read()  # bytes in Python 3
    print(tempwrite)
finally:
    # Always release the socket, even if the request fails.
    h2.close()