这是天池竞赛数据集的下载列表(csv 文件)。当然也可以在 Linux 中用 wget 逐个下载,但这样感觉不太 cool,
所以写了个 Python 程序来批量下载。文件夹栏如下:
import pandas as pd
import os
from urllib.request import urlretrieve
class download(object):
    """Download every file listed in the dataset list CSV.

    Both the list (``mchar_data_list_0515.csv``) and the downloaded files
    live under ``<current working directory>/steet_character_detector/data``.
    """

    def __init__(self):
        super().__init__()

    def start(self):
        """Download each URL found in the third column of the list CSV.

        Every URL is saved next to the CSV under the last path segment of
        the URL.  A failed download is reported and skipped so the
        remaining files are still fetched.

        Raises:
            FileNotFoundError: if the list CSV does not exist (raised by
                ``pandas.read_csv``).
        """
        data_dir = os.path.join(os.getcwd(), "steet_character_detector", "data")
        data = pd.read_csv(os.path.join(data_dir, "mchar_data_list_0515.csv"))
        # Column index 2 holds the download link.
        # NOTE(review): column layout is assumed from the index used here;
        # confirm against the CSV header.
        for url in data.iloc[:, 2]:
            print(url)
            # Empty CSV cells come back as NaN (a float); skip them instead
            # of crashing on ``.split`` and aborting the whole run.
            if not isinstance(url, str) or not url:
                print("Skipping row without a usable url:", repr(url))
                continue
            local = os.path.join(data_dir, url.split("/")[-1])
            try:
                urlretrieve(url, local, self.callbackfunc)
            except Exception as e:
                # Best-effort: keep going with the next file, but say which
                # download failed and why.
                print("Exception while downloading", url, "->", local, ":", repr(e))

    def callbackfunc(self, blocknum, blocksize, totalsize):
        """Progress hook for ``urlretrieve``; prints the download progress.

        Args:
            blocknum: number of blocks transferred so far.
            blocksize: size of one block in bytes.
            totalsize: size of the remote file in bytes; ``urlretrieve``
                may pass ``-1`` (or a server may report ``0``) when the
                size is unknown.
        """
        print("blocknum ", blocknum, "blocksize ", blocksize, "totalsize ", totalsize)
        downloaded = blocknum * blocksize
        if totalsize <= 0:
            # Unknown size: a percentage cannot be computed (and dividing
            # by zero would abort the download), so report bytes only.
            print("n/a", " ", downloaded, " ", totalsize)
            return
        # The last block is usually partial, so cap the figure at 100 %.
        percent = min(100.0, 100.0 * downloaded / totalsize)
        print("%.2f%%" % percent, " ", downloaded, " ", totalsize)
# Module-level preview: load the dataset list from the current working
# directory and show its first rows.
root = os.getcwd()
data = pd.read_csv(root + "/steet_character_detector/data/mchar_data_list_0515.csv")
preview = data.head()
print(preview)

# Start the downloads only when this file is executed as a script.
if __name__ == "__main__":
    downloader = download()
    downloader.start()