读取csv并通过url下载照片,制作数据集

读取csv并通过url下载照片,制作数据集

  1. 首先准备一个csv文件
    在这里插入图片描述
import csv

# Open the .csv and print every row to inspect its structure
# (the URLs are collected into list `a` in the next step).
# newline='' lets the csv module handle line endings inside quoted fields itself.
with open('青苹果.csv', 'r', encoding='utf-8', newline='') as f:
    a = []
    url = ""
    reader = csv.reader(f)
    for row in reader:
        print(row)
    # Next step (enabled in the complete code): keep the URL part of column 1
    #     a.append(row[1][12:-2])
    #     url = row[1][12:-2]
    #
    # print(a)

CSV 文件包含 id、oss_data、label 三列(表头)。这里先把每一行 print 出来:csv.reader 返回的每个 row 都是 list(列表)
读取csv并通过url下载照片,制作数据集_第1张图片
2. 接着遍历截取url并下载到本地

# Download one image into the local ./image folder
def request_download(oss,url):
    """Fetch ``url`` and save the response body as ``./image/<oss>.png``.

    Args:
        oss: Value used (via ``str()``) as the file name without extension.
        url: Address of the image to download.

    Raises:
        requests.RequestException: on connection problems, on timeout, or
            when the server answers with an HTTP error status.
    """
    # Without a timeout a stalled server would block the script forever.
    r = requests.get(url, timeout=30)
    # Do not store an HTTP error page as if it were a .png image.
    r.raise_for_status()
    with open("./image/"+str(oss)+".png", 'wb') as f:
        f.write(r.content)

3.最后制作数据集,代码如下

def img_tra():
    """Resize every image named in ``imglist`` to 32x32 pixels.

    Reads the module-level ``num``, ``imglist``, ``folder`` (source
    directory) and ``folder_ad`` (destination directory). Each result is
    saved under its original file name, so when both directories are the
    same the source file is overwritten.
    """
    x_s = 32
    y_s = 32
    for k in range(0, num):
        currentpath = folder + "/" + imglist[k]
        # Close the source file before saving: the destination may be the
        # very same path.
        with Image.open(currentpath) as im:
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter under its current name.
            out = im.resize((x_s, y_s), Image.LANCZOS)
        out.save(folder_ad + "/" + str(imglist[k]))
def addWord(theIndex, word, adder):
    """Append ``adder`` to the list kept under ``word`` in ``theIndex``.

    The list is created on first use of ``word``. Returns ``None``.
    """
    if word in theIndex:
        bucket = theIndex[word]
    else:
        bucket = theIndex[word] = []
    bucket.append(adder)
def seplabel(fname):
    """Return the integer label encoded at the start of a file name.

    The label is the text before the first "." and, within that, before
    the first "_" (e.g. "3_cat.png" -> 3, "12.png" -> 12).

    Raises:
        ValueError: if that leading text is not an integer.
    """
    stem, _, _ = fname.partition(".")
    prefix, _, _ = stem.partition("_")
    return int(prefix)
def mkcf():
    """Pack the images in ``folder_ad`` into one pickled batch file.

    Reads the module-level ``num``, ``imglist``, ``folder_ad``, ``binpath``
    and ``label``. Appends one pixel row per image to the module-level
    ``list2`` and the file name (UTF-8 bytes) to ``list3``, fills the
    module-level ``data`` dict (bytes keys ``batch_label``, ``labels``,
    ``data``, ``filenames``) and pickles it to ``binpath``.

    Every image must be at least 32x32 pixels; only the pixels with
    coordinates 0..31 are read.
    """
    global data
    global list2
    global list3
    for k in range(0, num):
        currentpath = folder_ad + "/" + imglist[k]
        with Image.open(currentpath) as im:
            # Force 3-channel pixels: getpixel() returns a bare int for
            # palette/greyscale images, which cannot be indexed by channel.
            rgb = im.convert("RGB")
        # Read every pixel once; x (i) is the outer loop and y (j) the inner
        # one, exactly as before.
        # NOTE(review): this stores the image column by column; CIFAR-style
        # batches are usually row-major - confirm which layout is intended.
        pixels = [rgb.getpixel((i, j)) for i in range(32) for j in range(32)]
        # Row layout: 1024 red values, then 1024 green, then 1024 blue.
        list2.append([px[c] for c in range(3) for px in pixels])
        print("image"+str(k+1)+"saved.")
        list3.append(imglist[k].encode('utf-8'))
    arr2 = np.array(list2, dtype=np.uint8)
    data['batch_label'.encode('utf-8')] = 'testing batch 1 of 1'.encode('utf-8')
    # setdefault keeps an entry that is already present in ``data``.
    data.setdefault('labels'.encode('utf-8'), label)
    data.setdefault('data'.encode('utf-8'), arr2)
    data.setdefault('filenames'.encode('utf-8'), list3)
    with open(binpath, 'wb') as output:
        pickle.dump(data, output)
folder = "./image"
folder_ad = "./image"
binpath = "./image/test_batch"
# The batch file is written into the image folder itself, so leave it out of
# the image list; otherwise a second run would try to open it as an image.
imglist = [name for name in listdir(folder_ad) if folder_ad + "/" + name != binpath]
num = len(imglist)
# Shrink every image to 32x32 in place.
img_tra()
# One integer label per image, taken from the start of its file name.
label = [seplabel(name) for name in imglist]
print(binpath)
# Pack pixels, labels and file names into the pickled batch file.
mkcf()
  1. 至此就做好了一个名为“test_batch”的数据集(保存在 ./image 目录下)
    读取csv并通过url下载照片,制作数据集_第2张图片
  2. 同时用以下方法读取数据集

import pickle

def load(filename):
    """Unpickle and return the object stored in the file ``filename``.

    ``encoding='latin1'`` tells pickle how to decode 8-bit strings that
    were pickled by Python 2.

    NOTE: unpickling can execute arbitrary code - only load files you
    created yourself or otherwise trust.
    """
    with open(filename, 'rb') as handle:
        return pickle.load(handle, encoding='latin1')
# The batch was written to ./image/test_batch, so read it from there.
d = './image/test_batch'
dataset_path_1 = load(d)
print(dataset_path_1.keys())
print(dataset_path_1)
# The keys were stored as bytes, so they have to be looked up as bytes.
# print(dataset_path_1[b'batch_label'])
print(dataset_path_1[b'labels'])
# print(dataset_path_1[b'data'].shape)
# print(dataset_path_1[b'filenames'])

结果如下:
读取csv并通过url下载照片,制作数据集_第3张图片

完整代码如下:

import csv
import requests
import os
from os import listdir
import pickle as pickle
import numpy as np
from PIL import Image
# Make sure the ./image output folder exists before anything is saved into it.
os.makedirs('./image/', exist_ok=True)
# Shared state used by mkcf(): `data` is the dict that gets pickled, `list1`
# is per-image scratch space, `list2` collects one pixel row per image and
# `list3` collects the file names.
data = dict()
list1, list2, list3 = [], [], []

# Download one image into the local ./image folder
def request_download(oss,url):
    """Fetch ``url`` and save the response body as ``./image/<oss>.png``.

    Args:
        oss: Value used (via ``str()``) as the file name without extension.
        url: Address of the image to download.

    Raises:
        requests.RequestException: on connection problems, on timeout, or
            when the server answers with an HTTP error status.
    """
    # Without a timeout a stalled server would block the script forever.
    r = requests.get(url, timeout=30)
    # Do not store an HTTP error page as if it were a .png image.
    r.raise_for_status()
    with open("./image/"+str(oss)+".png", 'wb') as f:
        f.write(r.content)

# Open the .csv and collect the URL part of column 1 of every row into list `a`.
# newline='' lets the csv module handle line endings inside quoted fields itself.
with open('青苹果,一个梨,羊驼.csv', 'r', encoding='utf-8', newline='') as f:
    # [12:-2] drops a fixed-length prefix and the last two characters around
    # the URL - presumably the wrapper of the oss_data column; confirm against
    # the actual CSV. Rows with fewer than two columns (e.g. blank lines)
    # are skipped instead of raising IndexError.
    a = [row[1][12:-2] for row in csv.reader(f) if len(row) > 1]

# Walk through `a` and download each image. a[0] comes from the header row,
# so downloads start at index 1 and the files are named 1.png, 2.png, ...
for oss, url in enumerate(a[1:], start=1):
    request_download(oss, url)
    print(url)


""""""""""""""
def img_tra():
    """Resize every image named in ``imglist`` to 32x32 pixels.

    Reads the module-level ``num``, ``imglist``, ``folder`` (source
    directory) and ``folder_ad`` (destination directory). Each result is
    saved under its original file name, so when both directories are the
    same the source file is overwritten.
    """
    x_s = 32
    y_s = 32
    for k in range(0, num):
        currentpath = folder + "/" + imglist[k]
        # Close the source file before saving: the destination may be the
        # very same path.
        with Image.open(currentpath) as im:
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
            # filter under its current name.
            out = im.resize((x_s, y_s), Image.LANCZOS)
        out.save(folder_ad + "/" + str(imglist[k]))
def addWord(theIndex, word, adder):
    """Append ``adder`` to the list kept under ``word`` in ``theIndex``.

    The list is created on first use of ``word``. Returns ``None``.
    """
    if word in theIndex:
        bucket = theIndex[word]
    else:
        bucket = theIndex[word] = []
    bucket.append(adder)
def seplabel(fname):
    """Return the integer label encoded at the start of a file name.

    The label is the text before the first "." and, within that, before
    the first "_" (e.g. "3_cat.png" -> 3, "12.png" -> 12).

    Raises:
        ValueError: if that leading text is not an integer.
    """
    stem, _, _ = fname.partition(".")
    prefix, _, _ = stem.partition("_")
    return int(prefix)
def mkcf():
    """Pack the images in ``folder_ad`` into one pickled batch file.

    Reads the module-level ``num``, ``imglist``, ``folder_ad``, ``binpath``
    and ``label``. Appends one pixel row per image to the module-level
    ``list2`` and the file name (UTF-8 bytes) to ``list3``, fills the
    module-level ``data`` dict (bytes keys ``batch_label``, ``labels``,
    ``data``, ``filenames``) and pickles it to ``binpath``.

    Every image must be at least 32x32 pixels; only the pixels with
    coordinates 0..31 are read.
    """
    global data
    global list2
    global list3
    for k in range(0, num):
        currentpath = folder_ad + "/" + imglist[k]
        with Image.open(currentpath) as im:
            # Force 3-channel pixels: getpixel() returns a bare int for
            # palette/greyscale images, which cannot be indexed by channel.
            rgb = im.convert("RGB")
        # Read every pixel once; x (i) is the outer loop and y (j) the inner
        # one, exactly as before.
        # NOTE(review): this stores the image column by column; CIFAR-style
        # batches are usually row-major - confirm which layout is intended.
        pixels = [rgb.getpixel((i, j)) for i in range(32) for j in range(32)]
        # Row layout: 1024 red values, then 1024 green, then 1024 blue.
        list2.append([px[c] for c in range(3) for px in pixels])
        print("image"+str(k+1)+"saved.")
        list3.append(imglist[k].encode('utf-8'))
    arr2 = np.array(list2, dtype=np.uint8)
    data['batch_label'.encode('utf-8')] = 'testing batch 1 of 1'.encode('utf-8')
    # setdefault keeps an entry that is already present in ``data``.
    data.setdefault('labels'.encode('utf-8'), label)
    data.setdefault('data'.encode('utf-8'), arr2)
    data.setdefault('filenames'.encode('utf-8'), list3)
    with open(binpath, 'wb') as output:
        pickle.dump(data, output)
folder = "./image"
folder_ad = "./image"
binpath = "./image/test_batch"
# The batch file is written into the image folder itself, so leave it out of
# the image list; otherwise a second run would try to open it as an image.
imglist = [name for name in listdir(folder_ad) if folder_ad + "/" + name != binpath]
num = len(imglist)
# Shrink every image to 32x32 in place.
img_tra()
# One integer label per image, taken from the start of its file name.
label = [seplabel(name) for name in imglist]
print(binpath)
# Pack pixels, labels and file names into the pickled batch file.
mkcf()


# No-op entry-point guard: all of the work above already runs at module top level.
if __name__ == '__main__':
    pass

你可能感兴趣的:(python)