# 二、python小项目模块(制作.npy形式数据集、图像添加随机噪声、将.npy的数据合成图像、创建文件夹)

#都是单独的函数,朋友们要是有需要可以直接复制粘贴,内容都有标注。

'''
date:2020.10
author:Xiao Yu
'''
import os
import cv2
import numpy as np
from PIL import Image
import random
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
path = "/Users/chenzhenyu/desktop/python/create/image/0"
path_s ="/Users/chenzhenyu/desktop/cart_data/test/0"
index = {
     "京": 0, "沪": 1, "津": 2, "渝": 3, "冀": 4, "晋": 5, "蒙": 6, "辽": 7, "吉": 8, "黑": 9, "苏": 10, "浙": 11, "皖": 12,
         "闽": 13, "赣": 14, "鲁": 15, "豫": 16, "鄂": 17, "湘": 18, "粤": 19, "桂": 20, "琼": 21, "川": 22, "贵": 23, "云": 24,
         "藏": 25, "陕": 26, "甘": 27, "青": 28, "宁": 29, "新": 30,"临":31,"时":32, "0": 33, "1": 34, "2": 35, "3": 36, "4": 37, "5": 38,
         "6": 39, "7": 40, "8": 41, "9": 42, "A": 43, "B": 44, "C": 45, "D": 46, "E": 47, "F": 48, "G": 49, "H": 50,
         "J": 51, "K": 52, "L": 53, "M": 54, "N": 55, "P": 56, "Q": 57, "R": 58, "S": 59, "T": 60, "U": 61, "V": 62,
         "W": 63, "X": 64, "Y": 65, "Z": 66}
indexl = ["京", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏", "浙", "皖",
         "闽", "赣", "鲁", "豫", "鄂", "湘", "粤", "桂", "琼", "川", "贵", "云",
         "藏", "陕", "甘", "青", "宁", "新","临","时", "0", "1", "2", "3", "4", "5",
         "6", "7", "8", "9", "A", "B", "C", "D", "E", "F", "G", "H",
         "J", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "U", "V",
         "W", "X", "Y", "Z"]
def show_label_img():
    '''将生成的数组以图像的形式存储,命名格式为:图像编号+标签+标签对应的实际含义'''
    a = np.load("/Users/chenzhenyu/desktop/finally/pure_create/x_test.npy")
    b = np.load("/Users/chenzhenyu/desktop/finally/pure_create/y_test.npy")
    #a= np.load(file="/Users/chenzhenyu/desktop/new_data/x_test.npy")
    #b= np.load(file="/Users/chenzhenyu/desktop/new_data/y_test.npy")
    print(b[1])
    print(b.shape,b.shape,len(b))
    a = a[0:200]
    b = b[0:200]
    for i in range(len(a)):
        #print(i)
        img = Image.fromarray(a[i], 'RGB')
        #print(b[i][0])
        print("/Users/chenzhenyu/desktop/new_data/show_img/"+str(i)+'_'+str(b[i][0])+'_'+str(indexl[b[i][0]])+'.jpg')
        #img.save("/Users/chenzhenyu/desktop/finally/show/"+str(i)+'_'+str(b[i][0])+'_'+str(indexl[b[i][0]])+'.jpg')
        img.save("/Users/chenzhenyu/desktop/finally/show/" + str(i) + '_' + str(b[i][0])+ '.jpg')
# NOTE(review): this call is live, so executing (or importing) the file runs
# show_label_img() immediately and requires its hard-coded .npy files; the
# other helper calls visible in this file are commented out.
show_label_img()
def sp_noise(image,prob):
    '''
    添加椒盐噪声
    prob:噪声比例
    '''
    output = np.zeros(image.shape,np.uint8)
    thres = 1 - prob
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            rdn = random.random()
            if rdn < prob:
                output[i][j] = 0
            elif rdn > thres:
                output[i][j] = 255
            else:
                output[i][j] = image[i][j]
    return output
def gasuss_noise(image, mean=0, var=0.0001): # 0.001,0.002,0.0001
    '''
        添加高斯噪声
        mean : 均值
        var : 方差
    '''
    image = np.array(image/255, dtype=float)
    noise = np.random.normal(mean, var ** 0.5, image.shape)
    out = image + noise
    if out.min() < 0:
        low_clip = -1.
    else:
        low_clip = 0.
    out = np.clip(out, low_clip, 1.0)
    out = np.uint8(out*255)
    return out
def big_show(img):
    '''图像对比对调整'''
    img_bright = cv2.convertScaleAbs(img, alpha=0.095, beta=0)#0.8-1.3
    return  img_bright
def mak_dirl():
    '''根据字典生成目录'''
    path = "/Users/chenzhenyu/desktop/moto/kuozhan"
    print(index)
    for key in index.keys():
        print(key)
        #os.mkdir(path+'/'+ str(key))
#mak_dirl()
def del_unuse():
    path = "/Users/chenzhenyu/desktop/moto/kuozhan"
    for i in os.listdir(path):
        if i == '.DS_Store':
            continue
        for j in os.listdir(path + '/' + str(i)):
            if j == '.DS_Store':
                continue
            print(path + '/' + str(i) + '/' + str(j))
            print(str(j))
            aa = str(j).split('_')
            cc = aa[1]
            print(cc)
            dd = cc.split('.')
            print('dsdsd:',dd[0])
            if int(dd[0]) >4622:
                os.remove(path + '/' + str(i) + '/' + str(j))
            #print(aa[1])
def gather_order():
    '''1、将所有的数据生成一个总文件夹,文件夹中包括所有图像,每张图像命名包括:编号_标签_实际含义(98_5_晋.jpg);
    2、输入数据是一个一级文件夹,此一级文件夹下的二级文件夹是以标签名命名的文件夹:kuozhan>0,1,临......'''
    path = "/Users/chenzhenyu/desktop/moto/kuozhan"  # path为所有单个文件的总目录
    path_save = "/Users/chenzhenyu/desktop/moto/images"  # 保存的文件路径
    counter = 0
    for i in os.listdir(path):    #i表示在path文件夹下面的所有文件的文件名(此处表示0,2,晋,A等)
        if i == '.DS_Store':        #此过滤掉字符.DS_Store'
            continue
        #print(path + '/' + str(i))  #查看路径,path + '/' + str(i)表示要访问的一级文件的路径,打印结果类似于/Users/chenzhenyu/desktop/moto/kuozhan/A
        for j in os.listdir(path + '/' + str(i)):   #j为一级目录下的所有文件
            print(path + '/' + str(i) + '/' +str(j))  #打印最终要访问的文件,打印类似于/Users/chenzhenyu/desktop/moto/kuozhan/W/gs_9981.jpg
            img = cv2.imread(path + '/' + str(i) + '/' +str(j))  #读取路径当中的图像
            img = cv2.resize(img, (16, 16))  #将图像缩放至16*16的范围
            cv2.imwrite(path_save + '/' + str(counter) + '_' + str(index[str(i)]) + '_' + str(i) + '.jpg',img)
            print(path_save + '/' + str(counter) + '_' + str(index[str(i)]) + '_' + str(i) + '.jpg')
            counter = counter + 1
def disorder_data():
    '''将有序的图像打乱顺序,保存在另外一个文件夹中'''
    path = "/Users/chenzhenyu/desktop/moto/images"   #读入图像路径
    path_save = "/Users/chenzhenyu/desktop/moto/images_finally"   #将打乱顺序后的图像保存在该文件夹中
    list = os.listdir(path)
    print(list[0])
    random.shuffle(list)
    print(list[0])
    counts = 0
    for x in list:
        if x == '.DS_Store':  # 此过滤掉字符.DS_Store'
            continue
        # print(x)
        chara = str(x).split('_')  # 将文件名字符以'_'为界分开,chara[1]、chara[2]分别表示对应图像的标签和真实值
        print(path_save + '/' + str(x))
        img_finall = cv2.imread(path_save + '/' + str(x))
        cv2.imwrite(path_save + '/' + str(counts) + '_' + str(chara[1]) + '_' + str(chara[2]), img_finall)
        counts = counts + 1
def create_npy():
    '''将数据转化为.npy格式,生成最终的数据集,path表示路径,里面存放图像,图像命名格式为:"编号_标签_实名.jpg",
    其中,编号任意,各图像名互不相同即可,图像格式可以是.bmp,jpeg等,实名具体指标签对应的实际意义'''
    path = "/Users/chenzhenyu/desktop/moto/images_finally"
    list = os.listdir(path)
    x_list = []
    y_list = []
    for i in list:
        img = cv2.imread(path + '/' + str(i))   #读取图像文件,path + '/' + str(i)表示文件所在路径
        y_list.append(np.uint8(str(i).split('_')[1]))   #将标签从文件名取出,添加到标签列表中
        img = np.array(img)    #将图像转化为anrray数组
        x_list.append(img)  #将图像数据添加到训练集列表
    y_list = np.array(y_list)     #转换为数组
    y_data = y_list.reshape((len(list),1))  # 一维变二维
    x_data = np.array(x_list)
    print("x_data",x_data.shape,type(x_data),type(x_data[0][0][0][0]))
    print("y_data", y_data.shape, type(y_data), type(y_data[0][0]))
    np.save(file="/Users/chenzhenyu/desktop/finally/real_cate/x_data.npy",arr=x_data)
    np.save(file="/Users/chenzhenyu/desktop/finally/real_cate/y_data.npy", arr=y_data)
#create_npy
def count_number():
    '''统计数据集中数据的数量,包括图像总数,分多少类,每一类有多少张图像。这些信息只要加载标签数据就可以统计出来'''
    l_array = np.load("/Users/chenzhenyu/desktop/finally/real_cate/y_data.npy") #载入数据标签
    l_array = l_array.reshape((1,len(l_array)))
    l_array = l_array.tolist()
    l_array = l_array[0]
    myset = set(l_array)    #set可以将一个列表当中存在的元素返回给变量,返回的元素包括列表中所有元素,但不重复
    print(myset)
    for item in myset:
        print("the %d has found %d" % (item, l_array.count(item)))
#count_number()
def together_array():
    '''按照顺序合并数组,本结果将所有数据集合并为数据信息和对应标签两个文件'''
    x_data = np.load("/Users/chenzhenyu/desktop/finally/real_cate/x_data.npy")
    y_data = np.load("/Users/chenzhenyu/desktop/finally/real_cate/y_data.npy")
    a = np.load("/Users/chenzhenyu/desktop/finally/pure_create/x_train.npy")
    b = np.load("/Users/chenzhenyu/desktop/finally/pure_create/x_test.npy")
    a_lab = np.load("/Users/chenzhenyu/desktop/finally/pure_create/y_train.npy")
    b_lab = np.load("/Users/chenzhenyu/desktop/finally/pure_create/y_test.npy")
    c = np.vstack((a,b))               #数组按列拼接
    c_lab = np.vstack((a_lab,b_lab))
    x_all_data = np.vstack((c,x_data))
    y_all_data = np.vstack((c_lab,y_data))
    np.save(file="/Users/chenzhenyu/desktop/finally/total_data/x_all_data.npy",arr= x_all_data)
    np.save(file="/Users/chenzhenyu/desktop/finally/total_data/y_all_data.npy",arr = y_all_data)

    print(x_all_data.shape)
    print(y_all_data.shape)
def get_dic_key_value():
    '''访问字典的键和值'''
    print(len(index),index["京"])
    for key in index.keys():
        print(key)
    for v in index.values():
        print(v)
def k_means():
    x1 = np.array([1,1,2,3,4,5,6,7,8,9])
    x2 = np.array([1,4,7,9,2,1,4,7,8,7])
    x = np.array(list(zip(x1,x2))).reshape(len(x1),2)
    print(x,list(zip(x1,x2)))
    keans_model = KMeans(n_clusters = 4).fit(x)
    img =cv2.imread("/Users/chenzhenyu/desktop/new_data/plate/20.jpg")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    #img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    imgll = img/255
    imgl = imgll.reshape(-1,3)
    keans_model = KMeans(n_clusters=2).fit(imgl)
    labels = keans_model.fit_predict(imgl)
    print(labels)
    colors = keans_model.cluster_centers_
    print(colors)
    print(img.shape[1])
    h,w = img.shape[0],img.shape[1]
    print(h,w)
    for i in range(h):
        for j in range(w):
            if imgll[i,j,0] == 0.11232347 or imgll[i,j,1] == 0.3675052 :
                imgl[i,j,1] =0
            else:
                imgl[i,j,1]= 255
    cv2.imshow('dsd',imgll)
    cv2.waitKey(0)

#k_means()
def counter_data():
    x_data = np.load("/Users/chenzhenyu/desktop/finally/total_data/x_all_data.npy")
    y_data = np.load("/Users/chenzhenyu/desktop/finally/total_data/y_all_data.npy")
    chinese = 0
    num = 0
    chara = 0
    print(y_data.shape)
    print(y_data[-1],y_data)
    for i in range(187472):
        if y_data[i] >= 0 and  y_data[i] <= 32:
            chinese = chinese +1
        elif y_data[i] >= 33 and  y_data[i] <= 42:
            chara = chara +1
        elif y_data[i] >= 43 and  y_data[i] <= 66:
            num = num +1
    total_num = chinese + num + chara
    print("汉字:",chinese)
    print("字母:",chara)
    print("数字:",num)
    print("总共:",total_num)

    x_data = np.load("/Users/chenzhenyu/desktop/finally/pure_create/y_test.npy")
    y_data = np.load("/Users/chenzhenyu/desktop/finally/pure_create/y_train.npy")
    print(int(len(x_data)) + int(len(y_data)))
#counter_data()
def zhuzhuang():
    #plt.rcParams['font.sans-serif'] = ['SimHei']
    #plt.rcParams['axes.unicode_minus'] = False

    waters = ('汉字',  '字母','数字')
    buy_number = [94592,26400,66480]

    plt.bar(waters, buy_number)
    plt.title('男性购买饮用水情况的调查结果')

    plt.show()
#zhuzhuang()

# 你可能感兴趣的:(python完整小项目,python,小程序)