# These are all standalone functions; feel free to copy and paste whichever you need. Each one is annotated.
'''
date:2020.10
author:Xiao Yu
'''
import os
import cv2
import numpy as np
from PIL import Image
import random
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
# Default input / output directories (several functions below define their own local paths).
path = "/Users/chenzhenyu/desktop/python/create/image/0"
path_s = "/Users/chenzhenyu/desktop/cart_data/test/0"

# Class names in label order: position in this list == integer label.
indexl = [
    # labels 0-32: Chinese characters
    "京", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏",
    "浙", "皖", "闽", "赣", "鲁", "豫", "鄂", "湘", "粤", "桂", "琼",
    "川", "贵", "云", "藏", "陕", "甘", "青", "宁", "新", "临", "时",
    # labels 33-42: digits
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
    # labels 43-66: upper-case letters (the table contains no "I" and no "O")
    "A", "B", "C", "D", "E", "F", "G", "H", "J", "K", "L", "M",
    "N", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
]

# Reverse lookup: class name -> integer label, derived from the list above.
index = {name: label for label, name in enumerate(indexl)}
def show_label_img(x_path="/Users/chenzhenyu/desktop/finally/pure_create/x_test.npy",
                   y_path="/Users/chenzhenyu/desktop/finally/pure_create/y_test.npy",
                   save_dir="/Users/chenzhenyu/desktop/finally/show",
                   limit=200):
    """Dump the first ``limit`` samples of a .npy dataset as JPEG files.

    Every image is written to ``save_dir`` as ``<sample index>_<label>.jpg``.

    Args:
        x_path: .npy file with the image array; each ``images[i]`` is passed
            to ``Image.fromarray(..., 'RGB')``.
        y_path: .npy file with the labels; the label of sample ``i`` is read
            as ``labels[i][0]``.
        save_dir: directory the JPEG files are written to (created if it
            does not exist).
        limit: maximum number of samples to export.
    """
    images = np.load(x_path)
    labels = np.load(y_path)
    if len(labels) > 1:
        print(labels[1])
    # Report both array shapes (the image shape was previously never shown).
    print(images.shape, labels.shape, len(labels))

    os.makedirs(save_dir, exist_ok=True)
    for i, (pixels, label_row) in enumerate(zip(images[:limit], labels[:limit])):
        target = os.path.join(save_dir, str(i) + '_' + str(label_row[0]) + '.jpg')
        # Log the path that is really written, so the output can be found.
        print(target)
        Image.fromarray(pixels, 'RGB').save(target)
# Module-level call: show_label_img() runs whenever this file is executed or imported.
show_label_img()
def sp_noise(image, prob):
    """Return a uint8 copy of ``image`` with salt-and-pepper noise added.

    One uniform number is drawn per pixel with ``random.random()`` in
    row-major order. A draw below ``prob`` turns the pixel black (0), a draw
    above ``1 - prob`` turns it white (255), and any other draw keeps the
    original pixel. ``prob`` is therefore the share of *each* noise type; for
    ``prob <= 0.5`` roughly ``2 * prob`` of all pixels are corrupted.

    Args:
        image: array with at least two dimensions (rows, columns, ...).
        prob: probability of pepper, and also of salt, per pixel.

    Returns:
        ``np.uint8`` array with the same shape as ``image``.
    """
    rows, cols = image.shape[0], image.shape[1]
    # Same number and order of draws as a nested row/column loop, so results
    # are reproducible with random.seed().
    draws = np.array(
        [random.random() for _ in range(rows * cols)], dtype=float
    ).reshape(rows, cols)

    output = np.zeros(image.shape, np.uint8)
    output[...] = image
    # Salt first, pepper second: pepper wins if both conditions hold
    # (only possible when prob > 0.5).
    output[draws > 1 - prob] = 255
    output[draws < prob] = 0
    return output
def gasuss_noise(image, mean=0, var=0.0001):  # typical var: 0.001, 0.002, 0.0001
    """Return a uint8 copy of ``image`` with additive Gaussian noise.

    The image is scaled to [0, 1], noise drawn from
    ``np.random.normal(mean, sqrt(var))`` is added, and the result is clipped
    to [0, 1] and converted back to 0..255.

    Args:
        image: array-like of pixel values on a 0..255 scale.
        mean: mean of the noise, on the [0, 1] scale.
        var: variance of the noise, on the [0, 1] scale.

    Returns:
        ``np.uint8`` array with the same shape as ``image``.
    """
    scaled = np.asarray(image, dtype=float) / 255
    noise = np.random.normal(mean, var ** 0.5, scaled.shape)
    # Always clip at 0: a negative value would wrap around when cast to uint8
    # and turn dark pixels bright.
    noisy = np.clip(scaled + noise, 0.0, 1.0)
    # Round instead of truncating so that zero noise returns the input as is.
    return np.uint8(np.rint(noisy * 255))
def big_show(img, alpha=0.095, beta=0):
    """Rescale pixel intensities with ``cv2.convertScaleAbs``.

    Args:
        img: image array accepted by ``cv2.convertScaleAbs``.
        alpha: multiplicative gain applied to every pixel.
        beta: offset added after scaling.

    Returns:
        The result of ``cv2.convertScaleAbs(img, alpha=alpha, beta=beta)``.
    """
    # NOTE(review): the original inline remark suggested a gain of 0.8-1.3;
    # the default 0.095 is kept for backward compatibility - confirm whether
    # 0.95 was intended.
    return cv2.convertScaleAbs(img, alpha=alpha, beta=beta)
def mak_dirl():
    """Print the label dictionary, then each of its keys on its own line.

    The directory creation this helper was written for is currently switched
    off, so nothing is created on disk.
    """
    # Root under which one folder per key would be created.
    path = "/Users/chenzhenyu/desktop/moto/kuozhan"
    print(index)
    for name in index:
        print(name)
        # Disabled: os.mkdir(path + '/' + str(name))
#mak_dirl()
def del_unuse(path="/Users/chenzhenyu/desktop/moto/kuozhan", max_index=4622):
    """Delete files whose numeric suffix is greater than ``max_index``.

    Walks every sub-directory of ``path``. File names are expected to look
    like ``<prefix>_<number>.<ext>``; the number is the text between the
    first ``_`` and the following ``.``. Files named ``.DS_Store``, entries
    of ``path`` that are not directories, and files whose name does not
    contain such a number are left untouched.

    Args:
        path: root directory containing one sub-directory per class.
        max_index: files with a number above this value are removed.

    Returns:
        List of the paths that were deleted.
    """
    removed = []
    for folder in os.listdir(path):
        folder_path = os.path.join(path, folder)
        if folder == '.DS_Store' or not os.path.isdir(folder_path):
            continue
        for name in os.listdir(folder_path):
            if name == '.DS_Store':
                continue
            file_path = os.path.join(folder_path, name)
            print(file_path)
            print(name)
            parts = name.split('_')
            if len(parts) < 2:
                # No '_' in the name: nothing to compare against.
                continue
            print(parts[1])
            number_text = parts[1].split('.')[0]
            print('dsdsd:', number_text)
            try:
                number = int(number_text)
            except ValueError:
                continue
            if number > max_index:
                os.remove(file_path)
                removed.append(file_path)
    return removed
def gather_order(path="/Users/chenzhenyu/desktop/moto/kuozhan",
                 path_save="/Users/chenzhenyu/desktop/moto/images",
                 size=(16, 16)):
    """Collect all per-class images into one flat folder.

    ``path`` holds one sub-folder per class, named after the class
    (e.g. ``kuozhan/0``, ``kuozhan/1``, ``kuozhan/临`` ...). Every image is
    resized to ``size`` and written to ``path_save`` as
    ``<counter>_<label>_<class name>.jpg`` (e.g. ``98_5_晋.jpg``), where the
    label is looked up in the module-level ``index`` dictionary.

    Args:
        path: root directory with one sub-folder per class.
        path_save: directory the renamed images are written to.
        size: ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        Number of images written.

    Raises:
        KeyError: if a sub-folder name is not a key of ``index``.
    """
    counter = 0
    for class_name in os.listdir(path):
        class_dir = path + '/' + str(class_name)
        # Skip macOS metadata and any stray file lying next to the class folders.
        if class_name == '.DS_Store' or not os.path.isdir(class_dir):
            continue
        for file_name in os.listdir(class_dir):
            if file_name == '.DS_Store':
                continue
            source = class_dir + '/' + str(file_name)
            print(source)
            img = cv2.imread(source)
            if img is None:
                # cv2.imread signals an unreadable file by returning None.
                print("skip unreadable image:", source)
                continue
            img = cv2.resize(img, size)
            target = (path_save + '/' + str(counter) + '_'
                      + str(index[str(class_name)]) + '_' + str(class_name) + '.jpg')
            cv2.imwrite(target, img)
            print(target)
            counter = counter + 1
    return counter
def disorder_data(path="/Users/chenzhenyu/desktop/moto/images",
                  path_save="/Users/chenzhenyu/desktop/moto/images_finally"):
    """Copy the images of ``path`` into ``path_save`` in shuffled order.

    Input names are expected to look like ``<number>_<label>_<name.ext>``.
    Each image is re-numbered in shuffled order and written as
    ``<new number>_<label>_<name.ext>``.

    Args:
        path: directory the ordered images are read from.
        path_save: directory the re-numbered images are written to.

    Returns:
        Number of images written.
    """
    file_names = os.listdir(path)
    if file_names:
        print(file_names[0])
    random.shuffle(file_names)
    if file_names:
        print(file_names[0])

    counts = 0
    for file_name in file_names:
        if file_name == '.DS_Store':
            continue
        # chara[1] is the label, chara[2] the class name plus extension.
        chara = str(file_name).split('_')
        if len(chara) < 3:
            continue
        source = path + '/' + str(file_name)
        print(source)
        # Read from the input folder (the output folder does not contain this file).
        img_finall = cv2.imread(source)
        if img_finall is None:
            continue
        cv2.imwrite(path_save + '/' + str(counts) + '_' + str(chara[1]) + '_' + str(chara[2]),
                    img_finall)
        counts = counts + 1
    return counts
def create_npy(path="/Users/chenzhenyu/desktop/moto/images_finally",
               x_save="/Users/chenzhenyu/desktop/finally/real_cate/x_data.npy",
               y_save="/Users/chenzhenyu/desktop/finally/real_cate/y_data.npy"):
    """Convert a folder of images into .npy image and label arrays.

    Files in ``path`` must be named ``<number>_<label>_<name>.<ext>``; the
    number only has to be unique, the label is the integer between the first
    two underscores, and any image format ``cv2.imread`` can read is accepted.
    ``.DS_Store``, names without a label part and files that cannot be read
    as an image are skipped.

    Args:
        path: directory containing the images.
        x_save: output file for the stacked image array.
        y_save: output file for the label array of shape ``(n, 1)``.

    Returns:
        Tuple ``(x_data, y_data)`` of the arrays that were saved.
    """
    x_list = []
    y_list = []
    for file_name in os.listdir(path):
        if file_name == '.DS_Store':
            continue
        parts = str(file_name).split('_')
        if len(parts) < 2:
            continue
        img = cv2.imread(path + '/' + str(file_name))
        if img is None:
            # Unreadable file: keep images and labels aligned by skipping both.
            continue
        y_list.append(np.uint8(int(parts[1])))
        x_list.append(np.array(img))

    # Column vector of labels, one row per image actually loaded.
    y_data = np.array(y_list).reshape((len(y_list), 1))
    x_data = np.array(x_list)
    print("x_data", x_data.shape, type(x_data), x_data.dtype.type)
    print("y_data", y_data.shape, type(y_data), y_data.dtype.type)
    np.save(file=x_save, arr=x_data)
    np.save(file=y_save, arr=y_data)
    return x_data, y_data
#create_npy
def count_number(label_path="/Users/chenzhenyu/desktop/finally/real_cate/y_data.npy"):
    """Count how many samples each label has in a saved label array.

    Only the label file is needed: it gives the number of classes and the
    number of samples per class.

    Args:
        label_path: .npy file containing the integer labels.

    Returns:
        Dict mapping each distinct label to its number of occurrences.
    """
    labels = np.load(label_path).ravel()
    # One pass over the data instead of one list.count() per class.
    values, counts = np.unique(labels, return_counts=True)
    print(set(values.tolist()))
    found = {}
    for item, amount in zip(values.tolist(), counts.tolist()):
        print("the %d has found %d" % (item, amount))
        found[item] = amount
    return found
#count_number()
def together_array():
    """Merge the generated train/test set and the real set into one dataset.

    Images and labels are stacked in the same order (train, test, real) and
    saved as two files: one with all images, one with all labels.
    """
    real_images = np.load("/Users/chenzhenyu/desktop/finally/real_cate/x_data.npy")
    real_labels = np.load("/Users/chenzhenyu/desktop/finally/real_cate/y_data.npy")
    train_images = np.load("/Users/chenzhenyu/desktop/finally/pure_create/x_train.npy")
    test_images = np.load("/Users/chenzhenyu/desktop/finally/pure_create/x_test.npy")
    train_labels = np.load("/Users/chenzhenyu/desktop/finally/pure_create/y_train.npy")
    test_labels = np.load("/Users/chenzhenyu/desktop/finally/pure_create/y_test.npy")

    # np.vstack stacks along the first axis: the samples are appended one set after the other.
    generated_images = np.vstack((train_images, test_images))
    generated_labels = np.vstack((train_labels, test_labels))
    x_all_data = np.vstack((generated_images, real_images))
    y_all_data = np.vstack((generated_labels, real_labels))

    np.save("/Users/chenzhenyu/desktop/finally/total_data/x_all_data.npy", x_all_data)
    np.save("/Users/chenzhenyu/desktop/finally/total_data/y_all_data.npy", y_all_data)
    for merged in (x_all_data, y_all_data):
        print(merged.shape)
def get_dic_key_value():
    """Demonstrate dictionary access on the module-level ``index`` table.

    Prints the number of entries and the value stored under "京", then every
    key on its own line, then every value on its own line.
    """
    size, first_label = len(index), index["京"]
    print(size, first_label)
    # All keys first, all values afterwards - each on a separate line.
    print(*index, sep="\n")
    print(*index.values(), sep="\n")
def k_means():
    """K-means demo: cluster toy 2-D points, then the pixels of a plate image.

    The image is converted to HSV, scaled by 1/255 and its pixels are
    clustered into two groups. Labels and cluster centres are printed, the
    second channel of the scaled image is overwritten with 0 or 255 and the
    result is shown in a window until a key is pressed.

    Raises:
        FileNotFoundError: if the demo image cannot be read.
    """
    # Toy example: ten 2-D points clustered into four groups (result unused).
    x1 = np.array([1, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    x2 = np.array([1, 4, 7, 9, 2, 1, 4, 7, 8, 7])
    x = np.array(list(zip(x1, x2))).reshape(len(x1), 2)
    print(x, list(zip(x1, x2)))
    KMeans(n_clusters=4).fit(x)

    image_path = "/Users/chenzhenyu/desktop/new_data/plate/20.jpg"
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file is unreadable.
        raise FileNotFoundError(image_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    imgll = img / 255
    imgl = imgll.reshape(-1, 3)  # one row per pixel

    # Fit once; labels_ already holds the assignment of the fitted pixels.
    keans_model = KMeans(n_clusters=2).fit(imgl)
    labels = keans_model.labels_
    print(labels)
    colors = keans_model.cluster_centers_
    print(colors)
    print(img.shape[1])
    h, w = img.shape[0], img.shape[1]
    print(h, w)

    # Write into the (h, w, 3) image; the flattened per-pixel view cannot be
    # indexed with [row, col, channel].
    # NOTE(review): exact float equality against these constants will rarely,
    # if ever, match; presumably `labels` should drive this mask - confirm intent.
    matches = (imgll[:, :, 0] == 0.11232347) | (imgll[:, :, 1] == 0.3675052)
    imgll[:, :, 1] = np.where(matches, 0, 255)
    cv2.imshow('dsd', imgll)
    cv2.waitKey(0)
#k_means()
def counter_data(label_path="/Users/chenzhenyu/desktop/finally/total_data/y_all_data.npy",
                 test_label_path="/Users/chenzhenyu/desktop/finally/pure_create/y_test.npy",
                 train_label_path="/Users/chenzhenyu/desktop/finally/pure_create/y_train.npy"):
    """Count the samples per character category in a label file.

    Labels follow the module-level ``index`` table: 0-32 are Chinese
    characters, 33-42 are the digits "0"-"9" and 43-66 are the letters
    "A"-"Z". The counts are printed and returned. Afterwards the combined
    length of the two generated label files is printed.

    Args:
        label_path: .npy file with the labels of the merged dataset.
        test_label_path: .npy file with the generated test labels.
        train_label_path: .npy file with the generated training labels.

    Returns:
        Dict with the keys "汉字" (Chinese characters), "字母" (letters),
        "数字" (digits) and "总共" (total).
    """
    y_data = np.load(label_path)
    print(y_data.shape)
    if len(y_data):
        print(y_data[-1], y_data)

    # Count over every label actually present, not a fixed number of rows.
    labels = y_data.ravel()
    chinese = int(np.count_nonzero((labels >= 0) & (labels <= 32)))
    digits = int(np.count_nonzero((labels >= 33) & (labels <= 42)))
    letters = int(np.count_nonzero((labels >= 43) & (labels <= 66)))
    total_num = chinese + digits + letters

    print("汉字:", chinese)
    print("字母:", letters)
    print("数字:", digits)
    print("总共:", total_num)

    test_labels = np.load(test_label_path)
    train_labels = np.load(train_label_path)
    print(int(len(test_labels)) + int(len(train_labels)))
    return {"汉字": chinese, "字母": letters, "数字": digits, "总共": total_num}
#counter_data()
def zhuzhuang():
    """Display a bar chart with one bar per character category."""
    # Optional font setup for CJK text (left disabled):
    #   plt.rcParams['font.sans-serif'] = ['SimHei']
    #   plt.rcParams['axes.unicode_minus'] = False
    categories = ('汉字', '字母', '数字')
    amounts = [94592, 26400, 66480]
    plt.bar(categories, amounts)
    # NOTE(review): the title text looks unrelated to the plotted categories - confirm.
    plt.title('男性购买饮用水情况的调查结果')
    plt.show()
#zhuzhuang()