The implementation is fairly similar to the SVM one: part of the training set is split off as a validation set to select the hyperparameters. After the class scores are computed, they are converted into probabilities, and when performing the gradient update the main thing to watch is that the derivative differs from the SVM case. The full implementation is given in the code below; the accuracy settles at roughly 39%.
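As a quick reminder of why the gradient differs from the hinge-loss SVM, here is a minimal sketch (illustrative only; softmax_grad_single, W, x, and y are hypothetical names, not part of the class below) of the cross-entropy gradient for a single example: take the softmax probabilities, subtract 1 at the correct class, and multiply by the input.

import numpy as np

def softmax_grad_single(W, x, y):
    # W: (C, D) weights, x: (D,) input with bias appended, y: integer label
    scores = W.dot(x)                              # raw class scores
    scores -= scores.max()                         # shift for numerical stability
    probs = np.exp(scores) / np.exp(scores).sum()  # softmax probabilities
    dscores = probs.copy()
    dscores[y] -= 1                                # dL/dscores = p - one_hot(y)
    return np.outer(dscores, x)                    # dL/dW, shape (C, D)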
import numpy as np
import pickle as pic


class softmax:
    weight = []
    l_rate = 0
    reg = 0

    def readData(self, file):
        # Load one CIFAR-10 batch file (a pickled dict with b'data' and b'labels').
        with open(file, 'rb') as fo:
            batch = pic.load(fo, encoding='bytes')
        return batch
    def getGradient(self, sample_train_data, sample_train_label, reg):
        # Vectorized softmax gradient: scores -> shifted exponentials -> probabilities,
        # then subtract 1 at the correct class and average over the batch.
        amount = sample_train_data.shape[0]
        score = self.weight.dot(sample_train_data.T)
        score = score - np.max(score, axis=0)               # shift for numerical stability
        exp_score = np.exp(score)
        exp_score = exp_score / np.sum(exp_score, axis=0)   # softmax probabilities
        exp_score[sample_train_label, range(amount)] -= 1
        grad = exp_score.dot(sample_train_data) / amount + reg * self.weight
        return grad
    def getPrecision(self, data, label):
        # Accuracy: fraction of samples whose highest-scoring class matches the label.
        temp_result = self.weight.dot(data.T)
        res = np.argmax(temp_result, axis=0)
        return np.mean(res == label)
    def Train(self, amount, iteration, train_data, train_label,
              validate_data, validate_label, le, re):
        # Mini-batch SGD from a fresh random initialization; returns the validation
        # accuracy so the caller can compare hyperparameter combinations.
        total = train_data.shape[0]
        self.weight = 0.001 * np.random.randn(10, 3073)
        for i in range(iteration):
            sample = np.random.choice(total, amount, replace=False)
            sample_train_data = train_data[sample, :]
            sample_train_label = train_label[sample]
            grad = self.getGradient(sample_train_data, sample_train_label, re)
            self.weight -= le * grad
        return self.getPrecision(validate_data, validate_label)
    def Train_for_Test(self, amount, iteration, train_data, train_label):
        # Retrain on the full training set with the best hyperparameters found above.
        total = train_data.shape[0]
        for i in range(iteration):
            sample = np.random.choice(total, amount, replace=False)
            sample_train_data = train_data[sample, :]
            sample_train_label = train_label[sample]
            grad = self.getGradient(sample_train_data, sample_train_label, self.reg)
            self.weight -= self.l_rate * grad
    def Test(self, path_train, path_test):
        train_data, train_label, test_data, test_label = [], [], [], []
        for i in range(1, 6):
            cur_path = path_train + str(i)
            read_temp = self.readData(cur_path)
            if i == 1:
                train_data = read_temp[b'data']
                train_label = read_temp[b'labels']
            else:
                train_data = np.append(train_data, read_temp[b'data'], axis=0)
                train_label += read_temp[b'labels']
        train_label = np.array(train_label)
        mean_image = np.mean(train_data, axis=0)
        train_data = train_data - mean_image  # preprocessing: subtract the mean image
        read_temp = self.readData(path_test)
        test_data = read_temp[b'data']
        test_label = np.array(read_temp[b'labels'])
        test_data = test_data - mean_image  # preprocessing
        train_data = np.hstack([train_data, np.ones((train_data.shape[0], 1))])  # append bias dimension to training data
        test_data = np.hstack([test_data, np.ones((test_data.shape[0], 1))])  # append bias dimension to test data
        amount_train = train_data.shape[0]
        amount_validate = 20000
        amount_train -= amount_validate
        validate_data = train_data[amount_train:, :]  # validation data
        validate_label = train_label[amount_train:]   # validation labels
        train_data = train_data[:amount_train, :]     # training data
        train_label = train_label[:amount_train]      # training labels
        learning = [2e-6, 3e-6]
        regu = [5, 6, 7, 8, 9, 10]
        pre = -1
        for le in learning:
            for re in regu:
                temp_precision = self.Train(200, 1500, train_data, train_label,
                                            validate_data, validate_label, le, re)
                print(temp_precision)
                if temp_precision > pre:
                    pre = temp_precision
                    self.l_rate = le
                    self.reg = re
        print("result: " + str(pre) + " " + str(self.l_rate) + " " + str(self.reg))
        # Reinitialize, merge the validation set back in, and retrain for the final test.
        self.weight = 0.001 * np.random.randn(10, 3073)
        train_data = np.append(train_data, validate_data, axis=0)
        train_label = np.append(train_label, validate_label, axis=0)
        self.Train_for_Test(200, 10000, train_data, train_label)
        final_pre = self.getPrecision(test_data, test_label)
        print("final accuracy: " + str(final_pre))
a = softmax()
a.Test("D:\\data\\cifar-10-batches-py\\data_batch_",
       "D:\\data\\cifar-10-batches-py\\test_batch")