神经网络的复杂之处在于它的组成结构太复杂,神经元太多
这是一个只有两层的神经网络,假定输入x,我们规定隐层h和输出层o这两层都是线性变换 $z=wx+b$ 与 sigmoid 激活 $f(z)=\frac{1}{1+e^{-z}}$ 的组合,一旦输入样本x和标签y之后,模型就开始训练了。那么我们的问题就变成了求隐层的w、b和输出层的w、b四个参数的过程。
训练的目的是神经网络的输出和真实数据的输出"一样",但是在"一样"之前,模型输出和真实数据都是存在一定的差异,我们把这个"差异"作这样的一个参数e代表误差的意思,那么模型输出加上误差之后就等于真实标签了,作:y=wx+b+e
当我们有n对x和y那么就有n个误差e,我们试着把n个误差e都加起来表示一个误差总量,为了不让残差正负抵消我们取平方或者取绝对值,本文取平方。这种误差我们称为“残差”,也就是模型的输出的结果和真实结果之间的差值。损失函数Loss还有一种称呼叫做“代价函数Cost”,残差表达式如下:
$$Loss=\sum_{i=1}^{n}e_i^2=\sum_{i=1}^{n}\left(y_i-(wx_i+b)\right)^2$$
现在我们要做的就是找到一个比较好的w和b,使得整个Loss尽可能的小,越小说明我们训练出来的模型越好。
BP算法主要有以下三个步骤 :
图构解析【反向传播】
损失函数展开如下图:
$$Loss=\sum_{i=1}^{n}\left(x_i^2w^2+b^2+2x_iwb-2y_ib-2x_iy_iw+y_i^2\right)=Aw^2+Bb^2+Cwb+Dw+Eb+F$$
初始化一个w0和b0,代入Loss里面去,这个点(w0, b0, Loss0)会出现在图示的某个位置,我们的目标是最低点。
$$x_{n+1}=x_n-\eta\,\frac{df(x)}{dx}$$
上式为梯度下降算法的公式;其中 $\frac{df(x)}{dx}$ 为梯度,η是学习率,也就是每次挪动的步长,η大每次迭代的脚步就大,η小每次迭代的脚步就小,我们只有取到合适的η才能尽可能的接近最小值而不会因为步子太大越过了最小值。
python实现创建文件并将获取的数据集导入对应文件
from tensorflow_core.examples.tutorials.mnist import input_data
from PIL import Image
import numpy as np
import os

# Export the packed MNIST dataset as individual 28x28 PNG images.
# NOTE: scipy.misc.imsave was removed in SciPy 1.2, so Pillow is used
# to write the PNG files instead.
mnist = input_data.read_data_sets('data', one_hot=True)


def _export(images, folder, prefix):
    """Save every flat 784-float image in `images` as <folder>/<prefix>_<idx>.png.

    Creates `folder` if it does not exist. Pixels arrive normalized to
    [0, 1]; they are rescaled to 8-bit grayscale before saving.
    """
    if not os.path.exists(folder):
        os.mkdir(folder)
    for idx, img in enumerate(images):
        img_arr = (np.reshape(img, [28, 28]) * 255.0).astype(np.uint8)
        Image.fromarray(img_arr).save('%s/%s_%d.png' % (folder, prefix, idx))


_export(mnist.train.images, 'train', 'train')  # unpack the training images
_export(mnist.test.images, 'test', 'test')     # unpack the test images
将图片数据转换为数组形式
from PIL import Image
import numpy as np
def pre_dic(pic_path):
    """Load an image file and preprocess it into a 1x784 float32 array.

    The image is resized to 28x28, converted to grayscale ('L'),
    flattened, and scaled from [0, 255] down to [0.0, 1.0] so it matches
    the normalization of the MNIST training data.

    :param pic_path: path of the image file to load
    :return: numpy array of shape (1, 784), dtype float32, values in [0, 1]
    """
    img = Image.open(pic_path)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    reIm = img.resize((28, 28), Image.LANCZOS)
    img_array = np.array(reIm.convert('L'))
    nm_array = img_array.reshape([1, 784]).astype(np.float32)
    # Normalize 8-bit pixel values into [0, 1].
    return np.multiply(nm_array, 1.0 / 255.0)
if __name__ == '__main__':
    # Convert the ten digit images 0.png .. 9.png into C header files
    # input_0.h .. input_9.h, each declaring a float[784] array literal.
    for digit in range(10):
        array = pre_dic('%d.png' % digit)
        name = 'input_%d' % digit
        # Flatten the nested-list repr into a plain comma-separated list
        # by stripping the brackets.
        values = str(array.tolist()).replace('[', '').replace(']', '')
        # `with` closes the file automatically; the original's extra
        # f.close() was redundant.
        with open(name + '.h', 'w') as f:
            f.write('float ' + name + '[784]={' + values + '};')
转换结果【举一例】
float input_5[784]={0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.250980406999588, 0.250980406999588, 1.0, 0.7176470756530762, 0.9921569228172302, 0.5490196347236633, 0.4745098352432251, 0.4745098352432251, 0.14901961386203766, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.35686275362968445, 0.8313726186752319, 0.9411765336990356, 0.9411765336990356, 0.9411765336990356, 0.9647059440612793, 0.9647059440612793, 0.9921569228172302, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.30980393290519714, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5215686559677124, 0.9725490808486938, 0.988235354423523, 0.7921569347381592, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.9921569228172302, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.7764706611633301, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8862745761871338, 0.988235354423523, 0.988235354423523, 0.6313725709915161, 0.9372549653053284, 0.6745098233222961, 0.6745098233222961, 0.40000003576278687, 0.43137258291244507, 0.15294118225574493, 0.15294118225574493, 0.15294118225574493, 0.6470588445663452, 0.988235354423523, 
0.9647059440612793, 0.30588236451148987, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8862745761871338, 0.988235354423523, 0.988235354423523, 0.4078431725502014, 0.1725490242242813, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.05490196496248245, 0.20784315466880798, 0.20784315466880798, 0.0784313753247261, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.501960813999176, 0.988235354423523, 0.988235354423523, 0.25882354378700256, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3686274588108063, 0.988235354423523, 0.988235354423523, 0.3764706254005432, 0.16078431904315948, 0.16078431904315948, 0.07450980693101883, 0.07450980693101883, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.13725490868091583, 0.8352941870689392, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.7960785031318665, 0.7960785031318665, 0.6313725709915161, 0.4078431725502014, 0.06666667014360428, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3176470696926117, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.9921569228172302, 0.988235354423523, 0.8352941870689392, 0.4117647409439087, 0.03921568766236305, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.3176470696926117, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.9921569228172302, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.7254902124404907, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.1411764770746231, 0.6196078658103943, 0.917647123336792, 0.572549045085907, 0.7176470756530762, 0.7725490927696228, 0.7490196228027344, 0.9921569228172302, 0.9921569228172302, 0.9921569228172302, 0.9921569228172302, 
0.5058823823928833, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.011764707043766975, 0.04313725605607033, 0.007843137718737125, 0.02352941408753395, 0.027450982481241226, 0.02352941408753395, 0.545098066329956, 0.9450981020927429, 0.988235354423523, 0.988235354423523, 0.9764706492424011, 0.2980392277240753, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.09019608050584793, 0.8509804606437683, 0.988235354423523, 0.988235354423523, 0.501960813999176, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.8352941870689392, 0.988235354423523, 0.988235354423523, 0.9647059440612793, 0.30588236451148987, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.43137258291244507, 0.9568628072738647, 0.988235354423523, 0.988235354423523, 0.8627451658248901, 0.0784313753247261, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.05882353335618973, 0.05490196496248245, 0.0, 0.0, 0.125490203499794, 0.21176472306251526, 0.7333333492279053, 0.729411780834198, 0.960784375667572, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.8313726186752319, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6196078658103943, 0.7568628191947937, 0.6784313917160034, 0.6784313917160034, 0.8588235974311829, 0.988235354423523, 0.9921569228172302, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.9529412388801575, 0.4235294461250305, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.4274510145187378, 0.960784375667572, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.9921569228172302, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.960784375667572, 0.4235294461250305, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.6941176652908325, 0.9450981020927429, 0.988235354423523, 0.988235354423523, 0.988235354423523, 0.9921569228172302, 0.9647059440612793, 0.9333333969116211, 0.9333333969116211, 0.41568630933761597, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.07450980693101883, 0.5176470875740051, 0.988235354423523, 0.988235354423523, 0.4705882668495178, 0.2705882489681244, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0};
c语言实现全连接神经网络推理
//图片输入 28x28 = 784(像素点)
/********************推理函数******************
第一层:权重:784*64 偏置:64
第二层:权重:64*10 偏置:10
找到最大值结果输出 0-9
**********************************************/
#include <stdio.h>  /* 角括号内的头文件名在排版时丢失,按 printf 的使用补全 */
#include"input_0.h"
#include"input_1.h"
#include"input_2.h"
#include"input_3.h"
#include"input_4.h"
#include"input_5.h"
#include"input_6.h"
#include"input_7.h"
#include"input_8.h"
#include"input_9.h"
#include"layer1_bais.h"
#include"layer1_weight.h"
#include"layer2_bais.h"
#include"layer2_weight.h"
//输出第一层权重、偏置 第二层权重偏置
//输出推理结果
/*
 * Two-layer fully connected inference on one 28x28 (784-float) image.
 *   layer 1: 784 -> 64, ReLU    weights indexed as w1[i + j*64]
 *   layer 2: 64  -> 10, linear  weights indexed as w2[j*10 + i]
 * Returns the index (0-9) of the largest output, i.e. the predicted digit.
 */
int full_connect(float *img,float *w1,float *b1,float *w2,float *b2)
{
    int i, j, ret = 0;      /* default to class 0 so ret is never uninitialized */
    float a1[64], a2[10], temp;

    /* Layer 1: a1 = ReLU(w1 * img + b1) */
    for (i = 0; i < 64; i++)
    {
        a1[i] = 0;
        for (j = 0; j < 784; j++)
        {
            a1[i] += w1[i + j*64] * img[j];
        }
        a1[i] += b1[i];
        /* BUG FIX: the bias used to be added a second time inside the
         * ReLU expression; apply ReLU to the already-biased sum only. */
        a1[i] = (a1[i] > 0) ? a1[i] : 0;
    }

    /* Layer 2: a2 = w2 * a1 + b2 (no activation; argmax follows) */
    for (i = 0; i < 10; i++)
    {
        a2[i] = 0.0f;
        for (j = 0; j < 64; j++)
        {
            a2[i] += w2[j*10 + i] * a1[j];
        }
        a2[i] += b2[i];
    }

    /* Argmax over the 10 outputs. Seed with a2[0] (not 0) so a valid
     * index is returned even when every output is negative. */
    temp = a2[0];
    for (i = 1; i < 10; i++)
    {
        if (a2[i] > temp)
        {
            temp = a2[i];
            ret = i;
        }
    }
    return ret;
}
int main()
{
    /* Feed each of the ten pre-converted digit images (input_0..input_9)
     * through the network and print the expected vs. predicted digit. */
    float *samples[10] = {
        input_0, input_1, input_2, input_3, input_4,
        input_5, input_6, input_7, input_8, input_9
    };
    int idx;

    for (idx = 0; idx < 10; idx++)
    {
        int prediction = full_connect(samples[idx],
                                      layer1_weight, layer1_bais,
                                      layer2_weight, layer2_bais);
        printf("input:%d,predict:%d\n", idx, prediction);
    }
    return 0;
}
指定全连接c推理为从机组件【avalon_slave】
//图片输入 28x28 = 784(像素点)
/********************推理函数******************
第一层:权重:784*64 偏置:64
第二层:权重:64*10 偏置:10
找到最大值结果输出 0-9
**********************************************/
#include <stdio.h>  /* 角括号内的头文件名在排版时丢失,按 printf 的使用补全 */
#include"HLS\hls.h"
#include"input_0.h"
#include"input_1.h"
#include"input_2.h"
#include"input_3.h"
#include"input_4.h"
#include"input_5.h"
#include"input_6.h"
#include"input_7.h"
#include"input_8.h"
#include"input_9.h"
#include"layer1_bais.h"
#include"layer1_weight.h"
#include"layer2_bais.h"
#include"layer2_weight.h"
//输出第一层权重、偏置 第二层权重偏置
//输出推理结果
/*
 * HLS Avalon memory-mapped slave component wrapping the same two-layer
 * fully connected inference:
 *   layer 1: 784 -> 64, ReLU    weights indexed as w1[i + j*64]
 *   layer 2: 64  -> 10, linear  weights indexed as w2[j*10 + i]
 * Each argument becomes an on-chip slave memory of the given byte size;
 * the int return value is read back through the component's CSR.
 * Returns the index (0-9) of the largest output, i.e. the predicted digit.
 */
hls_avalon_slave_component
component int full_connect(
    hls_avalon_slave_memory_argument(784*sizeof(float)) float *img,
    hls_avalon_slave_memory_argument(784*64*sizeof(float)) float *w1,
    hls_avalon_slave_memory_argument(64*sizeof(float)) float *b1,
    hls_avalon_slave_memory_argument(64*10*sizeof(float)) float *w2,
    hls_avalon_slave_memory_argument(10*sizeof(float)) float *b2)
{
    int i, j, ret = 0;      /* default to class 0 so ret is never uninitialized */
    float a1[64], a2[10], temp;

    /* Layer 1: a1 = ReLU(w1 * img + b1) */
    for (i = 0; i < 64; i++)
    {
        a1[i] = 0;
        for (j = 0; j < 784; j++)
        {
            a1[i] += w1[i + j*64] * img[j];
        }
        a1[i] += b1[i];
        /* BUG FIX: the bias used to be added a second time inside the
         * ReLU expression; apply ReLU to the already-biased sum only. */
        a1[i] = (a1[i] > 0) ? a1[i] : 0;
    }

    /* Layer 2: a2 = w2 * a1 + b2 (no activation; argmax follows) */
    for (i = 0; i < 10; i++)
    {
        a2[i] = 0.0f;
        for (j = 0; j < 64; j++)
        {
            a2[i] += w2[j*10 + i] * a1[j];
        }
        a2[i] += b2[i];
    }

    /* Argmax over the 10 outputs. Seed with a2[0] (not 0) so a valid
     * index is returned even when every output is negative. */
    temp = a2[0];
    for (i = 1; i < 10; i++)
    {
        if (a2[i] > temp)
        {
            temp = a2[i];
            ret = i;
        }
    }
    return ret;
}
int main()
{
    /* Software testbench for the HLS component: run the ten digit images
     * through full_connect and print each expected/predicted pair. */
    float *samples[10] = {
        input_0, input_1, input_2, input_3, input_4,
        input_5, input_6, input_7, input_8, input_9
    };
    int idx;

    for (idx = 0; idx < 10; idx++)
    {
        int prediction = full_connect(samples[idx],
                                      layer1_weight, layer1_bais,
                                      layer2_weight, layer2_bais);
        printf("input:%d,predict:%d\n", idx, prediction);
    }
    return 0;
}
生成IP
init_hls.bat//hls初始化
i++ -march=x86-64 handwr.cpp //在x86-64上编译程序(原文文件名误写为 handwr.cpp.cpp)
i++ -march=CycloneV handwr.cpp //硬件编译,生成硬件语言
platform designer挂载生成的ip
在hls目录下,找到ip;双击打开
点击finish,出现报错如示【未连线】
开始连线【连线完成,重新分配基地址】
配置完成,点击generate HDL…
quartus会卡顿,弹出如示界面【弹出则配置成功,反之失败】
工程全编译
生成设备树、rbf以及头文件
替换镜像中的dtb以及rbf文件
接下来,用eclipse进行地址映射。
创建新工程,进行虚拟地址映射,源码如下
//GCC标准头文件(原文角括号内的头文件名在排版时丢失,按代码用到的接口补全)
#include <stdio.h>    /* printf */
#include <string.h>   /* memcpy */
#include <fcntl.h>    /* open, O_RDWR, O_SYNC */
#include <unistd.h>   /* close */
#include <sys/mman.h> /* mmap, munmap */
#include <sys/time.h> /* gettimeofday */
//HPS厂家提供的底层定义头文件
#define soc_cv_av
#include "hwlib.h"
#include "socal/socal.h"
#include "socal/hps.h"
//同用户具体的HPS应用系统相关硬件描述头文件
#include"hps_0.h"
#define HW_REGS_BASE (ALT_STM_OFST) //HPS外设地址段基地址
#define HW_REGS_SPAN (0x04000000) //HPS外设地址段地址空间 64MB
#define HW_REGS_MASK (HW_REGS_SPAN - 1) //HPS外设地址段地址掩码
//导入头文件
#include"input_0.h"
#include"input_1.h"
#include"input_2.h"
#include"input_3.h"
#include"input_4.h"
#include"input_5.h"
#include"input_6.h"
#include"input_7.h"
#include"input_8.h"
#include"input_9.h"
#include"layer1_bais.h"
#include"layer1_weight.h"
#include"layer2_bais.h"
#include"layer2_weight.h"
static float *input_img=NULL;
static float *input_w1=NULL;
static float *input_b1=NULL;
static float *input_w2=NULL;
static float *input_b2=NULL;
static volatile long long *control=NULL;
void *virtual_base = NULL;
/*
 * Map the HPS-to-FPGA lightweight bridge register span into this process
 * and resolve pointers to the HLS component's Avalon slave memories
 * (image, weights, biases) and its control/status register block.
 *
 * Returns the /dev/mem file descriptor on success (the caller closes it),
 * or -1 on failure.
 */
int fpga_init()
{
    int fd;

    fd = open("/dev/mem", (O_RDWR | O_SYNC));
    if (fd == -1)
    {
        /* BUG FIX: execution previously fell through and called mmap()
         * with an invalid fd; fail early instead. */
        printf("ERROR:open failed\n");
        return -1;
    }
    virtual_base = mmap(NULL, HW_REGS_SPAN, (PROT_READ | PROT_WRITE), MAP_SHARED, fd, HW_REGS_BASE);
    if (virtual_base == MAP_FAILED)
    {
        printf("ERROR:open MMAP() failed\n");
        close(fd);
        /* BUG FIX: 1 is a plausible fd number; return -1 to signal failure. */
        return -1;
    }
    /* Each pointer is the lightweight-bridge base plus the component
     * port's offset, masked into the mapped span. NOTE: arithmetic on a
     * void* is a GCC extension. */
    input_img = (float*)(virtual_base +
        ((unsigned long)(ALT_LWFPGASLVS_OFST + CONNECT_0_FULL_CONNECT_INTERNAL_INST_AVS_IMG_BASE) & (unsigned long)(HW_REGS_MASK)));
    input_w1 = (float*)(virtual_base +
        ((unsigned long)(ALT_LWFPGASLVS_OFST + CONNECT_0_FULL_CONNECT_INTERNAL_INST_AVS_W1_BASE) & (unsigned long)(HW_REGS_MASK)));
    input_b1 = (float*)(virtual_base +
        ((unsigned long)(ALT_LWFPGASLVS_OFST + CONNECT_0_FULL_CONNECT_INTERNAL_INST_AVS_B1_BASE) & (unsigned long)(HW_REGS_MASK)));
    input_w2 = (float*)(virtual_base +
        ((unsigned long)(ALT_LWFPGASLVS_OFST + CONNECT_0_FULL_CONNECT_INTERNAL_INST_AVS_W2_BASE) & (unsigned long)(HW_REGS_MASK)));
    input_b2 = (float*)(virtual_base +
        ((unsigned long)(ALT_LWFPGASLVS_OFST + CONNECT_0_FULL_CONNECT_INTERNAL_INST_AVS_B2_BASE) & (unsigned long)(HW_REGS_MASK)));
    control = (long long*)(virtual_base +
        ((unsigned long)(ALT_LWFPGASLVS_OFST + CONNECT_0_FULL_CONNECT_INTERNAL_INST_AVS_CRA_BASE) & (unsigned long)(HW_REGS_MASK)));
    return fd;
}
int main()
{
/* Load the network parameters into the accelerator's slave memories once,
 * then run the ten sample digits through the FPGA component, timing each
 * inference with gettimeofday(). */
int fd;
int i;
int ret;
struct timeval timev;
int h,h0,h1;
/* Table of the ten pre-converted input images (declared in input_N.h). */
float* yu[] = { input_0,input_1,input_2,input_3,
input_4,input_5,input_6,input_7,
input_8,input_9 };
fd = fpga_init();
/* Copy weights and biases into the component's on-chip memories. */
memcpy(input_w1,layer1_weight,784*64*sizeof(float));
memcpy(input_b1,layer1_bais,64*sizeof(float));
memcpy(input_w2,layer2_weight,64*10*sizeof(float));
memcpy(input_b2,layer2_bais,10*sizeof(float));
for(i=0;i<10;i++)
{
gettimeofday(&timev,NULL);
h0 = timev.tv_usec;
memcpy(input_img,yu[i],784*sizeof(float));
/* Start the component, busy-wait on the done bit, clear start, then read
 * the return value. NOTE(review): offsets 1/3/4 and mask 0x02 look like
 * the HLS-generated CSR map (start / status-done / returndata) — confirm
 * against the generated component's register documentation. */
*(control+1) = 0x1;
while((*(control+3) & 0x02) == 0);
*(control+1) = 0x0;
ret = *(control + 4);
gettimeofday(&timev,NULL);
h1 = timev.tv_usec;/* microseconds */
h = h1 - h0;
/* Split the elapsed time into whole ms and a tenth-of-ms digit.
 * NOTE(review): tv_usec wraps every second, so h can go negative when an
 * inference straddles a second boundary. */
h0 = h/1000 ;
h1 = (h%1000)/10;
printf("input:%d ret:%d time:%d.%d ms\n",i,ret,h0,h1);
}
/* Release the register mapping and the /dev/mem descriptor. */
munmap(virtual_base,HW_REGS_SPAN);
close(fd);
return 0;
}
这篇文章由很多步骤都是之前做过的,因此像设备树生成、文件创建、增加外设以及板子验证等步骤都没做详细介绍。对于这些步骤,可以参考之前的文章;
SoC学习篇—实现hello FPGA打印
SoC学习篇—外设IP使用 PIO_LED 点灯
HLS学习篇—搭配hls环境及操作实例
深度学习中的激活函数
全连接神经网络(DNN)