qa_model

[code=python]
import shelve

import numpy

import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams


class dA(object):
    r"""Denoising Auto-Encoder class (dA)

    A denoising autoencoder tries to reconstruct the input from a corrupted
    version of it by first projecting it into a latent space and then
    reprojecting it back into the input space. Please refer to Vincent et
    al., 2008 for more details. If x is the input, then equation (1) computes
    a partially destroyed version of x by means of a stochastic mapping q_D.
    Equation (2) computes the projection of the input into the latent space.
    Equation (3) computes the reconstruction of the input, while equation (4)
    computes the reconstruction error.

    .. math::

        \tilde{x} \sim q_D(\tilde{x}|x)                                  (1)

        y = s(W \tilde{x} + b)                                           (2)

        z = s(W' y + b')                                                 (3)

        L(x,z) = -\sum_{k=1}^d [x_k \log z_k + (1-x_k) \log(1-z_k)]      (4)

    """

    def __init__(
        self,
        numpy_rng,
        theano_rng=None,
        input=None,
        # n_visible=784,
        n_hidden=100,
        W=None,
        bhid=None,
        # bvis=None
    ):
        """
        Initialize the dA class by specifying the number of hidden units
        (the dimension d' of the latent or hidden space). The constructor
        also receives symbolic variables for the input, weights and biases.
        Such symbolic variables are useful when, for example, the input is
        the result of some computation, or when weights are shared between
        the dA and an MLP layer. This always happens when dealing with SdAs:
        the dA on layer 2 gets as input the output of the dA on layer 1, and
        the weights of the dA are used in the second stage of training to
        construct an MLP.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random generator used to generate weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given, one is
                     generated based on a seed drawn from `numpy_rng`

        :type input: theano.tensor.TensorType
        :param input: a symbolic description of the input, or None for a
                      standalone dA

        :type n_hidden: int
        :param n_hidden: number of hidden units

        :type W: theano.tensor.TensorType
        :param W: Theano variable pointing to a set of weights that should
                  be shared between the dA and another architecture; if the
                  dA should be standalone, set this to None

        :type bhid: theano.tensor.TensorType
        :param bhid: Theano variable pointing to a set of bias values (for
                     hidden units) that should be shared between the dA and
                     another architecture; if the dA should be standalone,
                     set this to None

        """
        # self.n_visible = n_visible
        self.n_hidden = n_hidden

        # create a Theano random generator that gives symbolic random values
        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # note : W' was written as `W_prime` and b' as `b_prime`
        if not W:
            # W is initialized with `initial_W`, uniformly sampled between
            # -4*sqrt(6./(n_hidden+n_hidden)) and 4*sqrt(6./(n_hidden+n_hidden))
            # (n_visible has been dropped here, so the weight matrix is
            # square). The output of `uniform` is converted using `asarray`
            # to dtype theano.config.floatX so that the code is runnable on
            # GPU.
            initial_W = numpy.asarray(
                numpy_rng.uniform(
                    low=-4 * numpy.sqrt(6. / (n_hidden + n_hidden)),
                    high=4 * numpy.sqrt(6. / (n_hidden + n_hidden)),
                    size=(n_hidden, n_hidden)
                ),
                dtype=theano.config.floatX
            )
            W = theano.shared(value=initial_W, name='W', borrow=True)

        '''
        if not bvis:
            bvis = theano.shared(
                value=numpy.zeros(
                    n_visible,
                    dtype=theano.config.floatX
                ),
                borrow=True
            )
        '''
        if not bhid:
            bhid = theano.shared(
                value=numpy.zeros(
                    n_hidden,
                    dtype=theano.config.floatX
                ),
                name='b',
                borrow=True
            )

        self.W = W
        # b corresponds to the bias of the hidden units
        self.b = bhid
        # b_prime corresponds to the bias of the visible units
        # self.b_prime = bvis
        # tied weights, therefore W_prime is W transpose
        # self.W_prime = self.W.T
        self.theano_rng = theano_rng
        # if no input is given, generate a variable representing the input
        if input is None:
            # we use a matrix because we expect a minibatch of several
            # examples, each example being a row
            self.x = T.dmatrix(name='input')
        else:
            self.x = input

        self.params = [self.W, self.b]
    # end-snippet-1

    def get_hidden_values(self):
        """ Computes the values of the hidden layer """
        return T.sum(T.nnet.sigmoid(T.dot(self.x, self.W) + self.b), axis=0)
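
    # note : unlike the stock tutorial dA, `get_hidden_values` pools the
    #        sigmoid activations over axis 0, so each dA maps its whole
    #        input matrix to a single vector of length n_hidden; the QA
    #        cost below compares these pooled vectors for questions and
    #        answers.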


    '''
    def get_corrupted_input(self, input, corruption_level):
        """This function keeps ``1-corruption_level`` entries of the inputs
        the same and zeroes out a randomly selected subset of size
        ``corruption_level``
        Note : first argument of theano.rng.binomial is the shape (size) of
               random numbers that it should produce
               second argument is the number of trials
               third argument is the probability of success of any trial

                this will produce an array of 0s and 1s where 1 has a
                probability of 1 - ``corruption_level`` and 0 with
                ``corruption_level``

                The binomial function returns an int64 data type by
                default.  int64 multiplied by the input
                type (floatX) always returns float64.  To keep all data
                in floatX when floatX is float32, we set the dtype of
                the binomial to floatX. As in our case the value of
                the binomial is always 0 or 1, this doesn't change the
                result. This is needed to allow the gpu to work
                correctly as it only supports float32 for now.

        """
        return self.theano_rng.binomial(size=input.shape, n=1,
                                        p=1 - corruption_level,
                                        dtype=theano.config.floatX) * input
    '''
    '''
    def get_reconstructed_input(self, hidden):
        """Computes the reconstructed input given the values of the
        hidden layer

        """
        return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime)

    def get_cost_updates(self, corruption_level, learning_rate):
        """ This function computes the cost and the updates for one training
        step of the dA """

        #tilde_x = self.get_corrupted_input(self.x, corruption_level)
        y = self.get_hidden_values(tilde_x)
        #z = self.get_reconstructed_input(y)
        # note : we sum over the size of a datapoint; if we are using
        #        minibatches, L will be a vector, with one entry per
        #        example in minibatch
        L = - T.sum(self.x * T.log(z) + (1 - self.x) * T.log(1 - z), axis=1)
        # note : L is now a vector, where each element is the
        #        cross-entropy cost of the reconstruction of the
        #        corresponding example of the minibatch. We need to
        #        compute the average of all these to get the cost of
        #        the minibatch
        cost = T.mean(L)

        # compute the gradients of the cost of the `dA` with respect
        # to its parameters
        gparams = T.grad(cost, self.params)
        # generate the list of updates
        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]

        return (cost, updates)
    '''


x = T.fmatrix('x')  # question matrix
y = T.fmatrix('y')  # answer matrix
index = T.lscalar()
rng = numpy.random.RandomState(23455)
theano_rng = RandomStreams(rng.randint(2 ** 30))
n_hidden = 2  # number of question/answer dA pairs (each dA itself still
              # uses 100 hidden units, see below)
learning_rate = 0.1
da_q = []
da_a = []
for count in range(n_hidden):
    da_q.append(dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        # n_visible=28 * 28,
        n_hidden=100
    ))

for count in range(n_hidden):
    da_a.append(dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=y,
        # n_visible=28 * 28,
        n_hidden=100
    ))

cost_matrix = []
for hid_index in range(n_hidden):
    cost_matrix.append(T.sum(
        T.sqr(da_q[hid_index].get_hidden_values()
              - da_a[hid_index].get_hidden_values()) / 2))
cost = T.sum(cost_matrix)
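
# note : the cost ties each question dA to its answer dA: it is half the
#        squared Euclidean distance between their pooled hidden vectors,
#        summed over all dA pairs, so the gradient steps below pull
#        matching question/answer encodings together.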

params = da_q[0].params + da_a[0].params
for hid_index in range(1, n_hidden):
    params += da_q[hid_index].params + da_a[hid_index].params
gparams = T.grad(cost, params)
updates = []
for param, gparam in zip(params, gparams):
    updates.append((param, param - learning_rate * gparam))

db = shelve.open(r'data\training_data\training_data_30_50_1_9_games.dat')
x1 = db['train_set1']
q, a = x1[0]
q1, a1 = x1[1]
train_da = theano.function(
    [index],
    cost,
    updates=updates,
    givens={
        x: x1[0][0],
        y: x1[0][1]
    }
)
print(train_da(0))
[/code]
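
The script above compiles the gradient step against a single (question, answer) pair via `givens` and runs it once. Below is a minimal training-loop sketch that is not part of the original script: it assumes, as `q, a = x1[0]` suggests, that `x1` is a list of (question, answer) matrix pairs, and the names `train_pair` and `n_epochs` are made up for illustration. Compiling with explicit inputs lets one function handle every pair without rebuilding the graph:

[code=python]
# a minimal sketch, assuming x1 is a list of (question, answer) float
# matrix pairs with 100 columns each; `train_pair` and `n_epochs` are
# hypothetical names, not taken from the original script
train_pair = theano.function(
    inputs=[x, y],                # feed the matrices directly, no `givens`
    outputs=cost,
    updates=updates,
    allow_input_downcast=True     # accept float64 data for fmatrix inputs
)

n_epochs = 15
for epoch in range(n_epochs):
    epoch_cost = 0.
    for q_mat, a_mat in x1:       # one gradient step per question/answer pair
        epoch_cost += train_pair(q_mat, a_mat)
    print('epoch %d, mean cost %f' % (epoch, epoch_cost / len(x1)))
[/code]

Since `updates` is baked into the compiled function, each call performs one gradient-descent step on all `params`, and the per-epoch mean cost should decrease if the question/answer pairs are learnable.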

 
