# Handwritten digit recognition - Kaggle (Digit Recognizer) - 99%

import pandas as pd
import tensorflow as tf
from tensorflow.keras import optimizers, Sequential, layers,regularizers
import random
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='2'
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# Session configuration: enable soft device placement and let GPU memory
# allocation grow on demand.
config = ConfigProto()
config.allow_soft_placement = True
config.gpu_options.allow_growth = True

# Install an interactive session that uses the configuration above.
session = InteractiveSession(config=config)

# Fix TensorFlow's global random seed.
tf.random.set_seed(2345)

def preprocess(x, y):
    """Convert one (pixels, label) pair into an (image, one-hot label) pair.

    Args:
        x: Flat pixel values of a single image; must hold 28 * 28 elements.
            Presumably in the 0-255 range, which the scaling below maps to
            [-0.5, 0.5] -- confirm against the data source.
        y: Integer class label of the image.

    Returns:
        A tuple ``(image, label)``: ``image`` is a float32 tensor of shape
        (28, 28, 1) and ``label`` is a one-hot tensor of depth 10.
    """
    # Scale, then centre around zero: x / 255 - 0.5.
    x = tf.cast(x, dtype=tf.float32) / 255. - 0.5
    # Spell out the single channel so an input of the wrong size fails here
    # instead of silently producing an unexpected channel count.
    x = tf.reshape(x, [28, 28, 1])
    y = tf.cast(y, dtype=tf.int32)
    y = tf.one_hot(y, depth=10)
    return x, y

# Number of shuffled rows used for training; the remaining rows of train.csv
# are held out and used as validation data.
TRAIN_SIZE = 40000
BATCH_SIZE = 128

data_train = pd.read_csv(r'digit-recognizer/train.csv')
data_test = pd.read_csv(r'digit-recognizer/test.csv')
print(data_train.shape[0])

# Shuffle the row positions so the train/validation split is random.
# NOTE(review): `random` is never seeded, so the split differs between runs
# even though tf.random.set_seed is called earlier -- confirm this is intended.
idx = list(range(data_train.shape[0]))
random.shuffle(idx)
train_idx, val_idx = idx[:TRAIN_SIZE], idx[TRAIN_SIZE:]

# Column 0 is used as the label and the remaining columns as the features.
# Both label arrays are converted to NumPy so the two splits are handled
# the same way.
x_train = data_train.iloc[train_idx, 1:].values
y_train = data_train.iloc[train_idx, 0].values
# Despite the names, x_test / y_test are the held-out part of train.csv.
x_test = data_train.iloc[val_idx, 1:].values
y_test = data_train.iloc[val_idx, 0].values
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_train = db_train.map(preprocess).shuffle(10000).batch(BATCH_SIZE)
db_test = db_test.map(preprocess).batch(BATCH_SIZE)

# Pull one batch to sanity-check the tensor shapes produced by the pipeline.
example = next(iter(db_train))
print(example[0].shape, example[1].shape)
# CNN classifier: two conv-conv-pool-dropout stages followed by a dense head.
# The last Dense(10) layer has no activation, so the network outputs logits;
# the loss below is created with from_logits=True to match.
conv_kwargs = dict(kernel_size=(3, 3), padding='Same', activation='relu')

model = Sequential([
    # Stage 1: 32 filters.
    layers.Conv2D(filters=32, input_shape=(28, 28, 1), **conv_kwargs),
    layers.Conv2D(filters=32, **conv_kwargs),
    layers.MaxPool2D(pool_size=(2, 2)),
    layers.Dropout(0.25),
    # Stage 2: 64 filters.
    layers.Conv2D(filters=64, **conv_kwargs),
    layers.Conv2D(filters=64, **conv_kwargs),
    layers.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),
    layers.Dropout(0.25),
    # Classification head.
    layers.Flatten(),
    layers.Dense(256, activation="relu"),
    layers.Dropout(0.4),
    layers.Dense(10),
])

optimizer = optimizers.RMSprop(0.0005)
loss_fn = tf.losses.CategoricalCrossentropy(from_logits=True)
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

# Train for 15 epochs; validation on db_test runs every second epoch.
model.fit(
    db_train,
    epochs=15,
    validation_data=db_test,
    validation_freq=2,
)

# Prepare the test.csv rows with the same scaling that preprocess applies
# (pixel / 255 - 0.5) and reshape every row to a 28x28x1 image.
x = tf.cast(data_test.values, dtype=tf.float32) / 255. - 0.5
x = tf.reshape(x, [-1, 28, 28, 1])

# model.predict returns a NumPy array of logits, one row per image. Taking
# the arg-max in NumPy hands pandas a plain integer array instead of a
# TensorFlow tensor.
y = model.predict(x)
y = y.argmax(axis=1)

# NOTE(review): this writes pandas' default index plus a single column named
# "0". Kaggle's Digit Recognizer submission presumably expects
# "ImageId,Label" columns -- confirm whether the file is post-processed.
pd.DataFrame(y).to_csv('1.csv')

# You may also be interested in: (python)