环境 | 版本 | Python模块 | 版本 |
---|---|---|---|
Ubuntu | 16.04 | tensorflow | 1.8 |
Anaconda | 5.1 | numpy | |
Python | 3.6 | pandas | |
Jupyter lab | 0.31.5 | matplotlib | |
# Positional column names col0 .. col10 for the header-less CSV.
NAMES = ['col{}'.format(idx) for idx in range(11)]
df_train = pd.read_csv('data/train.csv', names=NAMES, index_col=False)

# Lookup tables turning suit letters and face-card letters into integers.
huase_to_num = {'C': 1, 'D': 2, 'H': 3, 'S': 4}
paimian_to_num = {'J': 11, 'Q': 12, 'K': 13}

# Apply the two substitutions one after the other (suits first).
df_train = df_train.replace(huase_to_num)
df_train = df_train.replace(paimian_to_num)
代码示例如下:
# One numeric feature column per input column; the last entry of NAMES is
# skipped (it is excluded from the features here).
feature_columns = [
    tf.feature_column.numeric_column(key=col) for col in NAMES[:-1]
]
# Bare expression: displays the list when run in a notebook cell.
feature_columns
ProximalAdagradOptimizer
具体代码如下:
# Proximal Adagrad optimizer with both L1 and L2 regularization enabled.
proximal_adagrad = tf.train.ProximalAdagradOptimizer(
    learning_rate=0.005,
    l1_regularization_strength=0.001,
    l2_regularization_strength=0.001,
)

# Fully connected classifier with three hidden layers (1536 -> 768 -> 384).
cls = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[1536, 768, 384],
    n_classes=numClasses,
    optimizer=proximal_adagrad,
)
对已有数据进行2000次训练,准确率如下图所示:训练2000次约耗时20分钟,准确率达到99.5%。
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
# Column layout: the training file has eleven columns (col0..col9 are used as
# features, col10 as the label); the test file has the ten feature columns.
NAMES = ['col' + str(e) for e in range(11)]
NAMES_TEST = ['col' + str(e) for e in range(10)]
df_train = pd.read_csv('data/train.csv', names=NAMES, index_col=False)
df_test = pd.read_csv('data/ftest.csv', names=NAMES_TEST, index_col=False)

suit_num = {'C': 1, 'D': 2, 'H': 3, 'S': 4}  # map suit letters to numbers
poker_num = {'J': 11, 'Q': 12, 'K': 13}  # map face-card letters to numbers
df_train = df_train.replace(suit_num).replace(poker_num)
df_test = df_test.replace(suit_num).replace(poker_num)

# --- training set ---------------------------------------------------------
# Force every column to a numeric dtype; raises if a non-numeric value is left.
df_train = df_train.apply(pd.to_numeric)
# Shift every feature column down by one; the label column is left untouched.
for col in NAMES[:-1]:
    df_train[col] = df_train[col] - 1
data_x = df_train[df_train.columns[:-1]]
# Labels are stringified element-wise. Series.map(str) yields the same values
# as the former DataFrame.applymap(str)['col10'] without the deprecated API.
data_y = df_train['col10'].map(str)
# Sanity check: one label per feature row.
assert data_x.shape[0] == data_y.shape[0]

# --- test set -------------------------------------------------------------
df_test = df_test.apply(pd.to_numeric)
# Same shift as for the training features.
for col in NAMES_TEST:
    df_test[col] = df_test[col] - 1
x_test = df_test
import torch
from torch.utils.data import Dataset, DataLoader
class PokerDataset(Dataset):
    """Map-style dataset pairing feature rows with integer class labels.

    Args:
        x_dataframe: Pandas object whose ``.values`` holds the features, one
            sample per row. Values are converted to a float32 tensor.
        y_dataframe: Pandas object whose ``.values`` holds one label per
            sample. Values are converted to float first and then stored as
            an int64 tensor.
        transform: Optional callable applied to the feature tensor of a
            sample every time it is fetched. Labels are never transformed.

    Raises:
        ValueError: If the number of feature rows and labels differ.
    """

    def __init__(self, x_dataframe, y_dataframe, transform=None):
        features = torch.tensor(x_dataframe.values.astype('float')).float()
        labels = torch.tensor(y_dataframe.values.astype('float')).long()
        # Fail early instead of raising an IndexError in the middle of training.
        if len(features) != len(labels):
            raise ValueError(
                'x_dataframe and y_dataframe must have the same number of '
                'rows, got {} and {}'.format(len(features), len(labels))
            )
        self.x_tensor = features
        self.y_tensor = labels
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.x_tensor)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        one_sample_x = self.x_tensor[idx]
        one_sample_y = self.y_tensor[idx]
        if self.transform is not None:
            one_sample_x = self.transform(one_sample_x)
        return one_sample_x, one_sample_y
# GPU acceleration: use CUDA when it is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyper parameters
num_epochs = 5
num_classes = 10
batch_size = 100
learning_rate = 0.0001

# Build the dataset and its shuffling loader once. The original repeated this
# construction a second time, converting the same data to tensors twice.
poker_dataset = PokerDataset(x_dataframe=data_x, y_dataframe=data_y)
poker_dataloader = DataLoader(poker_dataset, batch_size=batch_size,
                              shuffle=True)

# Sanity-check the dataset by pulling its first sample.
it = iter(poker_dataset)
next(it)
from torch import nn, optim  # `nn` was used below but never imported
import pdb  # kept available for interactive debugging

# NOTE: the misspelled name `critirion` is kept so existing references work.
critirion = nn.CrossEntropyLoss()

# Instantiate the network BEFORE the optimizer so the optimizer tracks the
# parameters of the network that is actually trained. `net` and `model` are
# bound to the same object; the original code mixed the two names.
model = PokerNet()
net = model
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

for epoch in range(20):
    for inputs, labels in poker_dataloader:
        # Reset the gradients accumulated by the previous step.
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = critirion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print statistics: loss of the current mini-batch.
        # NOTE(review): placed per batch; the original indentation was lost,
        # so confirm this was not meant to run once per epoch.
        print(loss.item())

print('Finished Training')