最近在做一个 Kaggle 比赛，于此分享一下所使用的 DL 模型（经过简化），希望能对初学者有所帮助。
（最后结果：公榜 23/4512，私榜 87/4512，过拟合了啊啊啊啊 T.T）
比赛地址:
https://www.kaggle.com/c/jigsaw-toxic-comment-classification-challenge
模型中所使用的词嵌入请参看如下博客:
http://blog.csdn.net/leyounger/article/details/78949709
https://www.kaggle.com/jagangupta/lessons-from-toxic-blending-is-the-new-sexy
# Model 1: frozen embedding -> bidirectional GRU -> three sequence summaries
# (average pool, max pool, Attention) -> 6 sigmoid outputs.

# Input of length MAX_SEQUENCE_LENGTH per sample.
inp = Input(shape=(MAX_SEQUENCE_LENGTH, ))

# Embedding matrix is supplied by get_embeddings() and is not updated during
# training (trainable=False).
embedding_layer = Embedding(
    len(word_index) + 1,
    EMBEDDING_DIM,
    weights=[get_embeddings()],
    input_length=MAX_SEQUENCE_LENGTH,
    trainable=False,
)
embedded = embedding_layer(inp)
embedded = SpatialDropout1D(0.2)(embedded)

# Bidirectional GRU; return_sequences=True keeps the per-timestep outputs so
# the pooling / attention layers below can consume the whole sequence.
recurrent_cell = GRU(
    128,
    return_sequences=True,
    dropout=0.2,
    kernel_initializer='glorot_uniform',
)
recurrent = Bidirectional(recurrent_cell, merge_mode='concat')
sequence_out = recurrent(embedded)

# Three different reductions of the same GRU output.
# Attention is not a name visible in this section; it is defined elsewhere.
mean_summary = GlobalAveragePooling1D()(sequence_out)
peak_summary = GlobalMaxPooling1D()(sequence_out)
attended_summary = Attention()(sequence_out)

x = keras.layers.concatenate([mean_summary, peak_summary, attended_summary])
x = Dense(6, activation="sigmoid")(x)

# Both optimizers are instantiated, but only `adam` is handed to compile().
adam = keras.optimizers.Adam(
    lr=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
)
sgd = keras.optimizers.SGD(lr=0.001)

model = Model(inputs=inp, outputs=x)
model.compile(
    loss='binary_crossentropy',
    optimizer=adam,
    metrics=['accuracy', 'binary_crossentropy'],
)
# Model 2: frozen embedding -> five parallel Conv1D branches (kernel sizes 2-6)
# -> three Conv1D + MaxPooling1D branches (kernel sizes 3-5) -> dense head
# -> 6 sigmoid outputs.

inp = Input(shape=(MAX_SEQUENCE_LENGTH, ))

# Embedding matrix is supplied by get_embeddings() and is not updated during
# training (trainable=False).
embedding_layer = Embedding(
    len(word_index) + 1,
    EMBEDDING_DIM,
    weights=[get_embeddings()],
    input_length=MAX_SEQUENCE_LENGTH,
    trainable=False,
)
embedded = embedding_layer(inp)
embedded = SpatialDropout1D(0.2)(embedded)

# Stage 1: 256-filter convolutions of increasing kernel size, every one applied
# to the same embedded input; padding='same' keeps the sequence length so the
# branch outputs can be concatenated along the channel axis.
first_stage = []
for kernel_width in (2, 3, 4, 5, 6):
    conv_layer = Conv1D(256, kernel_width, padding='same', strides=1, activation='relu')
    first_stage.append(conv_layer(embedded))
stacked = keras.layers.concatenate(first_stage, axis=-1)

# Stage 2: 128-filter convolutions over the stacked features, each followed by
# max pooling with a window of 200 steps.
# NOTE(review): 200 is a literal here; presumably it matches
# MAX_SEQUENCE_LENGTH so that every branch pools down to one step - confirm.
second_stage = []
for kernel_width in (3, 4, 5):
    features = Conv1D(128, kernel_width, padding='same', strides=1, activation='relu')(stacked)
    second_stage.append(MaxPooling1D(pool_size=200)(features))
merged = keras.layers.concatenate(second_stage, axis=-1)

# Dense head: flatten, then Dense(128) -> PReLU -> BatchNormalization with
# dropout before and after, ending in the 6-unit sigmoid layer.
hidden = Flatten()(merged)
hidden = Dropout(0.2)(hidden)
hidden = Dense(128, kernel_initializer='he_normal')(hidden)
hidden = PReLU()(hidden)
hidden = BatchNormalization()(hidden)
hidden = Dropout(0.2)(hidden)
x = Dense(6, activation="sigmoid")(hidden)

# Both optimizers are instantiated, but only `adam` is handed to compile().
adam = keras.optimizers.Adam(
    lr=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
)
sgd = keras.optimizers.SGD(lr=0.001)

model = Model(inputs=inp, outputs=x)
model.compile(
    loss='binary_crossentropy',
    optimizer=adam,
    metrics=['accuracy', 'binary_crossentropy'],
)
# Model 3: frozen embedding -> bidirectional GRU -> Conv1D -> three sequence
# summaries (average pool, max pool, Attention) -> 6 sigmoid outputs.

inp = Input(shape=(MAX_SEQUENCE_LENGTH, ))

# Embedding matrix is supplied by get_embeddings() and is not updated during
# training (trainable=False).
embedding_layer = Embedding(
    len(word_index) + 1,
    EMBEDDING_DIM,
    weights=[get_embeddings()],
    input_length=MAX_SEQUENCE_LENGTH,
    trainable=False,
)
embedded = embedding_layer(inp)
dropped = SpatialDropout1D(0.2)(embedded)

# Bidirectional GRU emitting one output per timestep (return_sequences=True).
recurrent_cell = GRU(
    196,
    return_sequences=True,
    dropout=0.2,
    kernel_initializer='he_normal',
)
recurrent = Bidirectional(recurrent_cell, merge_mode='concat')
sequence_out = recurrent(dropped)

# Convolution over the GRU outputs. No activation argument is passed, and
# padding="valid" means the output sequence is shorter than the input.
conv_layer = Conv1D(
    96,
    kernel_size=3,
    padding="valid",
    kernel_initializer="glorot_uniform",
)
conv_out = conv_layer(sequence_out)

# Three different reductions of the same convolution output.
# Attention is not a name visible in this section; it is defined elsewhere.
mean_summary = GlobalAveragePooling1D()(conv_out)
peak_summary = GlobalMaxPooling1D()(conv_out)
attended_summary = Attention()(conv_out)

x = keras.layers.concatenate([mean_summary, peak_summary, attended_summary])
x = Dense(6, activation="sigmoid")(x)

# Both optimizers are instantiated, but only `adam` is handed to compile().
adam = keras.optimizers.Adam(
    lr=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
)
sgd = keras.optimizers.SGD(lr=0.001)

model = Model(inputs=inp, outputs=x)
model.compile(
    loss='binary_crossentropy',
    optimizer=adam,
    metrics=['accuracy', 'binary_crossentropy'],
)