class DCN(BaseEstimator, TransformerMixin):
    """Deep & Cross Network (DCN) estimator built on TensorFlow 1.x graphs.

    Categorical features are looked up in a shared embedding table, and each
    numeric feature scales its own learned embedding vector.  The flattened
    concatenation ``x0`` feeds two towers:

    * a deep MLP with dropout after the input and after every hidden layer;
    * a cross network computing ``x_{l+1} = x0 * (x_l^T w_l) + b_l + x_l``.

    The tower outputs are concatenated into a single linear output unit
    (sigmoid-activated for ``loss_type="logloss"``).
    """

    def __init__(self, cate_feature_size, cate_field_size, num_feature_size,
                 embedding_size=8,
                 deep_each_layer_nums=None,
                 deep_drop_train_prob=None,
                 cross_layer_num=3,
                 deep_layers_activation=tf.nn.relu,
                 epoches=10,
                 batch_size=256,
                 learning_rate=0.001,
                 optimizer_type="adam",
                 verbose=False,
                 random_seed=2019,
                 loss_type="logloss",
                 eval_metric=roc_auc_score,
                 l1_reg=0.0,
                 l2_reg=0.0,):
        """Store hyper-parameters and build the TensorFlow graph.

        :param cate_feature_size: vocabulary size of the categorical
            embedding table (total number of distinct ids).
        :param cate_field_size: number of categorical fields per sample.
        :param num_feature_size: number of numeric features per sample.
        :param embedding_size: embedding dimension shared by all features.
        :param deep_each_layer_nums: hidden-unit counts of the deep tower
            (defaults to ``[32, 32]``).
        :param deep_drop_train_prob: dropout *keep* probabilities used while
            training; must contain ``len(deep_each_layer_nums) + 1`` entries
            (one for the input plus one per hidden layer; defaults to
            ``[0.5, 0.5, 0.5]``).
        :param cross_layer_num: number of cross layers.
        :param optimizer_type: "adam", "adagrad" or "gd".
        :param loss_type: "logloss" (binary classification) or "mse"
            (regression).
        :param eval_metric: callable ``metric(y_true, y_pred)`` used by
            :meth:`evaluate` (AUC by default).
        """
        assert loss_type in ["logloss", "mse"], \
            "loss_type can be either 'logloss' for classification task or 'mse' for regression task"
        # None sentinels so list defaults are not shared across instances.
        if deep_each_layer_nums is None:
            deep_each_layer_nums = [32, 32]
        if deep_drop_train_prob is None:
            deep_drop_train_prob = [0.5, 0.5, 0.5]
        assert len(deep_drop_train_prob) == len(deep_each_layer_nums) + 1, \
            "deep_drop_train_prob needs one keep-prob for the input plus one per deep layer"
        self.cate_feature_size = cate_feature_size
        self.num_feature_size = num_feature_size
        self.cate_field_size = cate_field_size
        self.embedding_size = embedding_size
        # width of the flattened x0 vector fed to both towers
        self.total_size = (self.cate_field_size + self.num_feature_size) * self.embedding_size
        self.deep_each_layer_nums = deep_each_layer_nums
        self.cross_layer_num = cross_layer_num
        self.deep_drop_train_prob = deep_drop_train_prob
        self.deep_layers_activation = deep_layers_activation
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg
        self.epoches = epoches
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.optimizer_type = optimizer_type
        self.verbose = verbose
        self.random_seed = random_seed
        self.loss_type = loss_type
        self.eval_metric = eval_metric
        self._init_graph()

    def _init_graph(self):
        """Build placeholders, towers, loss, optimizer and the session."""
        self.graph = tf.Graph()
        with self.graph.as_default():
            tf.set_random_seed(self.random_seed)
            # inputs: categorical ids, numeric values, labels, keep-probs
            self.feat_cate = tf.placeholder(tf.int32, [None, None], name='feat_cate')
            self.feat_num = tf.placeholder(tf.float32, [None, None], name='feat_num')
            self.label = tf.placeholder(tf.float32, [None, 1], 'label')
            self.deep_drop_prob = tf.placeholder(tf.float32, [None], 'deep_drop_prob')
            self.weights = self._init_weights()
            # embed categorical ids; each numeric value scales its own
            # learned embedding row
            cate_embeddings = tf.nn.embedding_lookup(
                self.weights['cate_embeddings'], self.feat_cate)
            num_embeddings = tf.multiply(
                self.weights['num_embeddings'],
                tf.reshape(self.feat_num, [-1, self.num_feature_size, 1]))
            x0 = tf.concat([
                tf.reshape(cate_embeddings, [-1, self.cate_field_size * self.embedding_size]),
                tf.reshape(num_embeddings, [-1, self.num_feature_size * self.embedding_size])],
                axis=1)
            # deep tower.  NOTE: tf.nn.dropout already rescales the kept
            # units by 1/keep_prob (inverted dropout), so no additional
            # scaling is applied here -- the previous extra multiply
            # double-scaled activations during training.
            y_deep = tf.nn.dropout(x0, self.deep_drop_prob[0])
            for i in range(len(self.deep_each_layer_nums)):
                y_deep = tf.add(
                    tf.matmul(y_deep, self.weights["deep_layer_%d" % i]),
                    self.weights["deep_bias_%d" % i])
                y_deep = self.deep_layers_activation(y_deep)
                y_deep = tf.nn.dropout(y_deep, self.deep_drop_prob[i + 1])
            # cross tower: x_{l+1} = x0 (x_l . w_l) + b_l + x_l
            x0 = tf.reshape(x0, [-1, self.total_size, 1])
            x_l = x0
            for l in range(self.cross_layer_num):
                x_l = tf.tensordot(
                    tf.matmul(x0, x_l, transpose_b=True),
                    self.weights['cross_layer_%d' % l], 1) \
                    + self.weights["cross_bias_%d" % l] + x_l
            cross_network_out = tf.reshape(x_l, (-1, self.total_size))
            # combination layer: one linear unit over both tower outputs
            concat_input = tf.concat([y_deep, cross_network_out], axis=1)
            logits = tf.add(tf.matmul(concat_input, self.weights['concat_weight']),
                            self.weights['concat_bias'])
            if self.loss_type == "logloss":
                self.y_hat = tf.nn.sigmoid(logits)
                loss = tf.losses.log_loss(self.label, self.y_hat)
            else:
                # "mse" -- the constructor assert guarantees one of the two.
                # (The original compared against the impossible value
                # 'regloss', leaving y_hat/loss undefined for regression.)
                self.y_hat = logits
                loss = tf.losses.mean_squared_error(self.label, logits)
            if self.l1_reg > 0 or self.l2_reg > 0:
                # penalize all projection weights (biases excluded)
                regularize = tf.contrib.layers.l1_l2_regularizer(self.l1_reg, self.l2_reg)
                loss += regularize(self.weights["concat_weight"])
                for i in range(len(self.deep_each_layer_nums)):
                    loss += regularize(self.weights["deep_layer_%d" % i])
                for i in range(self.cross_layer_num):
                    loss += regularize(self.weights["cross_layer_%d" % i])
            self.loss = loss
            if self.optimizer_type == "adam":
                self.optimizer = tf.train.AdamOptimizer(
                    learning_rate=self.learning_rate, beta1=0.9, beta2=0.999,
                    epsilon=1e-8).minimize(loss)
            elif self.optimizer_type == "adagrad":
                self.optimizer = tf.train.AdagradOptimizer(
                    learning_rate=self.learning_rate).minimize(loss)
            elif self.optimizer_type == "gd":
                self.optimizer = tf.train.GradientDescentOptimizer(
                    learning_rate=self.learning_rate).minimize(loss)
            else:
                # fail fast: an unknown optimizer previously left
                # self.optimizer unset and crashed later in fit_on_batch
                raise ValueError("unknown optimizer_type: %r" % self.optimizer_type)
            # init
            self.saver = tf.train.Saver()
            init = tf.global_variables_initializer()
            self.sess = tf.Session()
            self.sess.run(init)
            if self.verbose > 0:
                # count trainable parameters only when it will be reported
                total_parameters = 0
                for variable in self.weights.values():
                    variable_parameters = 1
                    for dim in variable.get_shape():
                        variable_parameters *= dim.value
                    total_parameters += variable_parameters
                print("#params: %d" % total_parameters)

    def _init_weights(self):
        """Create and return all trainable variables in a dict.

        Embeddings use a random-normal init; dense layers use a numpy
        Glorot-style normal init scaled by fan-in + fan-out.
        """
        weights = dict()
        # embeddings: one row per categorical id / numeric feature
        weights['cate_embeddings'] = tf.Variable(
            tf.random_normal([self.cate_feature_size, self.embedding_size], 0.0, 0.1),
            name='cate_embeddings')
        weights['num_embeddings'] = tf.Variable(
            tf.random_normal([self.num_feature_size, self.embedding_size], 0.0, 0.1),
            name='num_embeddings')
        # deep tower: total_size -> layer_0 -> ... -> layer_{n-1}
        layer_nums = len(self.deep_each_layer_nums)
        glorot = np.sqrt(2.0 / (self.total_size + self.deep_each_layer_nums[0]))
        weights['deep_layer_0'] = tf.Variable(
            np.random.normal(loc=0, scale=glorot,
                             size=(self.total_size, self.deep_each_layer_nums[0])),
            dtype=np.float32)
        weights['deep_bias_0'] = tf.Variable(
            np.random.normal(loc=0, scale=glorot,
                             size=(1, self.deep_each_layer_nums[0])),
            dtype=np.float32)
        for i in range(1, layer_nums):
            glorot = np.sqrt(2.0 / (self.deep_each_layer_nums[i - 1] + self.deep_each_layer_nums[i]))
            weights["deep_layer_%d" % i] = tf.Variable(
                np.random.normal(loc=0, scale=glorot,
                                 size=(self.deep_each_layer_nums[i - 1], self.deep_each_layer_nums[i])),
                dtype=np.float32)  # layers[i-1] x layers[i]
            weights["deep_bias_%d" % i] = tf.Variable(
                np.random.normal(loc=0, scale=glorot,
                                 size=(1, self.deep_each_layer_nums[i])),
                dtype=np.float32)  # 1 x layers[i]
        # cross tower: per layer a (total_size, 1) weight and bias vector
        for i in range(self.cross_layer_num):
            glorot = np.sqrt(2.0 / (self.total_size + 1))
            weights["cross_layer_%d" % i] = tf.Variable(
                np.random.normal(loc=0, scale=glorot, size=(self.total_size, 1)),
                dtype=np.float32)
            weights["cross_bias_%d" % i] = tf.Variable(
                np.random.normal(loc=0, scale=glorot, size=(self.total_size, 1)),
                dtype=np.float32)
        # combination layer over [deep_out ; cross_out]
        final_input_size = self.total_size + self.deep_each_layer_nums[-1]
        glorot = np.sqrt(2.0 / (final_input_size + 1))
        weights['concat_weight'] = tf.Variable(
            np.random.normal(loc=0, scale=glorot, size=(final_input_size, 1)),
            dtype=np.float32)
        weights['concat_bias'] = tf.Variable(
            tf.constant(0.01),
            dtype=np.float32)
        return weights

    def get_batch(self, Xc, Xn, y, batch_size, index):
        """Return the `index`-th (Xc, Xn, y) mini-batch (last one may be short)."""
        start = index * batch_size
        end = min((index + 1) * batch_size, len(y))
        return Xc[start:end], Xn[start:end], y[start:end]

    def get_batch_without_label(self, Xc, Xn, batch_size, index):
        """Return the `index`-th (Xc, Xn) mini-batch for inference."""
        start = index * batch_size
        end = min((index + 1) * batch_size, len(Xc))
        return Xc[start:end], Xn[start:end]

    def shuffle(self, a, b, c):
        """Shuffle three arrays in place with the same permutation."""
        rng_state = np.random.get_state()
        np.random.shuffle(a)
        np.random.set_state(rng_state)
        np.random.shuffle(b)
        np.random.set_state(rng_state)
        np.random.shuffle(c)

    def predict(self, Xc, Xn):
        """Return model outputs for all samples, batched through the session.

        Dropout is disabled at inference time by feeding keep-prob 1.0
        everywhere.
        """
        total_batch = int(np.ceil(len(Xc) / self.batch_size))
        predict = []
        for i in range(total_batch):
            cate_batch, num_batch = self.get_batch_without_label(
                Xc, Xn, self.batch_size, i)
            feed_dict = {self.feat_cate: cate_batch,
                         self.feat_num: num_batch,
                         self.deep_drop_prob: [1.0] * len(self.deep_drop_train_prob)}
            predict.append(self.sess.run(self.y_hat, feed_dict=feed_dict))
        return np.concatenate(predict)

    def evaluate(self, Xc, Xn, y):
        """Score predictions on (Xc, Xn) against labels `y`.

        Uses the `eval_metric` supplied at construction time (previously
        roc_auc_score was hard-coded here, ignoring the parameter).
        `y` is expected as a column of single-element rows, matching the
        (None, 1) label placeholder.
        """
        predict = self.predict(Xc, Xn).tolist()
        return self.eval_metric([row[0] for row in y], [row[0] for row in predict])

    def fit_on_batch(self, Xc, Xn, y):
        """Run one optimizer step on a single mini-batch."""
        feed_dict = {self.feat_cate: Xc,
                     self.feat_num: Xn,
                     self.label: y,
                     self.deep_drop_prob: self.deep_drop_train_prob}
        self.sess.run(self.optimizer, feed_dict=feed_dict)

    def fit(self, cate_train, num_train, y_train,
            cate_valid=None, num_valid=None, y_valid=None,
            early_stopping=False, epoches=None, verbose=1):
        """Train the model, shuffling the training arrays in place each epoch.

        :param epoches: number of epochs; None falls back to the value given
            to the constructor (previously the constructor value was ignored).
        :param early_stopping: when true and a validation set is supplied,
            stop as soon as the validation metric fails to improve over the
            previous best (previously this flag was accepted but unused).
        :param verbose: when truthy, print train (and validation) metrics
            after every epoch.
        """
        if epoches is None:
            epoches = self.epoches
        # `is not None` -- a bare truth test raises for numpy arrays
        has_valid = y_valid is not None
        best_val = None
        for epoch in range(epoches):
            self.shuffle(cate_train, num_train, y_train)
            total_batch = len(y_train) // self.batch_size
            for i in range(total_batch):
                cate_batch, num_batch, y_batch = self.get_batch(
                    cate_train, num_train, y_train, self.batch_size, i)
                self.fit_on_batch(cate_batch, num_batch, y_batch)
            val_metric = self.evaluate(cate_valid, num_valid, y_valid) if has_valid else None
            if verbose:
                tra_metric = self.evaluate(cate_train, num_train, y_train)
                if has_valid:
                    print("epoch:", epoch, 'tra_auc', tra_metric, "val_auc", val_metric)
                else:
                    print("epoch:", epoch, 'tra_auc', tra_metric)
            if early_stopping and has_valid:
                if best_val is not None and val_metric <= best_val:
                    break
                best_val = val_metric