Commit bf210d7: Update run_dsin.py
浅梦 authored Jun 30, 2019 · 1 parent be65ce9
Showing 15 changed files with 25 additions and 165 deletions.
25 changes: 1 addition & 24 deletions deepctr/inputs.py
@@ -10,7 +10,7 @@
from itertools import chain

from tensorflow.python.keras.initializers import RandomNormal
- from tensorflow.python.keras.layers import Concatenate, Dense, Embedding, Input, Reshape, add,Flatten
+ from tensorflow.python.keras.layers import Concatenate, Dense, Embedding, Input, add,Flatten
from tensorflow.python.keras.regularizers import l2

from .layers.sequence import SequencePoolingLayer
@@ -152,14 +152,6 @@ def get_varlen_pooling_list(embedding_dict, features, varlen_sparse_feature_columns
pooling_vec_list.append(vec)
return pooling_vec_list

- # def get_pooling_vec_list(sequence_embed_dict, sequence_len_dict, sequence_max_len_dict, sequence_fd_list):
- #     if sequence_max_len_dict is None or sequence_len_dict is None:
- #         return [SequencePoolingLayer(feat.combiner, supports_masking=True)(sequence_embed_dict[feat.name]) for feat in
- #                 sequence_fd_list]
- #     else:
- #         return [SequencePoolingLayer(feat.combiner, supports_masking=False)(
- #             [sequence_embed_dict[feat.name], sequence_len_dict[feat.name]]) for feat in sequence_fd_list]


def get_inputs_list(inputs):
return list(chain(*list(map(lambda x: x.values(), filter(lambda x: x is not None, inputs)))))
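get_inputs_list, unchanged in this hunk, flattens a sequence of name-to-Input dicts into a single list, skipping None entries along the way. A quick illustration with hypothetical Keras inputs:

from tensorflow.python.keras.layers import Input

user = Input(shape=(1,), name='user')
item = Input(shape=(1,), name='item')
price = Input(shape=(1,), name='price')

# None entries are filtered out, then the dict values are chained together.
get_inputs_list([{'user': user, 'item': item}, None, {'price': price}])
# -> [user, item, price]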
@@ -233,21 +225,6 @@ def get_dense_input(features,feature_columns):
return dense_input_list


- # def get_varlen_vec_list(embedding_dict, features, varlen_sparse_feature_columns):
- #     vec_list = []
- #     for fc in varlen_sparse_feature_columns:
- #         feature_name = fc.name
- #         feature_length_name = feature_name + "_seq_length"
- #         if feature_length_name in features:
- #             vector = SequencePoolingLayer(fc.combiner, supports_masking=False)(
- #                 [embedding_dict[feature_name], features[feature_length_name]])
- #         else:
- #             vector = SequencePoolingLayer(fc.combiner, supports_masking=True)(embedding_dict[feature_name])
- #         vec_list.append(vector)
- #     return vec_list



def input_from_feature_columns(features,feature_columns, embedding_size, l2_reg, init_std, seed,prefix='',seq_mask_zero=True):


6 changes: 2 additions & 4 deletions deepctr/models/dcn.py
@@ -8,7 +8,7 @@
"""
import tensorflow as tf

- from ..inputs import input_from_feature_columns,build_input_features
+ from ..inputs import input_from_feature_columns,build_input_features,combined_dnn_input
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import CrossNet
from ..layers.utils import concat_fun
@@ -38,16 +38,14 @@ def DCN(dnn_feature_columns, embedding_size='auto', cross_num=2, dnn_hidden_units
if len(dnn_hidden_units) == 0 and cross_num == 0:
raise ValueError("Either hidden_layer or cross layer must > 0")

- #check_feature_config_dict(feature_dim_dict)
features = build_input_features(dnn_feature_columns)
inputs_list = list(features.values())

sparse_embedding_list, dense_value_list = input_from_feature_columns(features,dnn_feature_columns,
embedding_size,
l2_reg_embedding, init_std,
seed)
- #todo not support dense?
- dnn_input = tf.keras.layers.Flatten()(concat_fun(sparse_embedding_list))
+ dnn_input = combined_dnn_input(sparse_embedding_list,dense_value_list)

if len(dnn_hidden_units) > 0 and cross_num > 0: # Deep & Cross
deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
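The edit above is the recurring change in this commit: models that previously flattened only the sparse embeddings now call combined_dnn_input(sparse_embedding_list, dense_value_list), so dense features reach the DNN tower as well. The helper's body is not part of this diff; a minimal sketch of the behavior the call sites imply (an assumption, not the library's actual implementation):

from tensorflow.python.keras.layers import Concatenate, Flatten

def _concat(tensors):
    # Keras Concatenate requires at least two inputs; pass a single tensor through.
    return tensors[0] if len(tensors) == 1 else Concatenate(axis=-1)(tensors)

def combined_dnn_input(sparse_embedding_list, dense_value_list):
    parts = []
    if sparse_embedding_list:
        # (batch, 1, k) embeddings -> (batch, n * k)
        parts.append(Flatten()(_concat(sparse_embedding_list)))
    if dense_value_list:
        parts.append(Flatten()(_concat(dense_value_list)))
    if not parts:
        raise ValueError("at least one of sparse or dense features is required")
    return _concat(parts)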
5 changes: 1 addition & 4 deletions deepctr/models/din.py
@@ -7,12 +7,9 @@
[1] Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068. (https://arxiv.org/pdf/1706.06978.pdf)
"""

- from collections import OrderedDict

from tensorflow.python.keras.initializers import RandomNormal
- from tensorflow.python.keras.layers import Input, Dense, Embedding, Concatenate, Flatten
+ from tensorflow.python.keras.layers import Dense,Concatenate, Flatten
from tensorflow.python.keras.models import Model
- from tensorflow.python.keras.regularizers import l2

from ..inputs import build_input_features,create_embedding_matrix,SparseFeat,VarLenSparseFeat,DenseFeat,embedding_lookup,get_dense_input,varlen_embedding_lookup,get_varlen_pooling_list,combined_dnn_input
from ..layers.core import DNN, PredictionLayer
48 changes: 5 additions & 43 deletions deepctr/models/dsin.py
@@ -24,7 +24,7 @@
from ..layers.utils import NoMask, concat_fun


- def DSIN(dnn_feature_columns, sess_feature_list, embedding_size=8, sess_max_count=5, sess_len_max=10, bias_encoding=False,
+ def DSIN(dnn_feature_columns, sess_feature_list, embedding_size=8, sess_max_count=5, bias_encoding=False,
att_embedding_size=1, att_head_num=8, dnn_hidden_units=(200, 80), dnn_activation='sigmoid', dnn_dropout=0,
dnn_use_bn=False, l2_reg_dnn=0, l2_reg_embedding=1e-6, init_std=0.0001, seed=1024, task='binary',
):
@@ -49,33 +49,12 @@ def DSIN(dnn_feature_columns, sess_feature_list, embedding_size=8, sess_max_count
:return: A Keras model instance.
"""
- #check_feature_config_dict(dnn_feature_columns)

if (att_embedding_size * att_head_num != len(sess_feature_list) * embedding_size):
raise ValueError(
"len(session_feature_lsit) * embedding_size must equal to att_embedding_size * att_head_num ,got %d * %d != %d *%d" % (
len(sess_feature_list), embedding_size, att_embedding_size, att_head_num))

- # sparse_input, dense_input, user_behavior_input_dict, _, user_sess_length = get_input(
- #     dnn_feature_columns, sess_feature_list, sess_max_count, sess_len_max)

- # def get_input(feature_dim_dict, seq_feature_list, sess_max_count, seq_max_len):
- #     sparse_input, dense_input = build_input_features(feature_dim_dict)
- #     user_behavior_input = {}
- #     for idx in range(sess_max_count):
- #         sess_input = OrderedDict()
- #         for i, feat in enumerate(seq_feature_list):
- #             sess_input[feat] = Input(
- #                 shape=(seq_max_len,), name='seq_' + str(idx) + str(i) + '-' + feat)
- #
- #         user_behavior_input["sess_" + str(idx)] = sess_input
- #
- #     user_behavior_length = {"sess_" + str(idx): Input(shape=(1,), name='seq_length' + str(idx)) for idx in
- #                             range(sess_max_count)}
- #     user_sess_length = Input(shape=(1,), name='sess_length')
- #
- #     return sparse_input, dense_input, user_behavior_input, user_behavior_length, user_sess_length


features = build_input_features(dnn_feature_columns)

@@ -85,15 +64,13 @@ def DSIN(dnn_feature_columns, sess_feature_list, embedding_size=8, sess_max_count
varlen_sparse_feature_columns = list(filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else []


- history_feature_columns = []

sparse_varlen_feature_columns = []
history_fc_names = list(map(lambda x: "sess" + x, sess_feature_list))
- #user_behavior_input_dict = {"sess_"+str(i):{} for i in range(sess_max_count)}
for fc in varlen_sparse_feature_columns:
feature_name = fc.name
if feature_name in history_fc_names:
continue
- #history_feature_columns.append(fc)
else:
sparse_varlen_feature_columns.append(fc)

@@ -106,13 +83,11 @@ def DSIN(dnn_feature_columns, sess_feature_list, embedding_size=8, sess_max_count
sess_input = OrderedDict()
for i, feat in enumerate(sess_feature_list):
sess_input[feat] = features["sess_"+str(idx)+"_"+feat]
- #Input(shape=(seq_max_len,), name='seq_' + str(idx) + str(i) + '-' + feat)


user_behavior_input_dict["sess_" + str(idx)] = sess_input


- #user_behavior_length = {"sess_" + str(idx): Input(shape=(1,), name='seq_length' + str(idx)) for idx in
- #                        range(sess_max_count)}
user_sess_length = Input(shape=(1,), name='sess_length')


@@ -130,20 +105,12 @@ def DSIN(dnn_feature_columns, sess_feature_list, embedding_size=8, sess_max_count


query_emb_list = embedding_lookup(embedding_dict,features,sparse_feature_columns,sess_feature_list,sess_feature_list)  # the query is looked up separately
- keys_emb_list = embedding_lookup(embedding_dict, features, history_feature_columns, history_fc_names, history_fc_names)
dnn_input_emb_list = embedding_lookup(embedding_dict,features,sparse_feature_columns,mask_feat_list=sess_feature_list)
dense_value_list = get_dense_input(features, dense_feature_columns)




- #query_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, dnn_feature_columns["sparse"],
- #                                        sess_feature_list, sess_feature_list)

query_emb = concat_fun(query_emb_list)

- #dnn_input_emb_list = get_embedding_vec_list(sparse_embedding_dict, sparse_input, dnn_feature_columns["sparse"],
- #                                            mask_feat_list=sess_feature_list)

dnn_input_emb = concat_fun(dnn_input_emb_list)
dnn_input_emb = Flatten()(NoMask()(dnn_input_emb))

@@ -167,9 +134,7 @@ def DSIN(dnn_feature_columns, sess_feature_list, embedding_size=8, sess_max_count

dnn_input_emb = Concatenate()(
[dnn_input_emb, Flatten()(interest_attention_layer), Flatten()(lstm_attention_layer)])
- # if len(dense_input) > 0:
- #     deep_input_emb = Concatenate()(
- #         [deep_input_emb] + list(dense_input.values()))

dnn_input_emb = combined_dnn_input([dnn_input_emb],dense_value_list)
output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn,
dnn_dropout, dnn_use_bn, seed)(dnn_input_emb)
@@ -184,9 +149,6 @@ def DSIN(dnn_feature_columns, sess_feature_list, embedding_size=8, sess_max_count
[user_behavior_input_dict[sess_name]]))
# sess_input_length_list.append(user_behavior_length_dict[sess_name])

- # model_input_list = get_inputs_list([sparse_input, dense_input]) + sess_input_list + [
- #     user_sess_length]
- #

model = Model(inputs=inputs_list+[user_sess_length], outputs=output)

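One note on the ValueError guard kept near the top of DSIN: the self-attention block runs over per-position session embeddings of width len(sess_feature_list) * embedding_size, while multi-head attention emits att_head_num * att_embedding_size values per position, and the two widths must match. Checking the constraint with the signature's defaults and a hypothetical single behavior feature:

sess_feature_list = ['item']  # one behavior feature (hypothetical)
embedding_size = 8            # default
att_embedding_size = 1        # default
att_head_num = 8              # default

# input width per position: 1 * 8 = 8; attention output width: 8 * 1 = 8
assert att_embedding_size * att_head_num == len(sess_feature_list) * embedding_size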
54 changes: 2 additions & 52 deletions deepctr/models/fgcnn.py
@@ -11,33 +11,12 @@
"""
import tensorflow as tf

- from ..inputs import build_input_features, get_linear_logit,input_from_feature_columns
+ from ..inputs import build_input_features, input_from_feature_columns
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import InnerProductLayer, FGCNNLayer
from ..layers.utils import concat_fun


- # def preprocess_input_embedding(feature_dim_dict, embedding_size, l2_reg_embedding, l2_reg_linear, init_std, seed,
- #                                return_linear_logit=True, ):
- #     sparse_input_dict, dense_input_dict = build_input_features(feature_dim_dict)
- #     sequence_input_dict, sequence_input_len_dict, sequence_max_len_dict = create_varlenfeat_inputdict(
- #         feature_dim_dict)
- #     inputs_list, deep_emb_list, linear_emb_list = get_inputs_embedding(None, feature_dim_dict, l2_reg_embedding,
- #                                                                        l2_reg_linear, init_std, seed, sparse_input_dict,
- #                                                                        dense_input_dict, sequence_input_dict,
- #                                                                        sequence_input_len_dict, sequence_max_len_dict,
- #                                                                        return_linear_logit, embedding_size, prefix='')
- #     _, fg_deep_emb_list, _ = get_inputs_embedding(None, feature_dim_dict, l2_reg_embedding, l2_reg_linear, init_std,
- #                                                   seed, sparse_input_dict, dense_input_dict, sequence_input_dict,
- #                                                   sequence_input_len_dict, sequence_max_len_dict, False, embedding_size,
- #                                                   prefix='fg')
- #     if return_linear_logit:
- #         linear_logit = get_linear_logit(
- #             linear_emb_list, dense_input_dict, l2_reg_linear)
- #     else:
- #         linear_logit = None
- #     return deep_emb_list, fg_deep_emb_list, linear_logit, inputs_list


def unstack(input_tensor):
input_ = tf.expand_dims(input_tensor, axis=2)
@@ -65,8 +44,7 @@ def FGCNN(dnn_feature_columns, embedding_size=8, conv_kernel_width=(7, 7, 7, 7),
:param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
:return: A Keras model instance.
"""
- # todo: this has not been updated yet
- #check_feature_config_dict(feature_dim_dict)

if not (len(conv_kernel_width) == len(conv_filters) == len(new_maps) == len(pooling_width)):
raise ValueError(
"conv_kernel_width,conv_filters,new_maps and pooling_width must have same length")
@@ -85,34 +63,6 @@ def FGCNN(dnn_feature_columns, embedding_size=8, conv_kernel_width=(7, 7, 7, 7),
seed,prefix='fg')


- #     sequence_input_dict, sequence_input_len_dict, sequence_max_len_dict = create_varlenfeat_inputdict(
- #         feature_dim_dict)
- #     inputs_list, deep_emb_list, linear_emb_list = get_inputs_embedding(None, feature_dim_dict, l2_reg_embedding,
- #                                                                        l2_reg_linear, init_std, seed, sparse_input_dict,
- #                                                                        dense_input_dict, sequence_input_dict,
- #                                                                        sequence_input_len_dict, sequence_max_len_dict,
- #                                                                        return_linear_logit, embedding_size, prefix='')
- #     _, fg_deep_emb_list, _ = get_inputs_embedding(None, feature_dim_dict, l2_reg_embedding, l2_reg_linear, init_std,
- #                                                   seed, sparse_input_dict, dense_input_dict, sequence_input_dict,
- #                                                   sequence_input_len_dict, sequence_max_len_dict, False, embedding_size,
- #                                                   prefix='fg')
- #     if return_linear_logit:
- #         linear_logit = get_linear_logit(
- #             linear_emb_list, dense_input_dict, l2_reg_linear)
- #     else:
- #         linear_logit = None
- #     return deep_emb_list, fg_deep_emb_list, linear_logit, inputs_list






- # deep_emb_list, fg_deep_emb_list, _, inputs_list = preprocess_input_embedding(dnn_feature_columns,
- #                                                                              embedding_size,
- #                                                                              l2_reg_embedding,
- #                                                                              0, init_std,
- #                                                                              seed, False)
fg_input = concat_fun(fg_deep_emb_list, axis=1)
origin_input = concat_fun(deep_emb_list, axis=1)

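Worth spelling out from the surviving lines above: FGCNN builds two independent embedding sets for the same feature columns, one for the main branch and one, created with prefix='fg', for the feature-generation CNN branch; the prefix keeps the Keras layer names, and therefore the weights, distinct. A sketch of the call pattern (return values as used by dcn.py in this same commit):

deep_emb_list, dense_value_list = input_from_feature_columns(
    features, dnn_feature_columns, embedding_size, l2_reg_embedding, init_std, seed)
fg_deep_emb_list, _ = input_from_feature_columns(
    features, dnn_feature_columns, embedding_size, l2_reg_embedding, init_std, seed, prefix='fg')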
8 changes: 2 additions & 6 deletions deepctr/models/mlr.py
@@ -38,16 +38,12 @@ def MLR(region_feature_columns, base_feature_columns=None, region_num=4,
# raise ValueError(
# "feature_dim must be a dict like {'sparse':{'field_1':4,'field_2':3,'field_3':2},'dense':['field_5',]}")

- same_flag = False

if base_feature_columns is None or len(base_feature_columns) == 0:
base_feature_columns = region_feature_columns
- same_flag = True

if bias_feature_columns is None:
bias_feature_columns = []
- #for feat in region_feature_columns['sparse'] + base_feature_columns['sparse'] + bias_feature_columns['sparse']:
- #    if feat.hash_flag:
- #        raise ValueError("Feature Hashing on the fly is no supported in MLR") #TODO:support feature hashing on the MLR


features = build_input_features(region_feature_columns + base_feature_columns+bias_feature_columns)

6 changes: 3 additions & 3 deletions deepctr/models/nfm.py
@@ -34,7 +34,6 @@ def NFM(linear_feature_columns, dnn_feature_columns, embedding_size=8, dnn_hidden_units
:param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
:return: A Keras model instance.
"""
- #check_feature_config_dict(linear_feature_columns)

features = build_input_features(linear_feature_columns + dnn_feature_columns)

@@ -44,16 +43,17 @@ def NFM(linear_feature_columns, dnn_feature_columns, embedding_size=8, dnn_hidden_units
embedding_size,
l2_reg_embedding,init_std,
seed)
- #todo not support dense

linear_logit = get_linear_logit(features, linear_feature_columns, l2_reg=l2_reg_linear, init_std=init_std,
seed=seed, prefix='linear')

fm_input = concat_fun(sparse_embedding_list, axis=1)
bi_out = BiInteractionPooling()(fm_input)
if bi_dropout:
bi_out = tf.keras.layers.Dropout(bi_dropout)(bi_out, training=None)
+ dnn_input = combined_dnn_input([bi_out],dense_value_list)
deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
- False, seed)(bi_out)
+ False, seed)(dnn_input)
deep_logit = tf.keras.layers.Dense(
1, use_bias=False, activation=None)(deep_out)

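For context on the tensor being rerouted into combined_dnn_input here: BiInteractionPooling compresses the stacked field embeddings of shape (batch_size, field_size, embedding_size) into a single (batch_size, 1, embedding_size) vector with the standard FM identity 0.5 * ((sum_i x_i)^2 - sum_i x_i^2). A functional sketch of that computation (an illustration, not the layer's actual code):

import tensorflow as tf

def bi_interaction_pooling(concated_embeds):
    # concated_embeds: (batch_size, field_size, embedding_size)
    square_of_sum = tf.square(tf.reduce_sum(concated_embeds, axis=1, keepdims=True))
    sum_of_square = tf.reduce_sum(tf.square(concated_embeds), axis=1, keepdims=True)
    return 0.5 * (square_of_sum - sum_of_square)  # (batch_size, 1, embedding_size)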
7 changes: 3 additions & 4 deletions deepctr/models/pnn.py
@@ -9,7 +9,7 @@

import tensorflow as tf

- from ..inputs import input_from_feature_columns,build_input_features
+ from ..inputs import input_from_feature_columns,build_input_features,combined_dnn_input
from ..layers.core import PredictionLayer, DNN
from ..layers.interaction import InnerProductLayer, OutterProductLayer
from ..layers.utils import concat_fun
@@ -36,7 +36,6 @@ def PNN(dnn_feature_columns, embedding_size=8, dnn_hidden_units=(128, 128), l2_r
:param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
:return: A Keras model instance.
"""
- #check_feature_config_dict(dnn_feature_columns)

if kernel_type not in ['mat', 'vec', 'num']:
raise ValueError("kernel_type must be mat,vec or num")
@@ -49,7 +48,6 @@ def PNN(dnn_feature_columns, embedding_size=8, dnn_hidden_units=(128, 128), l2_r
embedding_size,
l2_reg_embedding,init_std,
seed)
- # todo note support dense
inner_product = tf.keras.layers.Flatten()(InnerProductLayer()(sparse_embedding_list))
outter_product = OutterProductLayer(kernel_type)(sparse_embedding_list)

@@ -69,8 +67,9 @@ def PNN(dnn_feature_columns, embedding_size=8, dnn_hidden_units=(128, 128), l2_r
else:
deep_input = linear_signal

+ dnn_input = combined_dnn_input([deep_input],dense_value_list)
deep_out = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout,
- False, seed)(deep_input)
+ False, seed)(dnn_input)
deep_logit = tf.keras.layers.Dense(
1, use_bias=False, activation=None)(deep_out)

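As a reminder of what the inner_product signal above contains: InnerProductLayer produces the dot product of every pair of field embeddings, which PNN then flattens alongside the linear signal. A rough functional sketch (not the library's actual layer):

import itertools
import tensorflow as tf

def inner_products(embed_list):
    # embed_list: list of (batch_size, 1, embedding_size) tensors
    pairs = [tf.reduce_sum(p * q, axis=-1)  # (batch_size, 1) per pair
             for p, q in itertools.combinations(embed_list, 2)]
    return tf.concat(pairs, axis=-1)  # (batch_size, num_field_pairs)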
2 changes: 1 addition & 1 deletion examples/run_dsin.py
@@ -46,7 +46,7 @@ def get_xy_fd(hash_flag=False):
if __name__ == "__main__":
x, y, feature_dim_dict, behavior_feature_list = get_xy_fd(True)

- model = DSIN(feature_dim_dict, behavior_feature_list, sess_max_count=2, sess_len_max=4, embedding_size=4,
+ model = DSIN(feature_dim_dict, behavior_feature_list, sess_max_count=2, embedding_size=4,
dnn_hidden_units=[4, 4, 4], dnn_dropout=0.5, )

model.compile('adam', 'binary_crossentropy',
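Because sess_len_max is gone from DSIN's signature (see the dsin.py hunk above), a call site that still passes it now raises a TypeError for an unexpected keyword argument; the example keeps only sess_max_count. A minimal end-to-end run of the updated script (the fit arguments are an assumption in the style of the other examples):

x, y, feature_dim_dict, behavior_feature_list = get_xy_fd(True)
model = DSIN(feature_dim_dict, behavior_feature_list, sess_max_count=2, embedding_size=4,
             dnn_hidden_units=[4, 4, 4], dnn_dropout=0.5)
model.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy'])
history = model.fit(x, y, batch_size=2, epochs=1, verbose=2, validation_split=0.5)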
1 change: 0 additions & 1 deletion tests/models/DCN_test.py
@@ -1,6 +1,5 @@
import pytest

- from deepctr.inputs import SparseFeat
from deepctr.models import DCN
from ..utils import check_model, get_test_data,SAMPLE_SIZE

(Diff truncated: the remaining changed files are not shown.)