diff --git a/.travis.yml b/.travis.yml index 87324b97..4470dfa0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -59,7 +59,7 @@ script: notifications: recipients: - - wcshen1994@163.com + - weichenswc@163.com on_success: change on_failure: change diff --git a/README.md b/README.md index 48e8e81f..7404a224 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ [![Documentation Status](https://readthedocs.org/projects/deepctr-doc/badge/?version=latest)](https://deepctr-doc.readthedocs.io/) ![CI status](https://github.com/shenweichen/deepctr/workflows/CI/badge.svg) [![Coverage Status](https://coveralls.io/repos/github/shenweichen/DeepCTR/badge.svg?branch=master)](https://coveralls.io/github/shenweichen/DeepCTR?branch=master) -[![Codacy Badge](https://api.codacy.com/project/badge/Grade/d4099734dc0e4bab91d332ead8c0bdd0)](https://www.codacy.com/app/wcshen1994/DeepCTR?utm_source=github.com&utm_medium=referral&utm_content=shenweichen/DeepCTR&utm_campaign=Badge_Grade) +[![Codacy Badge](https://api.codacy.com/project/badge/Grade/d4099734dc0e4bab91d332ead8c0bdd0)](https://www.codacy.com/gh/shenweichen/DeepCTR?utm_source=github.com&utm_medium=referral&utm_content=shenweichen/DeepCTR&utm_campaign=Badge_Grade) [![Disscussion](https://img.shields.io/badge/chat-wechat-brightgreen?style=flat)](./README.md#DisscussionGroup) [![License](https://img.shields.io/github/license/shenweichen/deepctr.svg)](https://github.com/shenweichen/deepctr/blob/master/LICENSE) @@ -54,6 +54,7 @@ Let's [**Get Started!**](https://deepctr-doc.readthedocs.io/en/latest/Quick-Star | Deep Session Interest Network | [IJCAI 2019][Deep Session Interest Network for Click-Through Rate Prediction ](https://arxiv.org/abs/1905.06482) | | FiBiNET | [RecSys 2019][FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction](https://arxiv.org/pdf/1905.09433.pdf) | | FLEN | [arxiv 2019][FLEN: Leveraging Field for Scalable CTR Prediction](https://arxiv.org/pdf/1911.04690.pdf) | +| 
BST | [DLP-KDD 2019][Behavior sequence transformer for e-commerce recommendation in Alibaba](https://arxiv.org/pdf/1905.06874.pdf) | | DCN V2 | [arxiv 2020][DCN V2: Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems](https://arxiv.org/abs/2008.13535) | ## Citation @@ -82,3 +83,38 @@ If you find this code useful in your research, please cite it using the followin ![wechat](./docs/pics/code.png) + +## Main contributors([welcome to join us!](./CONTRIBUTING.md)) + + + + + + + + + + + +
+ ​ pic
+ ​ Shen Weichen ​ +

+ Alibaba Group

​ +
+ pic
+ Zan Shuxun ​ +

Beijing University
of Posts and
Telecommunications

​ +
+ ​ pic
+ ​ Harshit Pande +

Amazon

​ +
+ ​ pic
+ ​ Li Zichao +

Peking University

​ +
+ ​ pic
+ LeoCai +

Chongqing University
of Posts and
Telecommunications

​ +
diff --git a/deepctr/__init__.py b/deepctr/__init__.py index b268aaeb..b3b93e89 100644 --- a/deepctr/__init__.py +++ b/deepctr/__init__.py @@ -1,4 +1,4 @@ -from .utils import check_version - -__version__ = '0.8.3' -check_version(__version__) +from .utils import check_version + +__version__ = '0.8.5' +check_version(__version__) diff --git a/deepctr/estimator/models/afm.py b/deepctr/estimator/models/afm.py index b56282dc..e53e76d4 100644 --- a/deepctr/estimator/models/afm.py +++ b/deepctr/estimator/models/afm.py @@ -2,7 +2,7 @@ """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of feature interactions via attention networks[J]. arXiv preprint arXiv:1708.04617, 2017. diff --git a/deepctr/estimator/models/autoint.py b/deepctr/estimator/models/autoint.py index 40e3b1e1..843f41e6 100644 --- a/deepctr/estimator/models/autoint.py +++ b/deepctr/estimator/models/autoint.py @@ -2,7 +2,7 @@ """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. arXiv preprint arXiv:1810.11921, 2018.(https://arxiv.org/abs/1810.11921) diff --git a/deepctr/estimator/models/ccpm.py b/deepctr/estimator/models/ccpm.py index cc788a38..0bae78fa 100644 --- a/deepctr/estimator/models/ccpm.py +++ b/deepctr/estimator/models/ccpm.py @@ -2,7 +2,7 @@ """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Liu Q, Yu F, Wu S, et al. A convolutional click prediction model[C]//Proceedings of the 24th ACM International on Conference on Information and Knowledge Management. ACM, 2015: 1743-1746. 
diff --git a/deepctr/estimator/models/dcn.py b/deepctr/estimator/models/dcn.py index ceba2b6b..78610be9 100644 --- a/deepctr/estimator/models/dcn.py +++ b/deepctr/estimator/models/dcn.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. ACM, 2017: 12. (https://arxiv.org/abs/1708.05123) diff --git a/deepctr/estimator/models/deepfm.py b/deepctr/estimator/models/deepfm.py index 0021b506..25c311d7 100644 --- a/deepctr/estimator/models/deepfm.py +++ b/deepctr/estimator/models/deepfm.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Guo H, Tang R, Ye Y, et al. Deepfm: a factorization-machine based neural network for ctr prediction[J]. arXiv preprint arXiv:1703.04247, 2017.(https://arxiv.org/abs/1703.04247) diff --git a/deepctr/estimator/models/fibinet.py b/deepctr/estimator/models/fibinet.py index 619f4f8e..1fc25a1f 100644 --- a/deepctr/estimator/models/fibinet.py +++ b/deepctr/estimator/models/fibinet.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Huang T, Zhang Z, Zhang J. FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1905.09433, 2019. diff --git a/deepctr/estimator/models/fnn.py b/deepctr/estimator/models/fnn.py index aeb7de1a..f2270a06 100644 --- a/deepctr/estimator/models/fnn.py +++ b/deepctr/estimator/models/fnn.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Zhang W, Du T, Wang J. Deep learning over multi-field categorical data[C]//European conference on information retrieval. 
Springer, Cham, 2016: 45-57.(https://arxiv.org/pdf/1601.02376.pdf) diff --git a/deepctr/estimator/models/fwfm.py b/deepctr/estimator/models/fwfm.py index f5de40ab..69b1fa37 100644 --- a/deepctr/estimator/models/fwfm.py +++ b/deepctr/estimator/models/fwfm.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Harshit Pande Reference: diff --git a/deepctr/estimator/models/nfm.py b/deepctr/estimator/models/nfm.py index ff10b776..cabab4ce 100644 --- a/deepctr/estimator/models/nfm.py +++ b/deepctr/estimator/models/nfm.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364. (https://arxiv.org/abs/1708.05027) diff --git a/deepctr/estimator/models/pnn.py b/deepctr/estimator/models/pnn.py index add1da8b..9dcdb5cc 100644 --- a/deepctr/estimator/models/pnn.py +++ b/deepctr/estimator/models/pnn.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.(https://arxiv.org/pdf/1611.00144.pdf) diff --git a/deepctr/estimator/models/wdl.py b/deepctr/estimator/models/wdl.py index 381d5f28..482c03b6 100644 --- a/deepctr/estimator/models/wdl.py +++ b/deepctr/estimator/models/wdl.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Cheng H T, Koc L, Harmsen J, et al. Wide & deep learning for recommender systems[C]//Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. 
ACM, 2016: 7-10.(https://arxiv.org/pdf/1606.07792.pdf) diff --git a/deepctr/estimator/models/xdeepfm.py b/deepctr/estimator/models/xdeepfm.py index 65a0658a..b14a143c 100644 --- a/deepctr/estimator/models/xdeepfm.py +++ b/deepctr/estimator/models/xdeepfm.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018.(https://arxiv.org/pdf/1803.05170.pdf) diff --git a/deepctr/inputs.py b/deepctr/inputs.py index 92019a85..a36e4e9b 100644 --- a/deepctr/inputs.py +++ b/deepctr/inputs.py @@ -2,7 +2,7 @@ """ Author: - Weichen Shen,wcshen1994@163.com + Weichen Shen,weichenswc@163.com """ diff --git a/deepctr/layers/activation.py b/deepctr/layers/activation.py index 1b97b4d7..5e55945f 100644 --- a/deepctr/layers/activation.py +++ b/deepctr/layers/activation.py @@ -2,7 +2,7 @@ """ Author: - Weichen Shen,wcshen1994@163.com + Weichen Shen,weichenswc@163.com """ diff --git a/deepctr/layers/core.py b/deepctr/layers/core.py index f81bf97b..6ee4b77b 100644 --- a/deepctr/layers/core.py +++ b/deepctr/layers/core.py @@ -2,7 +2,7 @@ """ Author: - Weichen Shen,wcshen1994@163.com + Weichen Shen,weichenswc@163.com """ diff --git a/deepctr/layers/interaction.py b/deepctr/layers/interaction.py index 87e159d1..fa32f047 100644 --- a/deepctr/layers/interaction.py +++ b/deepctr/layers/interaction.py @@ -2,7 +2,7 @@ """ Authors: - Weichen Shen,wcshen1994@163.com, + Weichen Shen,weichenswc@163.com, Harshit Pande """ diff --git a/deepctr/layers/normalization.py b/deepctr/layers/normalization.py index 7eec63ba..aa9d392c 100644 --- a/deepctr/layers/normalization.py +++ b/deepctr/layers/normalization.py @@ -2,7 +2,7 @@ """ Author: - Weichen Shen,wcshen1994@163.com + Weichen Shen,weichenswc@163.com """ diff --git a/deepctr/layers/sequence.py 
b/deepctr/layers/sequence.py index 3c767a07..4160fb11 100644 --- a/deepctr/layers/sequence.py +++ b/deepctr/layers/sequence.py @@ -2,7 +2,7 @@ """ Author: - Weichen Shen,wcshen1994@163.com + Weichen Shen,weichenswc@163.com """ @@ -79,7 +79,7 @@ def call(self, seq_value_len_list, mask=None, **kwargs): mask = tf.tile(mask, [1, 1, embedding_size]) if self.mode == "max": - hist = uiseq_embed_list - (1-mask) * 1e9 + hist = uiseq_embed_list - (1 - mask) * 1e9 return reduce_max(hist, 1, keep_dims=True) hist = reduce_sum(uiseq_embed_list * mask, 1, keep_dims=False) @@ -417,12 +417,12 @@ class Transformer(Layer): """ Simplified version of Transformer proposed in 《Attention is all you need》 Input shape - - a list of two 3D tensor with shape ``(batch_size, timesteps, input_dim)`` if supports_masking=True. - - a list of two 4 tensors, first two tensors with shape ``(batch_size, timesteps, input_dim)``,last two tensors with shape ``(batch_size, 1)`` if supports_masking=False. + - a list of two 3D tensor with shape ``(batch_size, timesteps, input_dim)`` if ``supports_masking=True`` . + - a list of two 4 tensors, first two tensors with shape ``(batch_size, timesteps, input_dim)``,last two tensors with shape ``(batch_size, 1)`` if ``supports_masking=False`` . Output shape - - 3D tensor with shape: ``(batch_size, 1, input_dim)``. + - 3D tensor with shape: ``(batch_size, 1, input_dim)`` if ``output_type='mean'`` or ``output_type='sum'`` , else ``(batch_size, timesteps, input_dim)`` . Arguments @@ -436,6 +436,8 @@ class Transformer(Layer): - **blinding**: bool. Whether or not use blinding. - **seed**: A Python integer to use as random seed. - **supports_masking**:bool. Whether or not support masking. + - **attention_type**: str, Type of attention, the value must be one of { ``'scaled_dot_product'`` , ``'additive'`` }. + - **output_type**: ``'mean'`` , ``'sum'`` or `None`. Whether or not use average/sum pooling for output. References - [Vaswani, Ashish, et al. 
"Attention is all you need." Advances in Neural Information Processing Systems. 2017.](https://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf) @@ -443,7 +445,7 @@ class Transformer(Layer): def __init__(self, att_embedding_size=1, head_num=8, dropout_rate=0.0, use_positional_encoding=True, use_res=True, use_feed_forward=True, use_layer_norm=False, blinding=True, seed=1024, supports_masking=False, - **kwargs): + attention_type="scaled_dot_product", output_type="mean", **kwargs): if head_num <= 0: raise ValueError('head_num must be a int > 0') self.att_embedding_size = att_embedding_size @@ -456,6 +458,8 @@ def __init__(self, att_embedding_size=1, head_num=8, dropout_rate=0.0, use_posit self.dropout_rate = dropout_rate self.use_layer_norm = use_layer_norm self.blinding = blinding + self.attention_type = attention_type + self.output_type = output_type super(Transformer, self).__init__(**kwargs) self.supports_masking = supports_masking @@ -464,7 +468,7 @@ def build(self, input_shape): if self.num_units != embedding_size: raise ValueError( "att_embedding_size * head_num must equal the last dimension size of inputs,got %d * %d != %d" % ( - self.att_embedding_size, self.head_num, embedding_size)) + self.att_embedding_size, self.head_num, embedding_size)) self.seq_len_max = int(input_shape[0][-2]) self.W_Query = self.add_weight(name='query', shape=[embedding_size, self.att_embedding_size * self.head_num], dtype=tf.float32, @@ -475,6 +479,11 @@ def build(self, input_shape): self.W_Value = self.add_weight(name='value', shape=[embedding_size, self.att_embedding_size * self.head_num], dtype=tf.float32, initializer=tf.keras.initializers.TruncatedNormal(seed=self.seed + 2)) + if self.attention_type == "additive": + self.b = self.add_weight('b', shape=[self.att_embedding_size], dtype=tf.float32, + initializer=tf.keras.initializers.glorot_uniform(seed=self.seed)) + self.v = self.add_weight('v', shape=[self.att_embedding_size], dtype=tf.float32, + 
initializer=tf.keras.initializers.glorot_uniform(seed=self.seed)) # if self.use_res: # self.W_Res = self.add_weight(name='res', shape=[embedding_size, self.att_embedding_size * self.head_num], dtype=tf.float32, # initializer=tf.keras.initializers.TruncatedNormal(seed=self.seed)) @@ -525,10 +534,18 @@ def call(self, inputs, mask=None, training=None, **kwargs): keys = tf.concat(tf.split(keys, self.head_num, axis=2), axis=0) values = tf.concat(tf.split(values, self.head_num, axis=2), axis=0) - # head_num*None T_q T_k - outputs = tf.matmul(querys, keys, transpose_b=True) + if self.attention_type == "scaled_dot_product": + # head_num*None T_q T_k + outputs = tf.matmul(querys, keys, transpose_b=True) - outputs = outputs / (keys.get_shape().as_list()[-1] ** 0.5) + outputs = outputs / (keys.get_shape().as_list()[-1] ** 0.5) + elif self.attention_type == "additive": + querys_reshaped = tf.expand_dims(querys, axis=-2) + keys_reshaped = tf.expand_dims(keys, axis=-3) + outputs = tf.tanh(tf.nn.bias_add(querys_reshaped + keys_reshaped, self.b)) + outputs = tf.squeeze(tf.tensordot(outputs, tf.expand_dims(self.v, axis=-1), axes=[-1, 0]), axis=-1) + else: + raise NotImplementedError("attention_type must be 'scaled_dot_product' or 'additive'") key_masks = tf.tile(key_masks, [self.head_num, 1]) @@ -579,7 +596,12 @@ def call(self, inputs, mask=None, training=None, **kwargs): if self.use_layer_norm: result = self.ln(result) - return reduce_mean(result, axis=1, keep_dims=True) + if self.output_type == "mean": + return reduce_mean(result, axis=1, keep_dims=True) + elif self.output_type == "sum": + return reduce_sum(result, axis=1, keep_dims=True) + else: + return result def compute_output_shape(self, input_shape): @@ -593,7 +615,7 @@ def get_config(self, ): 'dropout_rate': self.dropout_rate, 'use_res': self.use_res, 'use_positional_encoding': self.use_positional_encoding, 'use_feed_forward': self.use_feed_forward, 'use_layer_norm': self.use_layer_norm, 'seed': self.seed, 'supports_masking': self.supports_masking, - 'blinding': self.blinding} + 
'blinding': self.blinding, 'attention_type': self.attention_type, 'output_type': self.output_type} base_config = super(Transformer, self).get_config() return dict(list(base_config.items()) + list(config.items())) diff --git a/deepctr/layers/utils.py b/deepctr/layers/utils.py index d13928a4..ca73d6a3 100644 --- a/deepctr/layers/utils.py +++ b/deepctr/layers/utils.py @@ -2,7 +2,7 @@ """ Author: - Weichen Shen,wcshen1994@163.com + Weichen Shen,weichenswc@163.com """ import tensorflow as tf diff --git a/deepctr/models/__init__.py b/deepctr/models/__init__.py index 9e77e40d..217b357b 100644 --- a/deepctr/models/__init__.py +++ b/deepctr/models/__init__.py @@ -18,6 +18,7 @@ from .fibinet import FiBiNET from .flen import FLEN from .fwfm import FwFM +from .bst import BST __all__ = ["AFM", "CCPM", "DCN", "DCNMix", "MLR", "DeepFM", "MLR", "NFM", "DIN", "DIEN", "FNN", "PNN", - "WDL", "xDeepFM", "AutoInt", "ONN", "FGCNN", "DSIN", "FiBiNET", 'FLEN', "FwFM"] + "WDL", "xDeepFM", "AutoInt", "ONN", "FGCNN", "DSIN", "FiBiNET", 'FLEN', "FwFM", "BST"] diff --git a/deepctr/models/afm.py b/deepctr/models/afm.py index dd664373..3f5ea7d1 100644 --- a/deepctr/models/afm.py +++ b/deepctr/models/afm.py @@ -2,7 +2,7 @@ """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of feature interactions via attention networks[J]. arXiv preprint arXiv:1708.04617, 2017. diff --git a/deepctr/models/autoint.py b/deepctr/models/autoint.py index c742e3c2..1818e6a0 100644 --- a/deepctr/models/autoint.py +++ b/deepctr/models/autoint.py @@ -2,7 +2,7 @@ """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. 
arXiv preprint arXiv:1810.11921, 2018.(https://arxiv.org/abs/1810.11921) diff --git a/deepctr/models/bst.py b/deepctr/models/bst.py new file mode 100644 index 00000000..ba5a8bb8 --- /dev/null +++ b/deepctr/models/bst.py @@ -0,0 +1,107 @@ +# -*- coding:utf-8 -*- +""" +Author: + Zichao Li, 2843656167@qq.com + +Reference: + Qiwei Chen, Huan Zhao, Wei Li, Pipei Huang, and Wenwu Ou. 2019. Behavior sequence transformer for e-commerce recommendation in Alibaba. In Proceedings of the 1st International Workshop on Deep Learning Practice for High-Dimensional Sparse Data (DLP-KDD '19). Association for Computing Machinery, New York, NY, USA, Article 12, 1–4. DOI:https://doi.org/10.1145/3326937.3341261 +""" + +import tensorflow as tf +from tensorflow.python.keras.layers import (Dense, Flatten) + +from ..feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, build_input_features +from ..inputs import get_varlen_pooling_list, create_embedding_matrix, embedding_lookup, varlen_embedding_lookup, \ + get_dense_input +from ..layers.core import DNN, PredictionLayer +from ..layers.sequence import Transformer, AttentionSequencePoolingLayer +from ..layers.utils import concat_func, combined_dnn_input + + +def BST(dnn_feature_columns, history_feature_list, transformer_num=1, att_head_num=8, + use_bn=False, dnn_hidden_units=(200, 80), dnn_activation='relu', l2_reg_dnn=0, + l2_reg_embedding=1e-6, dnn_dropout=0.0, seed=1024, task='binary'): + """Instantiates the BST architecture. + + :param dnn_feature_columns: An iterable containing all the features used by deep part of the model. + :param history_feature_list: list, to indicate sequence sparse field. + :param transformer_num: int, the number of transformer layer. + :param att_head_num: int, the number of heads in multi-head self attention. + :param use_bn: bool. 
Whether use BatchNormalization before activation or not in deep net + :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN + :param dnn_activation: Activation function to use in DNN + :param l2_reg_dnn: float. L2 regularizer strength applied to DNN + :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector + :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate. + :param seed: integer ,to use as random seed. + :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss + :return: A Keras model instance. + + """ + + features = build_input_features(dnn_feature_columns) + inputs_list = list(features.values()) + + user_behavior_length = features["seq_length"] + + sparse_feature_columns = list( + filter(lambda x: isinstance(x, SparseFeat), dnn_feature_columns)) if dnn_feature_columns else [] + dense_feature_columns = list( + filter(lambda x: isinstance(x, DenseFeat), dnn_feature_columns)) if dnn_feature_columns else [] + varlen_sparse_feature_columns = list( + filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else [] + + history_feature_columns = [] + sparse_varlen_feature_columns = [] + history_fc_names = list(map(lambda x: "hist_" + x, history_feature_list)) + + for fc in varlen_sparse_feature_columns: + feature_name = fc.name + if feature_name in history_fc_names: + history_feature_columns.append(fc) + else: + sparse_varlen_feature_columns.append(fc) + + embedding_dict = create_embedding_matrix(dnn_feature_columns, l2_reg_embedding, seed, prefix="", + seq_mask_zero=True) + + query_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns, + return_feat_list=history_feature_list, to_list=True) + hist_emb_list = embedding_lookup(embedding_dict, features, history_feature_columns, + return_feat_list=history_fc_names, to_list=True) + dnn_input_emb_list 
= embedding_lookup(embedding_dict, features, sparse_feature_columns, + mask_feat_list=history_feature_list, to_list=True) + dense_value_list = get_dense_input(features, dense_feature_columns) + sequence_embed_dict = varlen_embedding_lookup(embedding_dict, features, sparse_varlen_feature_columns) + sequence_embed_list = get_varlen_pooling_list(sequence_embed_dict, features, sparse_varlen_feature_columns, + to_list=True) + + dnn_input_emb_list += sequence_embed_list + query_emb = concat_func(query_emb_list) + deep_input_emb = concat_func(dnn_input_emb_list) + hist_emb = concat_func(hist_emb_list) + + transformer_output = hist_emb + for i in range(transformer_num): + att_embedding_size = transformer_output.get_shape().as_list()[-1] // att_head_num + transformer_layer = Transformer(att_embedding_size=att_embedding_size, head_num=att_head_num, + dropout_rate=dnn_dropout, use_positional_encoding=True, use_res=True, + use_feed_forward=True, use_layer_norm=True, blinding=False, seed=seed, + supports_masking=False, output_type=None) + transformer_output = transformer_layer([transformer_output, transformer_output, + user_behavior_length, user_behavior_length]) + + attn_output = AttentionSequencePoolingLayer(att_hidden_units=(64, 16), weight_normalization=True, + supports_masking=False)([query_emb, transformer_output, + user_behavior_length]) + deep_input_emb = concat_func([deep_input_emb, attn_output], axis=-1) + deep_input_emb = Flatten()(deep_input_emb) + + dnn_input = combined_dnn_input([deep_input_emb], dense_value_list) + output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, use_bn, seed=seed)(dnn_input) + final_logit = Dense(1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(output) + output = PredictionLayer(task)(final_logit) + + model = tf.keras.models.Model(inputs=inputs_list, outputs=output) + + return model diff --git a/deepctr/models/ccpm.py b/deepctr/models/ccpm.py index cfe0013f..05c8e5f1 100644 --- 
a/deepctr/models/ccpm.py +++ b/deepctr/models/ccpm.py @@ -2,7 +2,7 @@ """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Liu Q, Yu F, Wu S, et al. A convolutional click prediction model[C]//Proceedings of the 24th ACM International on Conference on Information and Knowledge Management. ACM, 2015: 1743-1746. diff --git a/deepctr/models/dcn.py b/deepctr/models/dcn.py index 5a14f5a7..69d23e58 100644 --- a/deepctr/models/dcn.py +++ b/deepctr/models/dcn.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Shuxun Zan, zanshuxun@aliyun.com diff --git a/deepctr/models/dcnmix.py b/deepctr/models/dcnmix.py index ea62cd24..7b257643 100644 --- a/deepctr/models/dcnmix.py +++ b/deepctr/models/dcnmix.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Shuxun Zan, zanshuxun@aliyun.com diff --git a/deepctr/models/deepfm.py b/deepctr/models/deepfm.py index 125744ad..14ad4c8c 100644 --- a/deepctr/models/deepfm.py +++ b/deepctr/models/deepfm.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Guo H, Tang R, Ye Y, et al. Deepfm: a factorization-machine based neural network for ctr prediction[J]. arXiv preprint arXiv:1703.04247, 2017.(https://arxiv.org/abs/1703.04247) diff --git a/deepctr/models/dien.py b/deepctr/models/dien.py index 3b167472..98ce9f1d 100644 --- a/deepctr/models/dien.py +++ b/deepctr/models/dien.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Zhou G, Mou N, Fan Y, et al. Deep Interest Evolution Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1809.03672, 2018. 
(https://arxiv.org/pdf/1809.03672.pdf) diff --git a/deepctr/models/din.py b/deepctr/models/din.py index 1e2c536d..9c3ba5c5 100644 --- a/deepctr/models/din.py +++ b/deepctr/models/din.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068. (https://arxiv.org/pdf/1706.06978.pdf) diff --git a/deepctr/models/dsin.py b/deepctr/models/dsin.py index de52ea0b..5091c296 100644 --- a/deepctr/models/dsin.py +++ b/deepctr/models/dsin.py @@ -1,7 +1,7 @@ # coding: utf-8 """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Feng Y, Lv F, Shen W, et al. Deep Session Interest Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1905.06482, 2019.(https://arxiv.org/abs/1905.06482) diff --git a/deepctr/models/fgcnn.py b/deepctr/models/fgcnn.py index 3ee1eaa4..8860511e 100644 --- a/deepctr/models/fgcnn.py +++ b/deepctr/models/fgcnn.py @@ -2,7 +2,7 @@ """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Liu B, Tang R, Chen Y, et al. Feature Generation by Convolutional Neural Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1904.04447, 2019. diff --git a/deepctr/models/fibinet.py b/deepctr/models/fibinet.py index c0407c6a..7cf5922c 100644 --- a/deepctr/models/fibinet.py +++ b/deepctr/models/fibinet.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Huang T, Zhang Z, Zhang J. FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1905.09433, 2019. 
diff --git a/deepctr/models/fnn.py b/deepctr/models/fnn.py index b2d729ac..73c282a4 100644 --- a/deepctr/models/fnn.py +++ b/deepctr/models/fnn.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Zhang W, Du T, Wang J. Deep learning over multi-field categorical data[C]//European conference on information retrieval. Springer, Cham, 2016: 45-57.(https://arxiv.org/pdf/1601.02376.pdf) diff --git a/deepctr/models/mlr.py b/deepctr/models/mlr.py index 3cff00d4..393d006c 100644 --- a/deepctr/models/mlr.py +++ b/deepctr/models/mlr.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Gai K, Zhu X, Li H, et al. Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction[J]. arXiv preprint arXiv:1704.05194, 2017.(https://arxiv.org/abs/1704.05194) diff --git a/deepctr/models/nfm.py b/deepctr/models/nfm.py index 5c643a94..84899f6c 100644 --- a/deepctr/models/nfm.py +++ b/deepctr/models/nfm.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364. (https://arxiv.org/abs/1708.05027) diff --git a/deepctr/models/onn.py b/deepctr/models/onn.py index bc96abaa..2708fc74 100644 --- a/deepctr/models/onn.py +++ b/deepctr/models/onn.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Yang Y, Xu B, Shen F, et al. Operation-aware Neural Networks for User Response Prediction[J]. arXiv preprint arXiv:1904.12579, 2019. 
(https://arxiv.org/pdf/1904.12579) diff --git a/deepctr/models/pnn.py b/deepctr/models/pnn.py index c450c6ac..c8b94ac0 100644 --- a/deepctr/models/pnn.py +++ b/deepctr/models/pnn.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.(https://arxiv.org/pdf/1611.00144.pdf) diff --git a/deepctr/models/wdl.py b/deepctr/models/wdl.py index 1d8a51e4..0cad17f5 100644 --- a/deepctr/models/wdl.py +++ b/deepctr/models/wdl.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Cheng H T, Koc L, Harmsen J, et al. Wide & deep learning for recommender systems[C]//Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. ACM, 2016: 7-10.(https://arxiv.org/pdf/1606.07792.pdf) diff --git a/deepctr/models/xdeepfm.py b/deepctr/models/xdeepfm.py index 2f518cd5..6e3bd7c8 100644 --- a/deepctr/models/xdeepfm.py +++ b/deepctr/models/xdeepfm.py @@ -1,7 +1,7 @@ # -*- coding:utf-8 -*- """ Author: - Weichen Shen, wcshen1994@163.com + Weichen Shen, weichenswc@163.com Reference: [1] Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. 
arXiv preprint arXiv:1803.05170, 2018.(https://arxiv.org/pdf/1803.05170.pdf) diff --git a/deepctr/utils.py b/deepctr/utils.py index 2049e0c9..7fe3b25a 100644 --- a/deepctr/utils.py +++ b/deepctr/utils.py @@ -2,7 +2,7 @@ """ Author: - Weichen Shen,wcshen1994@163.com + Weichen Shen,weichenswc@163.com """ diff --git a/docs/pics/BST.png b/docs/pics/BST.png new file mode 100644 index 00000000..799b7135 Binary files /dev/null and b/docs/pics/BST.png differ diff --git a/docs/source/Examples.md b/docs/source/Examples.md index 34d3733f..de6b33c1 100644 --- a/docs/source/Examples.md +++ b/docs/source/Examples.md @@ -44,7 +44,7 @@ if __name__ == "__main__": # 2.count #unique features for each sparse field,and record dense feature field name - fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(),embedding_dim=4) + fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1,embedding_dim=4) for i,feat in enumerate(sparse_features)] + [DenseFeat(feat, 1,) for feat in dense_features] @@ -161,7 +161,7 @@ if __name__ == "__main__": lbe = LabelEncoder() data[feat] = lbe.fit_transform(data[feat]) # 2.count #unique features for each sparse field - fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique(),embedding_dim=4) + fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1,embedding_dim=4) for feat in sparse_features] linear_feature_columns = fixlen_feature_columns dnn_feature_columns = fixlen_feature_columns @@ -241,7 +241,7 @@ if __name__ == "__main__": # 2.count #unique features for each sparse field and generate feature config for sequence feature - fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique(),embedding_dim=4) + fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1,embedding_dim=4) for feat in sparse_features] use_weighted_sequence = False @@ -415,8 +415,8 @@ if __name__ == "__main__": for i, feat in enumerate(sparse_features): 
dnn_feature_columns.append(tf.feature_column.embedding_column( - tf.feature_column.categorical_column_with_identity(feat, data[feat].nunique()), 4)) - linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, data[feat].nunique())) + tf.feature_column.categorical_column_with_identity(feat, data[feat].max() + 1), 4)) + linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, data[feat].max() + 1)) for feat in dense_features: dnn_feature_columns.append(tf.feature_column.numeric_column(feat)) linear_feature_columns.append(tf.feature_column.numeric_column(feat)) diff --git a/docs/source/Features.md b/docs/source/Features.md index d64acd05..4ec13c7c 100644 --- a/docs/source/Features.md +++ b/docs/source/Features.md @@ -284,6 +284,18 @@ Deep Session Interest Network (DSIN) extracts users' multiple historical session [Feng Y, Lv F, Shen W, et al. Deep Session Interest Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1905.06482, 2019.](https://arxiv.org/abs/1905.06482) +### BST(Behavior Sequence Transformer) + +BST uses the powerful Transformer model to capture the sequential signals underlying users’ behavior sequences. + +[**BST Model API**](./deepctr.models.bst.html) + +[BST example](https://github.com/shenweichen/DeepCTR/tree/master/examples/run_din.py) + +![BST](../pics/BST.png) + +[Qiwei Chen, Huan Zhao, Wei Li, Pipei Huang, and Wenwu Ou. 2019. Behavior sequence transformer for e-commerce recommendation in Alibaba. In Proceedings of the 1st International Workshop on Deep Learning Practice for High-Dimensional Sparse Data (DLP-KDD '19). Association for Computing Machinery, New York, NY, USA, Article 12, 1–4.](https://arxiv.org/pdf/1905.06874.pdf) + ### FiBiNET(Feature Importance and Bilinear feature Interaction NETwork) Feature Importance and Bilinear feature Interaction NETwork is proposed to dynamically learn the feature importance and fine-grained feature interactions. 
On the one hand, the FiBiNET can dynamically learn the importance of fea- tures via the Squeeze-Excitation network (SENET) mechanism; on the other hand, it is able to effectively learn the feature interactions via bilinear function. diff --git a/docs/source/History.md b/docs/source/History.md index b9aa12a1..2559dccc 100644 --- a/docs/source/History.md +++ b/docs/source/History.md @@ -1,4 +1,5 @@ # History +- 03/13/2021 : [v0.8.5](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.5) released.Add [BST](./Features.html#bst-behavior-sequence-transformer) model. - 02/12/2021 : [v0.8.4](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.4) released.Fix bug in DCN-Mix. - 01/06/2021 : [v0.8.3](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.3) released.Add [DCN-Mix](./Features.html#dcn-mix-improved-deep-cross-network-with-mix-of-experts-and-matrix-kernel) model.Support `transform_fn` in `DenseFeat`. - 10/11/2020 : [v0.8.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.2) released.Refactor `DNN` Layer. 
diff --git a/docs/source/Models.rst b/docs/source/Models.rst index c7c80141..f123dea6 100644 --- a/docs/source/Models.rst +++ b/docs/source/Models.rst @@ -16,6 +16,7 @@ DeepCTR Models API DIN DIEN DSIN + BST xDeepFM AutoInt ONN diff --git a/docs/source/Quick-Start.md b/docs/source/Quick-Start.md index 0a862a0f..e587757f 100644 --- a/docs/source/Quick-Start.md +++ b/docs/source/Quick-Start.md @@ -79,7 +79,7 @@ And for varlen(multi-valued) sparse features,you can use [VarlenSparseFeat](./Fe - Label Encoding ```python -fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(),embedding_dim=4) +fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1,embedding_dim=4) for i,feat in enumerate(sparse_features)] + [DenseFeat(feat, 1,) for feat in dense_features] diff --git a/docs/source/conf.py b/docs/source/conf.py index 5a45c11a..f36db6d8 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -26,7 +26,7 @@ # The short X.Y version version = '' # The full version, including alpha/beta/rc tags -release = '0.8.3' +release = '0.8.5' # -- General configuration --------------------------------------------------- diff --git a/docs/source/deepctr.models.bst.rst b/docs/source/deepctr.models.bst.rst new file mode 100644 index 00000000..d3a41a09 --- /dev/null +++ b/docs/source/deepctr.models.bst.rst @@ -0,0 +1,7 @@ +deepctr.models.bst module +========================= + +.. automodule:: deepctr.models.bst + :members: + :no-undoc-members: + :no-show-inheritance: diff --git a/docs/source/index.rst b/docs/source/index.rst index 34bd41d0..f5acd97f 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -42,12 +42,12 @@ You can read the latest code and related projects News ----- +03/13/2021 : Add `BST <./Features.html#bst-behavior-sequence-transformer>`_ . `Changelog `_ + 02/12/2021 : Fix bug in DCN-Mix. 
`Changelog `_ 01/06/2021 : Add `DCN-Mix <./Features.html#dcn-mix-improved-deep-cross-network-with-mix-of-experts-and-matrix-kernel>`_ (`中文介绍 `_) and support ``transform_fn`` in ``DenseFeat``. `Changelog `_ -10/11/2020 : Refactor ``DNN`` Layer. `Changelog `_ - DisscussionGroup ----------------------- diff --git a/examples/run_classification_criteo.py b/examples/run_classification_criteo.py index 164030d2..d2e7c6a0 100644 --- a/examples/run_classification_criteo.py +++ b/examples/run_classification_criteo.py @@ -25,7 +25,7 @@ # 2.count #unique features for each sparse field,and record dense feature field name - fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(),embedding_dim=4 ) + fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1,embedding_dim=4 ) for i,feat in enumerate(sparse_features)] + [DenseFeat(feat, 1,) for feat in dense_features] diff --git a/examples/run_classification_criteo_multi_gpu.py b/examples/run_classification_criteo_multi_gpu.py index 440f2d39..cff0a617 100644 --- a/examples/run_classification_criteo_multi_gpu.py +++ b/examples/run_classification_criteo_multi_gpu.py @@ -26,7 +26,7 @@ # 2.count #unique features for each sparse field,and record dense feature field name - fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4) + fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1, embedding_dim=4) for feat in sparse_features] + [DenseFeat(feat, 1, ) for feat in dense_features] diff --git a/examples/run_din.py b/examples/run_din.py index 7187a8a5..44f162ee 100644 --- a/examples/run_din.py +++ b/examples/run_din.py @@ -1,16 +1,19 @@ import numpy as np -from deepctr.models import DIN -from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat,get_feature_names +from deepctr.models import DIN, BST +from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, get_feature_names def get_xy_fd(): - - 
feature_columns = [SparseFeat('user',3,embedding_dim=10),SparseFeat( - 'gender', 2,embedding_dim=4), SparseFeat('item_id', 3 + 1,embedding_dim=8), SparseFeat('cate_id', 2 + 1,embedding_dim=4),DenseFeat('pay_score', 1)] - feature_columns += [VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=3 + 1,embedding_dim=8,embedding_name='item_id'), maxlen=4), - VarLenSparseFeat(SparseFeat('hist_cate_id', 2 + 1,embedding_dim=4, embedding_name='cate_id'), maxlen=4)] - + feature_columns = [SparseFeat('user', 3, embedding_dim=10), SparseFeat( + 'gender', 2, embedding_dim=4), SparseFeat('item_id', 3 + 1, embedding_dim=8), + SparseFeat('cate_id', 2 + 1, embedding_dim=4), DenseFeat('pay_score', 1)] + feature_columns += [ + VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'), + maxlen=4, length_name="seq_length"), + VarLenSparseFeat(SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'), maxlen=4, + length_name="seq_length")] + # Notice: History behavior sequence feature name must start with "hist_". 
behavior_feature_list = ["item_id", "cate_id"] uid = np.array([0, 1, 2]) ugender = np.array([0, 1, 0]) @@ -20,10 +23,12 @@ def get_xy_fd(): hist_iid = np.array([[1, 2, 3, 0], [3, 2, 1, 0], [1, 2, 0, 0]]) hist_cate_id = np.array([[1, 2, 2, 0], [2, 2, 1, 0], [1, 2, 0, 0]]) + seq_length = np.array([3, 3, 2]) # the actual length of the behavior sequence feature_dict = {'user': uid, 'gender': ugender, 'item_id': iid, 'cate_id': cate_id, - 'hist_item_id': hist_iid, 'hist_cate_id': hist_cate_id, 'pay_score': pay_score} - x = {name:feature_dict[name] for name in get_feature_names(feature_columns)} + 'hist_item_id': hist_iid, 'hist_cate_id': hist_cate_id, + 'pay_score': pay_score, 'seq_length': seq_length} + x = {name: feature_dict[name] for name in get_feature_names(feature_columns)} y = np.array([1, 0, 1]) return x, y, feature_columns, behavior_feature_list @@ -31,6 +36,7 @@ def get_xy_fd(): if __name__ == "__main__": x, y, feature_columns, behavior_feature_list = get_xy_fd() model = DIN(feature_columns, behavior_feature_list) + # model = BST(feature_columns, behavior_feature_list,att_head_num=4) model.compile('adam', 'binary_crossentropy', metrics=['binary_crossentropy']) history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5) diff --git a/examples/run_estimator_pandas_classification.py b/examples/run_estimator_pandas_classification.py index 84fe9f9f..d531abef 100644 --- a/examples/run_estimator_pandas_classification.py +++ b/examples/run_estimator_pandas_classification.py @@ -31,8 +31,8 @@ for i, feat in enumerate(sparse_features): dnn_feature_columns.append(tf.feature_column.embedding_column( - tf.feature_column.categorical_column_with_identity(feat, data[feat].nunique()), 4)) - linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, data[feat].nunique())) + tf.feature_column.categorical_column_with_identity(feat, data[feat].max() + 1), 4)) + 
linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, data[feat].max() + 1)) for feat in dense_features: dnn_feature_columns.append(tf.feature_column.numeric_column(feat)) linear_feature_columns.append(tf.feature_column.numeric_column(feat)) diff --git a/examples/run_flen.py b/examples/run_flen.py index c8c29a31..7ca235d2 100644 --- a/examples/run_flen.py +++ b/examples/run_flen.py @@ -38,7 +38,7 @@ ) fixlen_feature_columns = [ - SparseFeat(name, vocabulary_size=data[name].nunique(), embedding_dim=16, use_hash=False, dtype='int32', + SparseFeat(name, vocabulary_size=data[name].max() + 1, embedding_dim=16, use_hash=False, dtype='int32', group_name=field_info[name]) for name in sparse_features] dnn_feature_columns = fixlen_feature_columns diff --git a/examples/run_multivalue_movielens.py b/examples/run_multivalue_movielens.py index 72e7513b..2063db22 100644 --- a/examples/run_multivalue_movielens.py +++ b/examples/run_multivalue_movielens.py @@ -37,7 +37,7 @@ def split(x): # 2.count #unique features for each sparse field and generate feature config for sequence feature - fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique(), embedding_dim=4) + fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1, embedding_dim=4) for feat in sparse_features] use_weighted_sequence = False diff --git a/examples/run_regression_movielens.py b/examples/run_regression_movielens.py index 5b631477..484cf20a 100644 --- a/examples/run_regression_movielens.py +++ b/examples/run_regression_movielens.py @@ -18,7 +18,7 @@ lbe = LabelEncoder() data[feat] = lbe.fit_transform(data[feat]) # 2.count #unique features for each sparse field - fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique(),embedding_dim=4) + fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1,embedding_dim=4) for feat in sparse_features] linear_feature_columns = fixlen_feature_columns dnn_feature_columns = fixlen_feature_columns diff --git a/setup.py 
b/setup.py index 17287a9e..746c1136 100644 --- a/setup.py +++ b/setup.py @@ -4,12 +4,12 @@ long_description = fh.read() REQUIRED_PACKAGES = [ - 'h5py==2.10.0','requests' + 'h5py==2.10.0', 'requests' ] setuptools.setup( name="deepctr", - version="0.8.3", + version="0.8.5", author="Weichen Shen", author_email="weichenswc@163.com", description="Easy-to-use,Modular and Extendible package of deep learning based CTR(Click Through Rate) prediction models with tensorflow 1.x and 2.x .", diff --git a/tests/models/BST_test.py b/tests/models/BST_test.py new file mode 100644 index 00000000..b895f4ef --- /dev/null +++ b/tests/models/BST_test.py @@ -0,0 +1,20 @@ +from deepctr.models import BST +from ..utils import check_model +from .DIN_test import get_xy_fd + + +def test_BST(): + model_name = "BST" + + x, y, feature_columns, behavior_feature_list = get_xy_fd(hash_flag=True) + + model = BST(dnn_feature_columns=feature_columns, + history_feature_list=behavior_feature_list, + att_head_num=4) + + check_model(model, model_name, x, y, + check_model_io=True) + + +if __name__ == "__main__": + pass diff --git a/tests/models/DIN_test.py b/tests/models/DIN_test.py index c401f6e0..1b462cb9 100644 --- a/tests/models/DIN_test.py +++ b/tests/models/DIN_test.py @@ -6,29 +6,30 @@ def get_xy_fd(hash_flag=False): - feature_columns = [SparseFeat('user', 3), SparseFeat( - 'gender', 2), SparseFeat('item', 3 + 1), SparseFeat('item_gender', 2 + 1), DenseFeat('score', 1)] + feature_columns = [SparseFeat('user', 3, embedding_dim=10), SparseFeat( + 'gender', 2, embedding_dim=4), SparseFeat('item_id', 3 + 1, embedding_dim=8), + SparseFeat('cate_id', 2 + 1, embedding_dim=4), DenseFeat('pay_score', 1)] feature_columns += [ - VarLenSparseFeat(SparseFeat('hist_item', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item'), - maxlen=4), - VarLenSparseFeat(SparseFeat('hist_item_gender', 2 + 1, embedding_dim=4, embedding_name='item_gender'), - maxlen=4)] - - behavior_feature_list = ["item", "item_gender"] 
+ VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'), + maxlen=4, length_name="seq_length"), + VarLenSparseFeat(SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'), maxlen=4, + length_name="seq_length")] + # Notice: History behavior sequence feature name must start with "hist_". + behavior_feature_list = ["item_id", "cate_id"] uid = np.array([0, 1, 2]) ugender = np.array([0, 1, 0]) iid = np.array([1, 2, 3]) # 0 is mask value - igender = np.array([1, 2, 1]) # 0 is mask value - score = np.array([0.1, 0.2, 0.3]) - - hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]]) - hist_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]]) + cate_id = np.array([1, 2, 2]) # 0 is mask value + pay_score = np.array([0.1, 0.2, 0.3]) - feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender, - 'hist_item': hist_iid, 'hist_item_gender': hist_igender, 'score': score} + hist_iid = np.array([[1, 2, 3, 0], [3, 2, 1, 0], [1, 2, 0, 0]]) + hist_cate_id = np.array([[1, 2, 2, 0], [2, 2, 1, 0], [1, 2, 0, 0]]) + seq_length = np.array([3, 3, 2]) # the actual length of the behavior sequence - feature_names = get_feature_names(feature_columns) - x = {name: feature_dict[name] for name in feature_names} + feature_dict = {'user': uid, 'gender': ugender, 'item_id': iid, 'cate_id': cate_id, + 'hist_item_id': hist_iid, 'hist_cate_id': hist_cate_id, + 'pay_score': pay_score, 'seq_length': seq_length} + x = {name: feature_dict[name] for name in get_feature_names(feature_columns)} y = np.array([1, 0, 1]) return x, y, feature_columns, behavior_feature_list