diff --git a/.travis.yml b/.travis.yml
index 87324b97..4470dfa0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -59,7 +59,7 @@ script:
notifications:
recipients:
- - wcshen1994@163.com
+ - weichenswc@163.com
on_success: change
on_failure: change
diff --git a/README.md b/README.md
index 48e8e81f..7404a224 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@
[![Documentation Status](https://readthedocs.org/projects/deepctr-doc/badge/?version=latest)](https://deepctr-doc.readthedocs.io/)
![CI status](https://github.com/shenweichen/deepctr/workflows/CI/badge.svg)
[![Coverage Status](https://coveralls.io/repos/github/shenweichen/DeepCTR/badge.svg?branch=master)](https://coveralls.io/github/shenweichen/DeepCTR?branch=master)
-[![Codacy Badge](https://api.codacy.com/project/badge/Grade/d4099734dc0e4bab91d332ead8c0bdd0)](https://www.codacy.com/app/wcshen1994/DeepCTR?utm_source=github.com&utm_medium=referral&utm_content=shenweichen/DeepCTR&utm_campaign=Badge_Grade)
+[![Codacy Badge](https://api.codacy.com/project/badge/Grade/d4099734dc0e4bab91d332ead8c0bdd0)](https://www.codacy.com/gh/shenweichen/DeepCTR?utm_source=github.com&utm_medium=referral&utm_content=shenweichen/DeepCTR&utm_campaign=Badge_Grade)
[![Disscussion](https://img.shields.io/badge/chat-wechat-brightgreen?style=flat)](./README.md#DisscussionGroup)
[![License](https://img.shields.io/github/license/shenweichen/deepctr.svg)](https://github.com/shenweichen/deepctr/blob/master/LICENSE)
@@ -54,6 +54,7 @@ Let's [**Get Started!**](https://deepctr-doc.readthedocs.io/en/latest/Quick-Star
| Deep Session Interest Network | [IJCAI 2019][Deep Session Interest Network for Click-Through Rate Prediction ](https://arxiv.org/abs/1905.06482) |
| FiBiNET | [RecSys 2019][FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction](https://arxiv.org/pdf/1905.09433.pdf) |
| FLEN | [arxiv 2019][FLEN: Leveraging Field for Scalable CTR Prediction](https://arxiv.org/pdf/1911.04690.pdf) |
+| BST | [DLP-KDD 2019][Behavior sequence transformer for e-commerce recommendation in Alibaba](https://arxiv.org/pdf/1905.06874.pdf) |
| DCN V2 | [arxiv 2020][DCN V2: Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems](https://arxiv.org/abs/2008.13535) |
## Citation
@@ -82,3 +83,38 @@ If you find this code useful in your research, please cite it using the followin
![wechat](./docs/pics/code.png)
+
+## Main contributors([welcome to join us!](./CONTRIBUTING.md))
+
+
+
+
+
+
+ Shen Weichen
+
+ Alibaba Group
+ |
+
+
+ Zan Shuxun
+ Beijing University of Posts and Telecommunications
+ |
+
+
+ Harshit Pande
+ Amazon
+ |
+
+
+ Li Zichao
+ Peking University
+ |
+
+
+ LeoCai
+ Chongqing University of Posts and Telecommunications
+ |
+
+
+
diff --git a/deepctr/__init__.py b/deepctr/__init__.py
index b268aaeb..b3b93e89 100644
--- a/deepctr/__init__.py
+++ b/deepctr/__init__.py
@@ -1,4 +1,4 @@
-from .utils import check_version
-
-__version__ = '0.8.3'
-check_version(__version__)
+from .utils import check_version
+
+__version__ = '0.8.5'
+check_version(__version__)
diff --git a/deepctr/estimator/models/afm.py b/deepctr/estimator/models/afm.py
index b56282dc..e53e76d4 100644
--- a/deepctr/estimator/models/afm.py
+++ b/deepctr/estimator/models/afm.py
@@ -2,7 +2,7 @@
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of feature interactions via attention networks[J]. arXiv preprint arXiv:1708.04617, 2017.
diff --git a/deepctr/estimator/models/autoint.py b/deepctr/estimator/models/autoint.py
index 40e3b1e1..843f41e6 100644
--- a/deepctr/estimator/models/autoint.py
+++ b/deepctr/estimator/models/autoint.py
@@ -2,7 +2,7 @@
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. arXiv preprint arXiv:1810.11921, 2018.(https://arxiv.org/abs/1810.11921)
diff --git a/deepctr/estimator/models/ccpm.py b/deepctr/estimator/models/ccpm.py
index cc788a38..0bae78fa 100644
--- a/deepctr/estimator/models/ccpm.py
+++ b/deepctr/estimator/models/ccpm.py
@@ -2,7 +2,7 @@
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Liu Q, Yu F, Wu S, et al. A convolutional click prediction model[C]//Proceedings of the 24th ACM International on Conference on Information and Knowledge Management. ACM, 2015: 1743-1746.
diff --git a/deepctr/estimator/models/dcn.py b/deepctr/estimator/models/dcn.py
index ceba2b6b..78610be9 100644
--- a/deepctr/estimator/models/dcn.py
+++ b/deepctr/estimator/models/dcn.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Wang R, Fu B, Fu G, et al. Deep & cross network for ad click predictions[C]//Proceedings of the ADKDD'17. ACM, 2017: 12. (https://arxiv.org/abs/1708.05123)
diff --git a/deepctr/estimator/models/deepfm.py b/deepctr/estimator/models/deepfm.py
index 0021b506..25c311d7 100644
--- a/deepctr/estimator/models/deepfm.py
+++ b/deepctr/estimator/models/deepfm.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Guo H, Tang R, Ye Y, et al. Deepfm: a factorization-machine based neural network for ctr prediction[J]. arXiv preprint arXiv:1703.04247, 2017.(https://arxiv.org/abs/1703.04247)
diff --git a/deepctr/estimator/models/fibinet.py b/deepctr/estimator/models/fibinet.py
index 619f4f8e..1fc25a1f 100644
--- a/deepctr/estimator/models/fibinet.py
+++ b/deepctr/estimator/models/fibinet.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Huang T, Zhang Z, Zhang J. FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1905.09433, 2019.
diff --git a/deepctr/estimator/models/fnn.py b/deepctr/estimator/models/fnn.py
index aeb7de1a..f2270a06 100644
--- a/deepctr/estimator/models/fnn.py
+++ b/deepctr/estimator/models/fnn.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Zhang W, Du T, Wang J. Deep learning over multi-field categorical data[C]//European conference on information retrieval. Springer, Cham, 2016: 45-57.(https://arxiv.org/pdf/1601.02376.pdf)
diff --git a/deepctr/estimator/models/fwfm.py b/deepctr/estimator/models/fwfm.py
index f5de40ab..69b1fa37 100644
--- a/deepctr/estimator/models/fwfm.py
+++ b/deepctr/estimator/models/fwfm.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Harshit Pande
Reference:
diff --git a/deepctr/estimator/models/nfm.py b/deepctr/estimator/models/nfm.py
index ff10b776..cabab4ce 100644
--- a/deepctr/estimator/models/nfm.py
+++ b/deepctr/estimator/models/nfm.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364. (https://arxiv.org/abs/1708.05027)
diff --git a/deepctr/estimator/models/pnn.py b/deepctr/estimator/models/pnn.py
index add1da8b..9dcdb5cc 100644
--- a/deepctr/estimator/models/pnn.py
+++ b/deepctr/estimator/models/pnn.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.(https://arxiv.org/pdf/1611.00144.pdf)
diff --git a/deepctr/estimator/models/wdl.py b/deepctr/estimator/models/wdl.py
index 381d5f28..482c03b6 100644
--- a/deepctr/estimator/models/wdl.py
+++ b/deepctr/estimator/models/wdl.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Cheng H T, Koc L, Harmsen J, et al. Wide & deep learning for recommender systems[C]//Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. ACM, 2016: 7-10.(https://arxiv.org/pdf/1606.07792.pdf)
diff --git a/deepctr/estimator/models/xdeepfm.py b/deepctr/estimator/models/xdeepfm.py
index 65a0658a..b14a143c 100644
--- a/deepctr/estimator/models/xdeepfm.py
+++ b/deepctr/estimator/models/xdeepfm.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018.(https://arxiv.org/pdf/1803.05170.pdf)
diff --git a/deepctr/inputs.py b/deepctr/inputs.py
index 92019a85..a36e4e9b 100644
--- a/deepctr/inputs.py
+++ b/deepctr/inputs.py
@@ -2,7 +2,7 @@
"""
Author:
- Weichen Shen,wcshen1994@163.com
+ Weichen Shen,weichenswc@163.com
"""
diff --git a/deepctr/layers/activation.py b/deepctr/layers/activation.py
index 1b97b4d7..5e55945f 100644
--- a/deepctr/layers/activation.py
+++ b/deepctr/layers/activation.py
@@ -2,7 +2,7 @@
"""
Author:
- Weichen Shen,wcshen1994@163.com
+ Weichen Shen,weichenswc@163.com
"""
diff --git a/deepctr/layers/core.py b/deepctr/layers/core.py
index f81bf97b..6ee4b77b 100644
--- a/deepctr/layers/core.py
+++ b/deepctr/layers/core.py
@@ -2,7 +2,7 @@
"""
Author:
- Weichen Shen,wcshen1994@163.com
+ Weichen Shen,weichenswc@163.com
"""
diff --git a/deepctr/layers/interaction.py b/deepctr/layers/interaction.py
index 87e159d1..fa32f047 100644
--- a/deepctr/layers/interaction.py
+++ b/deepctr/layers/interaction.py
@@ -2,7 +2,7 @@
"""
Authors:
- Weichen Shen,wcshen1994@163.com,
+ Weichen Shen,weichenswc@163.com,
Harshit Pande
"""
diff --git a/deepctr/layers/normalization.py b/deepctr/layers/normalization.py
index 7eec63ba..aa9d392c 100644
--- a/deepctr/layers/normalization.py
+++ b/deepctr/layers/normalization.py
@@ -2,7 +2,7 @@
"""
Author:
- Weichen Shen,wcshen1994@163.com
+ Weichen Shen,weichenswc@163.com
"""
diff --git a/deepctr/layers/sequence.py b/deepctr/layers/sequence.py
index 3c767a07..4160fb11 100644
--- a/deepctr/layers/sequence.py
+++ b/deepctr/layers/sequence.py
@@ -2,7 +2,7 @@
"""
Author:
- Weichen Shen,wcshen1994@163.com
+ Weichen Shen,weichenswc@163.com
"""
@@ -79,7 +79,7 @@ def call(self, seq_value_len_list, mask=None, **kwargs):
mask = tf.tile(mask, [1, 1, embedding_size])
if self.mode == "max":
- hist = uiseq_embed_list - (1-mask) * 1e9
+ hist = uiseq_embed_list - (1 - mask) * 1e9
return reduce_max(hist, 1, keep_dims=True)
hist = reduce_sum(uiseq_embed_list * mask, 1, keep_dims=False)
@@ -417,12 +417,12 @@ class Transformer(Layer):
""" Simplified version of Transformer proposed in 《Attention is all you need》
Input shape
- - a list of two 3D tensor with shape ``(batch_size, timesteps, input_dim)`` if supports_masking=True.
- - a list of two 4 tensors, first two tensors with shape ``(batch_size, timesteps, input_dim)``,last two tensors with shape ``(batch_size, 1)`` if supports_masking=False.
+ - a list of two 3D tensor with shape ``(batch_size, timesteps, input_dim)`` if ``supports_masking=True`` .
+ - a list of two 4 tensors, first two tensors with shape ``(batch_size, timesteps, input_dim)``,last two tensors with shape ``(batch_size, 1)`` if ``supports_masking=False`` .
Output shape
- - 3D tensor with shape: ``(batch_size, 1, input_dim)``.
+ - 3D tensor with shape: ``(batch_size, 1, input_dim)`` if ``output_type='mean'`` or ``output_type='sum'`` , else ``(batch_size, timesteps, input_dim)`` .
Arguments
@@ -436,6 +436,8 @@ class Transformer(Layer):
- **blinding**: bool. Whether or not use blinding.
- **seed**: A Python integer to use as random seed.
- **supports_masking**:bool. Whether or not support masking.
+ - **attention_type**: str, Type of attention, the value must be one of { ``'scaled_dot_product'`` , ``'additive'`` }.
+      - **output_type**: ``'mean'`` , ``'sum'`` or ``None``. Whether or not use average/sum pooling for output.
References
- [Vaswani, Ashish, et al. "Attention is all you need." Advances in Neural Information Processing Systems. 2017.](https://papers.nips.cc/paper/7181-attention-is-all-you-need.pdf)
@@ -443,7 +445,7 @@ class Transformer(Layer):
def __init__(self, att_embedding_size=1, head_num=8, dropout_rate=0.0, use_positional_encoding=True, use_res=True,
use_feed_forward=True, use_layer_norm=False, blinding=True, seed=1024, supports_masking=False,
- **kwargs):
+ attention_type="scaled_dot_product", output_type="mean", **kwargs):
if head_num <= 0:
raise ValueError('head_num must be a int > 0')
self.att_embedding_size = att_embedding_size
@@ -456,6 +458,8 @@ def __init__(self, att_embedding_size=1, head_num=8, dropout_rate=0.0, use_posit
self.dropout_rate = dropout_rate
self.use_layer_norm = use_layer_norm
self.blinding = blinding
+ self.attention_type = attention_type
+ self.output_type = output_type
super(Transformer, self).__init__(**kwargs)
self.supports_masking = supports_masking
@@ -464,7 +468,7 @@ def build(self, input_shape):
if self.num_units != embedding_size:
raise ValueError(
"att_embedding_size * head_num must equal the last dimension size of inputs,got %d * %d != %d" % (
- self.att_embedding_size, self.head_num, embedding_size))
+ self.att_embedding_size, self.head_num, embedding_size))
self.seq_len_max = int(input_shape[0][-2])
self.W_Query = self.add_weight(name='query', shape=[embedding_size, self.att_embedding_size * self.head_num],
dtype=tf.float32,
@@ -475,6 +479,11 @@ def build(self, input_shape):
self.W_Value = self.add_weight(name='value', shape=[embedding_size, self.att_embedding_size * self.head_num],
dtype=tf.float32,
initializer=tf.keras.initializers.TruncatedNormal(seed=self.seed + 2))
+ if self.attention_type == "additive":
+ self.b = self.add_weight('b', shape=[self.att_embedding_size], dtype=tf.float32,
+ initializer=tf.keras.initializers.glorot_uniform(seed=self.seed))
+ self.v = self.add_weight('v', shape=[self.att_embedding_size], dtype=tf.float32,
+ initializer=tf.keras.initializers.glorot_uniform(seed=self.seed))
# if self.use_res:
# self.W_Res = self.add_weight(name='res', shape=[embedding_size, self.att_embedding_size * self.head_num], dtype=tf.float32,
# initializer=tf.keras.initializers.TruncatedNormal(seed=self.seed))
@@ -525,10 +534,18 @@ def call(self, inputs, mask=None, training=None, **kwargs):
keys = tf.concat(tf.split(keys, self.head_num, axis=2), axis=0)
values = tf.concat(tf.split(values, self.head_num, axis=2), axis=0)
- # head_num*None T_q T_k
- outputs = tf.matmul(querys, keys, transpose_b=True)
+ if self.attention_type == "scaled_dot_product":
+ # head_num*None T_q T_k
+ outputs = tf.matmul(querys, keys, transpose_b=True)
- outputs = outputs / (keys.get_shape().as_list()[-1] ** 0.5)
+ outputs = outputs / (keys.get_shape().as_list()[-1] ** 0.5)
+ elif self.attention_type == "additive":
+ querys_reshaped = tf.expand_dims(querys, axis=-2)
+ keys_reshaped = tf.expand_dims(keys, axis=-3)
+ outputs = tf.tanh(tf.nn.bias_add(querys_reshaped + keys_reshaped, self.b))
+ outputs = tf.squeeze(tf.tensordot(outputs, tf.expand_dims(self.v, axis=-1), axes=[-1, 0]), axis=-1)
+ else:
+            raise NotImplementedError("attention_type must be 'scaled_dot_product' or 'additive'")
key_masks = tf.tile(key_masks, [self.head_num, 1])
@@ -579,7 +596,12 @@ def call(self, inputs, mask=None, training=None, **kwargs):
if self.use_layer_norm:
result = self.ln(result)
- return reduce_mean(result, axis=1, keep_dims=True)
+ if self.output_type == "mean":
+ return reduce_mean(result, axis=1, keep_dims=True)
+ elif self.output_type == "sum":
+ return reduce_sum(result, axis=1, keep_dims=True)
+ else:
+ return result
def compute_output_shape(self, input_shape):
@@ -593,7 +615,7 @@ def get_config(self, ):
'dropout_rate': self.dropout_rate, 'use_res': self.use_res,
'use_positional_encoding': self.use_positional_encoding, 'use_feed_forward': self.use_feed_forward,
'use_layer_norm': self.use_layer_norm, 'seed': self.seed, 'supports_masking': self.supports_masking,
- 'blinding': self.blinding}
+ 'blinding': self.blinding, 'attention_type': self.attention_type, 'output_type': self.output_type}
base_config = super(Transformer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
diff --git a/deepctr/layers/utils.py b/deepctr/layers/utils.py
index d13928a4..ca73d6a3 100644
--- a/deepctr/layers/utils.py
+++ b/deepctr/layers/utils.py
@@ -2,7 +2,7 @@
"""
Author:
- Weichen Shen,wcshen1994@163.com
+ Weichen Shen,weichenswc@163.com
"""
import tensorflow as tf
diff --git a/deepctr/models/__init__.py b/deepctr/models/__init__.py
index 9e77e40d..217b357b 100644
--- a/deepctr/models/__init__.py
+++ b/deepctr/models/__init__.py
@@ -18,6 +18,7 @@
from .fibinet import FiBiNET
from .flen import FLEN
from .fwfm import FwFM
+from .bst import BST
__all__ = ["AFM", "CCPM", "DCN", "DCNMix", "MLR", "DeepFM", "MLR", "NFM", "DIN", "DIEN", "FNN", "PNN",
- "WDL", "xDeepFM", "AutoInt", "ONN", "FGCNN", "DSIN", "FiBiNET", 'FLEN', "FwFM"]
+ "WDL", "xDeepFM", "AutoInt", "ONN", "FGCNN", "DSIN", "FiBiNET", 'FLEN', "FwFM", "BST"]
diff --git a/deepctr/models/afm.py b/deepctr/models/afm.py
index dd664373..3f5ea7d1 100644
--- a/deepctr/models/afm.py
+++ b/deepctr/models/afm.py
@@ -2,7 +2,7 @@
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Xiao J, Ye H, He X, et al. Attentional factorization machines: Learning the weight of feature interactions via attention networks[J]. arXiv preprint arXiv:1708.04617, 2017.
diff --git a/deepctr/models/autoint.py b/deepctr/models/autoint.py
index c742e3c2..1818e6a0 100644
--- a/deepctr/models/autoint.py
+++ b/deepctr/models/autoint.py
@@ -2,7 +2,7 @@
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Song W, Shi C, Xiao Z, et al. AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks[J]. arXiv preprint arXiv:1810.11921, 2018.(https://arxiv.org/abs/1810.11921)
diff --git a/deepctr/models/bst.py b/deepctr/models/bst.py
new file mode 100644
index 00000000..ba5a8bb8
--- /dev/null
+++ b/deepctr/models/bst.py
@@ -0,0 +1,107 @@
+# -*- coding:utf-8 -*-
+"""
+Author:
+ Zichao Li, 2843656167@qq.com
+
+Reference:
+ Qiwei Chen, Huan Zhao, Wei Li, Pipei Huang, and Wenwu Ou. 2019. Behavior sequence transformer for e-commerce recommendation in Alibaba. In Proceedings of the 1st International Workshop on Deep Learning Practice for High-Dimensional Sparse Data (DLP-KDD '19). Association for Computing Machinery, New York, NY, USA, Article 12, 1–4. DOI:https://doi.org/10.1145/3326937.3341261
+"""
+
+import tensorflow as tf
+from tensorflow.python.keras.layers import (Dense, Flatten)
+
+from ..feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, build_input_features
+from ..inputs import get_varlen_pooling_list, create_embedding_matrix, embedding_lookup, varlen_embedding_lookup, \
+ get_dense_input
+from ..layers.core import DNN, PredictionLayer
+from ..layers.sequence import Transformer, AttentionSequencePoolingLayer
+from ..layers.utils import concat_func, combined_dnn_input
+
+
+def BST(dnn_feature_columns, history_feature_list, transformer_num=1, att_head_num=8,
+ use_bn=False, dnn_hidden_units=(200, 80), dnn_activation='relu', l2_reg_dnn=0,
+ l2_reg_embedding=1e-6, dnn_dropout=0.0, seed=1024, task='binary'):
+ """Instantiates the BST architecture.
+
+ :param dnn_feature_columns: An iterable containing all the features used by deep part of the model.
+ :param history_feature_list: list, to indicate sequence sparse field.
+ :param transformer_num: int, the number of transformer layer.
+ :param att_head_num: int, the number of heads in multi-head self attention.
+ :param use_bn: bool. Whether use BatchNormalization before activation or not in deep net
+ :param dnn_hidden_units: list,list of positive integer or empty list, the layer number and units in each layer of DNN
+ :param dnn_activation: Activation function to use in DNN
+ :param l2_reg_dnn: float. L2 regularizer strength applied to DNN
+ :param l2_reg_embedding: float. L2 regularizer strength applied to embedding vector
+ :param dnn_dropout: float in [0,1), the probability we will drop out a given DNN coordinate.
+ :param seed: integer ,to use as random seed.
+ :param task: str, ``"binary"`` for binary logloss or ``"regression"`` for regression loss
+ :return: A Keras model instance.
+
+ """
+
+ features = build_input_features(dnn_feature_columns)
+ inputs_list = list(features.values())
+
+ user_behavior_length = features["seq_length"]
+
+ sparse_feature_columns = list(
+ filter(lambda x: isinstance(x, SparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
+ dense_feature_columns = list(
+ filter(lambda x: isinstance(x, DenseFeat), dnn_feature_columns)) if dnn_feature_columns else []
+ varlen_sparse_feature_columns = list(
+ filter(lambda x: isinstance(x, VarLenSparseFeat), dnn_feature_columns)) if dnn_feature_columns else []
+
+ history_feature_columns = []
+ sparse_varlen_feature_columns = []
+ history_fc_names = list(map(lambda x: "hist_" + x, history_feature_list))
+
+ for fc in varlen_sparse_feature_columns:
+ feature_name = fc.name
+ if feature_name in history_fc_names:
+ history_feature_columns.append(fc)
+ else:
+ sparse_varlen_feature_columns.append(fc)
+
+ embedding_dict = create_embedding_matrix(dnn_feature_columns, l2_reg_embedding, seed, prefix="",
+ seq_mask_zero=True)
+
+ query_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
+ return_feat_list=history_feature_list, to_list=True)
+ hist_emb_list = embedding_lookup(embedding_dict, features, history_feature_columns,
+ return_feat_list=history_fc_names, to_list=True)
+ dnn_input_emb_list = embedding_lookup(embedding_dict, features, sparse_feature_columns,
+ mask_feat_list=history_feature_list, to_list=True)
+ dense_value_list = get_dense_input(features, dense_feature_columns)
+ sequence_embed_dict = varlen_embedding_lookup(embedding_dict, features, sparse_varlen_feature_columns)
+ sequence_embed_list = get_varlen_pooling_list(sequence_embed_dict, features, sparse_varlen_feature_columns,
+ to_list=True)
+
+ dnn_input_emb_list += sequence_embed_list
+ query_emb = concat_func(query_emb_list)
+ deep_input_emb = concat_func(dnn_input_emb_list)
+ hist_emb = concat_func(hist_emb_list)
+
+ transformer_output = hist_emb
+ for i in range(transformer_num):
+ att_embedding_size = transformer_output.get_shape().as_list()[-1] // att_head_num
+ transformer_layer = Transformer(att_embedding_size=att_embedding_size, head_num=att_head_num,
+ dropout_rate=dnn_dropout, use_positional_encoding=True, use_res=True,
+ use_feed_forward=True, use_layer_norm=True, blinding=False, seed=seed,
+ supports_masking=False, output_type=None)
+ transformer_output = transformer_layer([transformer_output, transformer_output,
+ user_behavior_length, user_behavior_length])
+
+ attn_output = AttentionSequencePoolingLayer(att_hidden_units=(64, 16), weight_normalization=True,
+ supports_masking=False)([query_emb, transformer_output,
+ user_behavior_length])
+ deep_input_emb = concat_func([deep_input_emb, attn_output], axis=-1)
+ deep_input_emb = Flatten()(deep_input_emb)
+
+ dnn_input = combined_dnn_input([deep_input_emb], dense_value_list)
+ output = DNN(dnn_hidden_units, dnn_activation, l2_reg_dnn, dnn_dropout, use_bn, seed=seed)(dnn_input)
+ final_logit = Dense(1, use_bias=False, kernel_initializer=tf.keras.initializers.glorot_normal(seed))(output)
+ output = PredictionLayer(task)(final_logit)
+
+ model = tf.keras.models.Model(inputs=inputs_list, outputs=output)
+
+ return model
diff --git a/deepctr/models/ccpm.py b/deepctr/models/ccpm.py
index cfe0013f..05c8e5f1 100644
--- a/deepctr/models/ccpm.py
+++ b/deepctr/models/ccpm.py
@@ -2,7 +2,7 @@
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Liu Q, Yu F, Wu S, et al. A convolutional click prediction model[C]//Proceedings of the 24th ACM International on Conference on Information and Knowledge Management. ACM, 2015: 1743-1746.
diff --git a/deepctr/models/dcn.py b/deepctr/models/dcn.py
index 5a14f5a7..69d23e58 100644
--- a/deepctr/models/dcn.py
+++ b/deepctr/models/dcn.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Shuxun Zan, zanshuxun@aliyun.com
diff --git a/deepctr/models/dcnmix.py b/deepctr/models/dcnmix.py
index ea62cd24..7b257643 100644
--- a/deepctr/models/dcnmix.py
+++ b/deepctr/models/dcnmix.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Shuxun Zan, zanshuxun@aliyun.com
diff --git a/deepctr/models/deepfm.py b/deepctr/models/deepfm.py
index 125744ad..14ad4c8c 100644
--- a/deepctr/models/deepfm.py
+++ b/deepctr/models/deepfm.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Guo H, Tang R, Ye Y, et al. Deepfm: a factorization-machine based neural network for ctr prediction[J]. arXiv preprint arXiv:1703.04247, 2017.(https://arxiv.org/abs/1703.04247)
diff --git a/deepctr/models/dien.py b/deepctr/models/dien.py
index 3b167472..98ce9f1d 100644
--- a/deepctr/models/dien.py
+++ b/deepctr/models/dien.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Zhou G, Mou N, Fan Y, et al. Deep Interest Evolution Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1809.03672, 2018. (https://arxiv.org/pdf/1809.03672.pdf)
diff --git a/deepctr/models/din.py b/deepctr/models/din.py
index 1e2c536d..9c3ba5c5 100644
--- a/deepctr/models/din.py
+++ b/deepctr/models/din.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Zhou G, Zhu X, Song C, et al. Deep interest network for click-through rate prediction[C]//Proceedings of the 24th ACM SIGKDD International Conference on Knowledge Discovery & Data Mining. ACM, 2018: 1059-1068. (https://arxiv.org/pdf/1706.06978.pdf)
diff --git a/deepctr/models/dsin.py b/deepctr/models/dsin.py
index de52ea0b..5091c296 100644
--- a/deepctr/models/dsin.py
+++ b/deepctr/models/dsin.py
@@ -1,7 +1,7 @@
# coding: utf-8
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Feng Y, Lv F, Shen W, et al. Deep Session Interest Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1905.06482, 2019.(https://arxiv.org/abs/1905.06482)
diff --git a/deepctr/models/fgcnn.py b/deepctr/models/fgcnn.py
index 3ee1eaa4..8860511e 100644
--- a/deepctr/models/fgcnn.py
+++ b/deepctr/models/fgcnn.py
@@ -2,7 +2,7 @@
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Liu B, Tang R, Chen Y, et al. Feature Generation by Convolutional Neural Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1904.04447, 2019.
diff --git a/deepctr/models/fibinet.py b/deepctr/models/fibinet.py
index c0407c6a..7cf5922c 100644
--- a/deepctr/models/fibinet.py
+++ b/deepctr/models/fibinet.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Huang T, Zhang Z, Zhang J. FiBiNET: Combining Feature Importance and Bilinear feature Interaction for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1905.09433, 2019.
diff --git a/deepctr/models/fnn.py b/deepctr/models/fnn.py
index b2d729ac..73c282a4 100644
--- a/deepctr/models/fnn.py
+++ b/deepctr/models/fnn.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Zhang W, Du T, Wang J. Deep learning over multi-field categorical data[C]//European conference on information retrieval. Springer, Cham, 2016: 45-57.(https://arxiv.org/pdf/1601.02376.pdf)
diff --git a/deepctr/models/mlr.py b/deepctr/models/mlr.py
index 3cff00d4..393d006c 100644
--- a/deepctr/models/mlr.py
+++ b/deepctr/models/mlr.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Gai K, Zhu X, Li H, et al. Learning Piece-wise Linear Models from Large Scale Data for Ad Click Prediction[J]. arXiv preprint arXiv:1704.05194, 2017.(https://arxiv.org/abs/1704.05194)
diff --git a/deepctr/models/nfm.py b/deepctr/models/nfm.py
index 5c643a94..84899f6c 100644
--- a/deepctr/models/nfm.py
+++ b/deepctr/models/nfm.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] He X, Chua T S. Neural factorization machines for sparse predictive analytics[C]//Proceedings of the 40th International ACM SIGIR conference on Research and Development in Information Retrieval. ACM, 2017: 355-364. (https://arxiv.org/abs/1708.05027)
diff --git a/deepctr/models/onn.py b/deepctr/models/onn.py
index bc96abaa..2708fc74 100644
--- a/deepctr/models/onn.py
+++ b/deepctr/models/onn.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Yang Y, Xu B, Shen F, et al. Operation-aware Neural Networks for User Response Prediction[J]. arXiv preprint arXiv:1904.12579, 2019. (https://arxiv.org/pdf/1904.12579)
diff --git a/deepctr/models/pnn.py b/deepctr/models/pnn.py
index c450c6ac..c8b94ac0 100644
--- a/deepctr/models/pnn.py
+++ b/deepctr/models/pnn.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.(https://arxiv.org/pdf/1611.00144.pdf)
diff --git a/deepctr/models/wdl.py b/deepctr/models/wdl.py
index 1d8a51e4..0cad17f5 100644
--- a/deepctr/models/wdl.py
+++ b/deepctr/models/wdl.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Cheng H T, Koc L, Harmsen J, et al. Wide & deep learning for recommender systems[C]//Proceedings of the 1st Workshop on Deep Learning for Recommender Systems. ACM, 2016: 7-10.(https://arxiv.org/pdf/1606.07792.pdf)
diff --git a/deepctr/models/xdeepfm.py b/deepctr/models/xdeepfm.py
index 2f518cd5..6e3bd7c8 100644
--- a/deepctr/models/xdeepfm.py
+++ b/deepctr/models/xdeepfm.py
@@ -1,7 +1,7 @@
# -*- coding:utf-8 -*-
"""
Author:
- Weichen Shen, wcshen1994@163.com
+ Weichen Shen, weichenswc@163.com
Reference:
[1] Lian J, Zhou X, Zhang F, et al. xDeepFM: Combining Explicit and Implicit Feature Interactions for Recommender Systems[J]. arXiv preprint arXiv:1803.05170, 2018.(https://arxiv.org/pdf/1803.05170.pdf)
diff --git a/deepctr/utils.py b/deepctr/utils.py
index 2049e0c9..7fe3b25a 100644
--- a/deepctr/utils.py
+++ b/deepctr/utils.py
@@ -2,7 +2,7 @@
"""
Author:
- Weichen Shen,wcshen1994@163.com
+ Weichen Shen,weichenswc@163.com
"""
diff --git a/docs/pics/BST.png b/docs/pics/BST.png
new file mode 100644
index 00000000..799b7135
Binary files /dev/null and b/docs/pics/BST.png differ
diff --git a/docs/source/Examples.md b/docs/source/Examples.md
index 34d3733f..de6b33c1 100644
--- a/docs/source/Examples.md
+++ b/docs/source/Examples.md
@@ -44,7 +44,7 @@ if __name__ == "__main__":
# 2.count #unique features for each sparse field,and record dense feature field name
- fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(),embedding_dim=4)
+ fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1,embedding_dim=4)
for i,feat in enumerate(sparse_features)] + [DenseFeat(feat, 1,)
for feat in dense_features]
@@ -161,7 +161,7 @@ if __name__ == "__main__":
lbe = LabelEncoder()
data[feat] = lbe.fit_transform(data[feat])
# 2.count #unique features for each sparse field
- fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique(),embedding_dim=4)
+ fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1,embedding_dim=4)
for feat in sparse_features]
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns
@@ -241,7 +241,7 @@ if __name__ == "__main__":
# 2.count #unique features for each sparse field and generate feature config for sequence feature
- fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique(),embedding_dim=4)
+ fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1,embedding_dim=4)
for feat in sparse_features]
use_weighted_sequence = False
@@ -415,8 +415,8 @@ if __name__ == "__main__":
for i, feat in enumerate(sparse_features):
dnn_feature_columns.append(tf.feature_column.embedding_column(
- tf.feature_column.categorical_column_with_identity(feat, data[feat].nunique()), 4))
- linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, data[feat].nunique()))
+ tf.feature_column.categorical_column_with_identity(feat, data[feat].max() + 1), 4))
+ linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, data[feat].max() + 1))
for feat in dense_features:
dnn_feature_columns.append(tf.feature_column.numeric_column(feat))
linear_feature_columns.append(tf.feature_column.numeric_column(feat))
diff --git a/docs/source/Features.md b/docs/source/Features.md
index d64acd05..4ec13c7c 100644
--- a/docs/source/Features.md
+++ b/docs/source/Features.md
@@ -284,6 +284,18 @@ Deep Session Interest Network (DSIN) extracts users' multiple historical session
[Feng Y, Lv F, Shen W, et al. Deep Session Interest Network for Click-Through Rate Prediction[J]. arXiv preprint arXiv:1905.06482, 2019.](https://arxiv.org/abs/1905.06482)
+### BST(Behavior Sequence Transformer)
+
+BST uses the powerful Transformer model to capture the sequential signals underlying users’ behavior sequences.
+
+[**BST Model API**](./deepctr.models.bst.html)
+
+[BST example](https://github.com/shenweichen/DeepCTR/tree/master/examples/run_din.py)
+
+![BST](../pics/BST.png)
+
+[Qiwei Chen, Huan Zhao, Wei Li, Pipei Huang, and Wenwu Ou. 2019. Behavior sequence transformer for e-commerce recommendation in Alibaba. In Proceedings of the 1st International Workshop on Deep Learning Practice for High-Dimensional Sparse Data (DLP-KDD '19). Association for Computing Machinery, New York, NY, USA, Article 12, 1–4.](https://arxiv.org/pdf/1905.06874.pdf)
+
### FiBiNET(Feature Importance and Bilinear feature Interaction NETwork)
Feature Importance and Bilinear feature Interaction NETwork is proposed to dynamically learn the feature importance and fine-grained feature interactions. On the one hand, the FiBiNET can dynamically learn the importance of fea- tures via the Squeeze-Excitation network (SENET) mechanism; on the other hand, it is able to effectively learn the feature interactions via bilinear function.
diff --git a/docs/source/History.md b/docs/source/History.md
index b9aa12a1..2559dccc 100644
--- a/docs/source/History.md
+++ b/docs/source/History.md
@@ -1,4 +1,5 @@
# History
+- 03/13/2021 : [v0.8.5](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.5) released.Add [BST](./Features.html#bst-behavior-sequence-transformer) model.
- 02/12/2021 : [v0.8.4](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.4) released.Fix bug in DCN-Mix.
- 01/06/2021 : [v0.8.3](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.3) released.Add [DCN-Mix](./Features.html#dcn-mix-improved-deep-cross-network-with-mix-of-experts-and-matrix-kernel) model.Support `transform_fn` in `DenseFeat`.
- 10/11/2020 : [v0.8.2](https://github.com/shenweichen/DeepCTR/releases/tag/v0.8.2) released.Refactor `DNN` Layer.
diff --git a/docs/source/Models.rst b/docs/source/Models.rst
index c7c80141..f123dea6 100644
--- a/docs/source/Models.rst
+++ b/docs/source/Models.rst
@@ -16,6 +16,7 @@ DeepCTR Models API
DIN
DIEN
DSIN
+ BST
xDeepFM
AutoInt
ONN
diff --git a/docs/source/Quick-Start.md b/docs/source/Quick-Start.md
index 0a862a0f..e587757f 100644
--- a/docs/source/Quick-Start.md
+++ b/docs/source/Quick-Start.md
@@ -79,7 +79,7 @@ And for varlen(multi-valued) sparse features,you can use [VarlenSparseFeat](./Fe
- Label Encoding
```python
-fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(),embedding_dim=4)
+fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1,embedding_dim=4)
for i,feat in enumerate(sparse_features)] + [DenseFeat(feat, 1,)
for feat in dense_features]
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 5a45c11a..f36db6d8 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -26,7 +26,7 @@
# The short X.Y version
version = ''
# The full version, including alpha/beta/rc tags
-release = '0.8.3'
+release = '0.8.5'
# -- General configuration ---------------------------------------------------
diff --git a/docs/source/deepctr.models.bst.rst b/docs/source/deepctr.models.bst.rst
new file mode 100644
index 00000000..d3a41a09
--- /dev/null
+++ b/docs/source/deepctr.models.bst.rst
@@ -0,0 +1,7 @@
+deepctr.models.bst module
+=========================
+
+.. automodule:: deepctr.models.bst
+ :members:
+ :no-undoc-members:
+ :no-show-inheritance:
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 34bd41d0..f5acd97f 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -42,12 +42,12 @@ You can read the latest code and related projects
News
-----
+03/13/2021 : Add `BST <./Features.html#bst-behavior-sequence-transformer>`_ . `Changelog `_
+
02/12/2021 : Fix bug in DCN-Mix. `Changelog `_
01/06/2021 : Add `DCN-Mix <./Features.html#dcn-mix-improved-deep-cross-network-with-mix-of-experts-and-matrix-kernel>`_ (`中文介绍 `_) and support ``transform_fn`` in ``DenseFeat``. `Changelog `_
-10/11/2020 : Refactor ``DNN`` Layer. `Changelog `_
-
DisscussionGroup
-----------------------
diff --git a/examples/run_classification_criteo.py b/examples/run_classification_criteo.py
index 164030d2..d2e7c6a0 100644
--- a/examples/run_classification_criteo.py
+++ b/examples/run_classification_criteo.py
@@ -25,7 +25,7 @@
# 2.count #unique features for each sparse field,and record dense feature field name
- fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(),embedding_dim=4 )
+ fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1,embedding_dim=4 )
for i,feat in enumerate(sparse_features)] + [DenseFeat(feat, 1,)
for feat in dense_features]
diff --git a/examples/run_classification_criteo_multi_gpu.py b/examples/run_classification_criteo_multi_gpu.py
index 440f2d39..cff0a617 100644
--- a/examples/run_classification_criteo_multi_gpu.py
+++ b/examples/run_classification_criteo_multi_gpu.py
@@ -26,7 +26,7 @@
# 2.count #unique features for each sparse field,and record dense feature field name
- fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].nunique(), embedding_dim=4)
+ fixlen_feature_columns = [SparseFeat(feat, vocabulary_size=data[feat].max() + 1, embedding_dim=4)
for feat in sparse_features] + [DenseFeat(feat, 1, )
for feat in dense_features]
diff --git a/examples/run_din.py b/examples/run_din.py
index 7187a8a5..44f162ee 100644
--- a/examples/run_din.py
+++ b/examples/run_din.py
@@ -1,16 +1,19 @@
import numpy as np
-from deepctr.models import DIN
-from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat,get_feature_names
+from deepctr.models import DIN, BST
+from deepctr.feature_column import SparseFeat, VarLenSparseFeat, DenseFeat, get_feature_names
def get_xy_fd():
-
- feature_columns = [SparseFeat('user',3,embedding_dim=10),SparseFeat(
- 'gender', 2,embedding_dim=4), SparseFeat('item_id', 3 + 1,embedding_dim=8), SparseFeat('cate_id', 2 + 1,embedding_dim=4),DenseFeat('pay_score', 1)]
- feature_columns += [VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=3 + 1,embedding_dim=8,embedding_name='item_id'), maxlen=4),
- VarLenSparseFeat(SparseFeat('hist_cate_id', 2 + 1,embedding_dim=4, embedding_name='cate_id'), maxlen=4)]
-
+ feature_columns = [SparseFeat('user', 3, embedding_dim=10), SparseFeat(
+ 'gender', 2, embedding_dim=4), SparseFeat('item_id', 3 + 1, embedding_dim=8),
+ SparseFeat('cate_id', 2 + 1, embedding_dim=4), DenseFeat('pay_score', 1)]
+ feature_columns += [
+ VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'),
+ maxlen=4, length_name="seq_length"),
+ VarLenSparseFeat(SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'), maxlen=4,
+ length_name="seq_length")]
+ # Notice: History behavior sequence feature name must start with "hist_".
behavior_feature_list = ["item_id", "cate_id"]
uid = np.array([0, 1, 2])
ugender = np.array([0, 1, 0])
@@ -20,10 +23,12 @@ def get_xy_fd():
hist_iid = np.array([[1, 2, 3, 0], [3, 2, 1, 0], [1, 2, 0, 0]])
hist_cate_id = np.array([[1, 2, 2, 0], [2, 2, 1, 0], [1, 2, 0, 0]])
+ seq_length = np.array([3, 3, 2]) # the actual length of the behavior sequence
feature_dict = {'user': uid, 'gender': ugender, 'item_id': iid, 'cate_id': cate_id,
- 'hist_item_id': hist_iid, 'hist_cate_id': hist_cate_id, 'pay_score': pay_score}
- x = {name:feature_dict[name] for name in get_feature_names(feature_columns)}
+ 'hist_item_id': hist_iid, 'hist_cate_id': hist_cate_id,
+ 'pay_score': pay_score, 'seq_length': seq_length}
+ x = {name: feature_dict[name] for name in get_feature_names(feature_columns)}
y = np.array([1, 0, 1])
return x, y, feature_columns, behavior_feature_list
@@ -31,6 +36,7 @@ def get_xy_fd():
if __name__ == "__main__":
x, y, feature_columns, behavior_feature_list = get_xy_fd()
model = DIN(feature_columns, behavior_feature_list)
+ # model = BST(feature_columns, behavior_feature_list,att_head_num=4)
model.compile('adam', 'binary_crossentropy',
metrics=['binary_crossentropy'])
history = model.fit(x, y, verbose=1, epochs=10, validation_split=0.5)
diff --git a/examples/run_estimator_pandas_classification.py b/examples/run_estimator_pandas_classification.py
index 84fe9f9f..d531abef 100644
--- a/examples/run_estimator_pandas_classification.py
+++ b/examples/run_estimator_pandas_classification.py
@@ -31,8 +31,8 @@
for i, feat in enumerate(sparse_features):
dnn_feature_columns.append(tf.feature_column.embedding_column(
- tf.feature_column.categorical_column_with_identity(feat, data[feat].nunique()), 4))
- linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, data[feat].nunique()))
+ tf.feature_column.categorical_column_with_identity(feat, data[feat].max() + 1), 4))
+ linear_feature_columns.append(tf.feature_column.categorical_column_with_identity(feat, data[feat].max() + 1))
for feat in dense_features:
dnn_feature_columns.append(tf.feature_column.numeric_column(feat))
linear_feature_columns.append(tf.feature_column.numeric_column(feat))
diff --git a/examples/run_flen.py b/examples/run_flen.py
index c8c29a31..7ca235d2 100644
--- a/examples/run_flen.py
+++ b/examples/run_flen.py
@@ -38,7 +38,7 @@
)
fixlen_feature_columns = [
- SparseFeat(name, vocabulary_size=data[name].nunique(), embedding_dim=16, use_hash=False, dtype='int32',
+ SparseFeat(name, vocabulary_size=data[name].max() + 1, embedding_dim=16, use_hash=False, dtype='int32',
group_name=field_info[name]) for name in sparse_features]
dnn_feature_columns = fixlen_feature_columns
diff --git a/examples/run_multivalue_movielens.py b/examples/run_multivalue_movielens.py
index 72e7513b..2063db22 100644
--- a/examples/run_multivalue_movielens.py
+++ b/examples/run_multivalue_movielens.py
@@ -37,7 +37,7 @@ def split(x):
# 2.count #unique features for each sparse field and generate feature config for sequence feature
- fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique(), embedding_dim=4)
+ fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1, embedding_dim=4)
for feat in sparse_features]
use_weighted_sequence = False
diff --git a/examples/run_regression_movielens.py b/examples/run_regression_movielens.py
index 5b631477..484cf20a 100644
--- a/examples/run_regression_movielens.py
+++ b/examples/run_regression_movielens.py
@@ -18,7 +18,7 @@
lbe = LabelEncoder()
data[feat] = lbe.fit_transform(data[feat])
# 2.count #unique features for each sparse field
- fixlen_feature_columns = [SparseFeat(feat, data[feat].nunique(),embedding_dim=4)
+ fixlen_feature_columns = [SparseFeat(feat, data[feat].max() + 1,embedding_dim=4)
for feat in sparse_features]
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns
diff --git a/setup.py b/setup.py
index 17287a9e..746c1136 100644
--- a/setup.py
+++ b/setup.py
@@ -4,12 +4,12 @@
long_description = fh.read()
REQUIRED_PACKAGES = [
- 'h5py==2.10.0','requests'
+ 'h5py==2.10.0', 'requests'
]
setuptools.setup(
name="deepctr",
- version="0.8.3",
+ version="0.8.5",
author="Weichen Shen",
author_email="weichenswc@163.com",
description="Easy-to-use,Modular and Extendible package of deep learning based CTR(Click Through Rate) prediction models with tensorflow 1.x and 2.x .",
diff --git a/tests/models/BST_test.py b/tests/models/BST_test.py
new file mode 100644
index 00000000..b895f4ef
--- /dev/null
+++ b/tests/models/BST_test.py
@@ -0,0 +1,20 @@
+from deepctr.models import BST
+from ..utils import check_model
+from .DIN_test import get_xy_fd
+
+
+def test_BST():
+ model_name = "BST"
+
+ x, y, feature_columns, behavior_feature_list = get_xy_fd(hash_flag=True)
+
+ model = BST(dnn_feature_columns=feature_columns,
+ history_feature_list=behavior_feature_list,
+ att_head_num=4)
+
+ check_model(model, model_name, x, y,
+ check_model_io=True)
+
+
+if __name__ == "__main__":
+ pass
diff --git a/tests/models/DIN_test.py b/tests/models/DIN_test.py
index c401f6e0..1b462cb9 100644
--- a/tests/models/DIN_test.py
+++ b/tests/models/DIN_test.py
@@ -6,29 +6,30 @@
def get_xy_fd(hash_flag=False):
- feature_columns = [SparseFeat('user', 3), SparseFeat(
- 'gender', 2), SparseFeat('item', 3 + 1), SparseFeat('item_gender', 2 + 1), DenseFeat('score', 1)]
+ feature_columns = [SparseFeat('user', 3, embedding_dim=10), SparseFeat(
+ 'gender', 2, embedding_dim=4), SparseFeat('item_id', 3 + 1, embedding_dim=8),
+ SparseFeat('cate_id', 2 + 1, embedding_dim=4), DenseFeat('pay_score', 1)]
feature_columns += [
- VarLenSparseFeat(SparseFeat('hist_item', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item'),
- maxlen=4),
- VarLenSparseFeat(SparseFeat('hist_item_gender', 2 + 1, embedding_dim=4, embedding_name='item_gender'),
- maxlen=4)]
-
- behavior_feature_list = ["item", "item_gender"]
+ VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'),
+ maxlen=4, length_name="seq_length"),
+ VarLenSparseFeat(SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'), maxlen=4,
+ length_name="seq_length")]
+ # Notice: History behavior sequence feature name must start with "hist_".
+ behavior_feature_list = ["item_id", "cate_id"]
uid = np.array([0, 1, 2])
ugender = np.array([0, 1, 0])
iid = np.array([1, 2, 3]) # 0 is mask value
- igender = np.array([1, 2, 1]) # 0 is mask value
- score = np.array([0.1, 0.2, 0.3])
-
- hist_iid = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
- hist_igender = np.array([[1, 1, 2, 0], [2, 1, 1, 0], [2, 1, 0, 0]])
+ cate_id = np.array([1, 2, 2]) # 0 is mask value
+ pay_score = np.array([0.1, 0.2, 0.3])
- feature_dict = {'user': uid, 'gender': ugender, 'item': iid, 'item_gender': igender,
- 'hist_item': hist_iid, 'hist_item_gender': hist_igender, 'score': score}
+ hist_iid = np.array([[1, 2, 3, 0], [3, 2, 1, 0], [1, 2, 0, 0]])
+ hist_cate_id = np.array([[1, 2, 2, 0], [2, 2, 1, 0], [1, 2, 0, 0]])
+ seq_length = np.array([3, 3, 2]) # the actual length of the behavior sequence
- feature_names = get_feature_names(feature_columns)
- x = {name: feature_dict[name] for name in feature_names}
+ feature_dict = {'user': uid, 'gender': ugender, 'item_id': iid, 'cate_id': cate_id,
+ 'hist_item_id': hist_iid, 'hist_cate_id': hist_cate_id,
+ 'pay_score': pay_score, 'seq_length': seq_length}
+ x = {name: feature_dict[name] for name in get_feature_names(feature_columns)}
y = np.array([1, 0, 1])
return x, y, feature_columns, behavior_feature_list