Skip to content

Commit

Permalink
change dtype string to str of numpy&pandas object
Browse files Browse the repository at this point in the history
  • Loading branch information
Weichen Shen committed May 4, 2019
1 parent 0c0a718 commit 6713b8c
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 15 deletions.
6 changes: 3 additions & 3 deletions docs/source/Examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ if __name__ == "__main__":

sparse_feature_list = [SingleFeat(feat, data[feat].nunique())
for feat in sparse_features]
dense_feature_list = [SingleFeat(feat, 0, False)
dense_feature_list = [SingleFeat(feat, 0,)
for feat in dense_features]

# 3.generate input data for model
Expand Down Expand Up @@ -286,7 +286,7 @@ data = pd.read_csv("./movielens_sample.txt")
sparse_features = ["movie_id", "user_id",
"gender", "age", "occupation", "zip", ]

data[sparse_features] = data[sparse_features].astype('string')
data[sparse_features] = data[sparse_features].astype(str)
target = ['rating']

# 1. Use hashing encoding on the fly for sparse features, and process sequence features
Expand All @@ -296,7 +296,7 @@ genres_length = np.array(list(map(len, genres_list)))
max_len = max(genres_length)

# Notice : padding=`post`
genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype='string', value=0)
genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=str, value=0)

# 2.set hashing space for each sparse field and generate feature config for sequence feature

Expand Down
42 changes: 33 additions & 9 deletions examples/run_all.sh
Original file line number Diff line number Diff line change
@@ -1,17 +1,41 @@
#!/usr/bin/env bash

# Run every Python example script in the current directory.
#
# Reads:   $python_version - label used only in the status messages.
# Sets:    $code_path      - directory prefix of the scripts (kept global,
#                            as before, so later statements can reuse it).
# Exits:   terminates the whole script with a non-zero status as soon as
#          one example fails; returns normally when all of them succeed.
function run_py(){
    code_path=./
    local file

    # Glob on the literal ".py" suffix instead of regex-matching `ls` output:
    # the old unanchored pattern `.py` also matched names such as
    # "__pycache__" or "copy.txt", and `ls` output is split on whitespace.
    for file in "$code_path"*.py
    do
        # With no match the pattern stays literal; skip it.
        [ -f "$file" ] || continue

        if python "$file"
        then
            echo "run $file succeed in $python_version"
        else
            echo "run $file failed in $python_version"
            # `exit -1` is outside the valid 0-255 range; use a plain failure code.
            exit 1
        fi
    done
}
#python2
# Label read by run_py when it prints its per-script status messages.
python_version=python2
# Run pip from the parent directory, then return to the examples directory.
cd ..
pip install deepctr -e .
cd ./examples
# Directory prefix used by the explicit per-script invocations below.
code_path=./
# run_py exits the whole script on the first failing example, so the
# message below is only reached when every example succeeded.
run_py
echo "all examples run succeed in python2.7"

# Explicit per-script runs; their exit status is not checked.
# NOTE(review): run_py above already iterates over the scripts in this
# directory, so these look redundant - presumably left over from the
# previous version of this script; confirm before relying on them.
python $code_path"run_classification_criteo.py"
python $code_path"run_classification_criteo_hash.py"
python $code_path"run_regression_movielens.py"
python $code_path"run_multivalue_movielens.py"
python $code_path"run_multivalue_movielens_hash.py"
python $code_path"run_dien.py"
python $code_path"run_din.py"
## python3
python_version=python3
# Presumably activates a conda environment named py36 that provides
# Python 3.6 - TODO confirm the environment exists on the target machine.
source activate py36
# Same install-and-run sequence as the python2 pass above.
cd ..
pip install deepctr -e .
cd ./examples
run_py

echo "all examples run succeed in python3.6"

echo "examples run done!!"
echo "all examples run succeed in python2.7 and python3.6"
2 changes: 1 addition & 1 deletion examples/run_classification_criteo.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

sparse_feature_list = [SingleFeat(feat, data[feat].nunique())
for feat in sparse_features]
dense_feature_list = [SingleFeat(feat, 0, False)
dense_feature_list = [SingleFeat(feat, 0,)
for feat in dense_features]

# 3.generate input data for model
Expand Down
4 changes: 2 additions & 2 deletions examples/run_multivalue_movielens_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
sparse_features = ["movie_id", "user_id",
"gender", "age", "occupation", "zip", ]

data[sparse_features] = data[sparse_features].astype('string')
data[sparse_features] = data[sparse_features].astype(str)
target = ['rating']

# 1. Use hashing encoding on the fly for sparse features, and process sequence features
Expand All @@ -19,7 +19,7 @@
max_len = max(genres_length)

# Notice : padding=`post`
genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype='string', value=0)
genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=str, value=0)

# 2.set hashing space for each sparse field and generate feature config for sequence feature

Expand Down

0 comments on commit 6713b8c

Please sign in to comment.