Change dtype 'string' to str for numpy & pandas objects

shenweichen · May 4, 2019 · 6713b8c · 6713b8c
1 parent 0c0a718
commit 6713b8c
Show file tree

Hide file tree

Showing 4 changed files with 39 additions and 15 deletions.
diff --git a/docs/source/Examples.md b/docs/source/Examples.md
@@ -52,7 +52,7 @@ if __name__ == "__main__":
 
     sparse_feature_list = [SingleFeat(feat, data[feat].nunique())
                            for feat in sparse_features]
-    dense_feature_list = [SingleFeat(feat, 0, False)
+    dense_feature_list = [SingleFeat(feat, 0,)
                           for feat in dense_features]
 
     # 3.generate input data for model
@@ -286,7 +286,7 @@ data = pd.read_csv("./movielens_sample.txt")
 sparse_features = ["movie_id", "user_id",
                    "gender", "age", "occupation", "zip", ]
 
-data[sparse_features] = data[sparse_features].astype('string')
+data[sparse_features] = data[sparse_features].astype(str)
 target = ['rating']
 
 # 1.Use hashing encoding on the fly for sparse features,and process sequence features
@@ -296,7 +296,7 @@ genres_length = np.array(list(map(len, genres_list)))
 max_len = max(genres_length)
 
 # Notice : padding=`post`
-genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype='string', value=0)
+genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=str, value=0)
 
 # 2.set hashing space for each sparse field and generate feature config for sequence feature
 

diff --git a/examples/run_all.sh b/examples/run_all.sh
@@ -1,17 +1,41 @@
 #!/usr/bin/env bash
 
+# Run every *.py example in the current directory; exit non-zero on the first failure.
+# $python_version is a global set by the caller; it is only used in the log lines.
+function run_py(){
+    code_path=./
+    # Glob instead of parsing `ls`, and match only a real .py suffix
+    # (the old unanchored regex `.py` also matched names like __pycache__).
+    for file in "$code_path"*.py
+    do
+      # An unmatched glob stays literal; skip it when no example exists.
+      [ -e "$file" ] || continue
+      if python "$file"
+        then
+          echo run $file succeed in $python_version
+        else
+          echo run $file failed in $python_version
+          # Exit statuses are 0-255; use a plain 1 rather than -1.
+          exit 1
+      fi
+    done
+}
+#python2
+python_version=python2
 cd ..
 pip install deepctr -e .
 cd ./examples
-code_path=./
+run_py
+echo "all examples run succeed in python2.7"
 
-python $code_path"run_classification_criteo.py"
-python $code_path"run_classification_criteo_hash.py"
-python $code_path"run_regression_movielens.py"
-python $code_path"run_multivalue_movielens.py"
-python $code_path"run_multivalue_movielens_hash.py"
-python $code_path"run_dien.py"
-python $code_path"run_din.py"
+## python3
+python_version=python3
+source activate py36
+cd ..
+pip install deepctr -e .
+cd ./examples
+run_py
 
+echo "all examples run succeed in python3.6"
 
-echo "examples run done!!"
+echo "all examples run succeed in python2.7 and python3.6"
diff --git a/examples/run_classification_criteo.py b/examples/run_classification_criteo.py
@@ -27,7 +27,7 @@
 
     sparse_feature_list = [SingleFeat(feat, data[feat].nunique())
                            for feat in sparse_features]
-    dense_feature_list = [SingleFeat(feat, 0, False)
+    dense_feature_list = [SingleFeat(feat, 0,)
                           for feat in dense_features]
 
     # 3.generate input data for model

diff --git a/examples/run_multivalue_movielens_hash.py b/examples/run_multivalue_movielens_hash.py
@@ -9,7 +9,7 @@
 sparse_features = ["movie_id", "user_id",
                    "gender", "age", "occupation", "zip", ]
 
-data[sparse_features] = data[sparse_features].astype('string')
+data[sparse_features] = data[sparse_features].astype(str)
 target = ['rating']
 
 # 1.Use hashing encoding on the fly for sparse features,and process sequence features
@@ -19,7 +19,7 @@
 max_len = max(genres_length)
 
 # Notice : padding=`post`
-genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype='string', value=0)
+genres_list = pad_sequences(genres_list, maxlen=max_len, padding='post', dtype=str, value=0)
 
 # 2.set hashing space for each sparse field and generate feature config for sequence feature