From 63184378840d1cc2708bc64d2d3c2807077fffc3 Mon Sep 17 00:00:00 2001 From: Michael Hu Date: Fri, 17 Apr 2020 12:59:18 -0400 Subject: [PATCH] fix bugs due to tf2 release (#447) --- .../cloudml-collaborative-filtering/README.md | 12 ++++++++++-- .../bin/run.predict.cloud.sh | 5 +++++ .../bin/run.serve.cloud.sh | 5 +++++ .../bin/run.train.cloud.sh | 9 +++++++-- .../bin/run.train.local.sh | 5 +++++ .../preprocessing/config.example.ini | 2 +- .../preprocessing/run_preprocess.py | 2 +- .../requirements.txt | 18 +++++++++--------- .../trainer/model.py | 2 +- .../trainer/utils.py | 6 +++--- 10 files changed, 47 insertions(+), 19 deletions(-) diff --git a/examples/cloudml-collaborative-filtering/README.md b/examples/cloudml-collaborative-filtering/README.md index 70d3dda02b..96b3b63cb2 100644 --- a/examples/cloudml-collaborative-filtering/README.md +++ b/examples/cloudml-collaborative-filtering/README.md @@ -26,12 +26,19 @@ Create a new project on GCP and set up GCP credentials: gcloud auth login gcloud auth application-default login ``` + +Enable the following APIs: +- [Dataflow](http://console.cloud.google.com/apis/api/dataflow.googleapis.com) +- [AI Platform](http://console.cloud.google.com/apis/api/ml.googleapis.com) + Using the `preprocessing/config.example.ini` template, create `preprocessing/config.ini` with the GCP project id fields filled in. +Additionally, you will need to create a GCS bucket. This code assumes a bucket +exists by the name of `[project-id]-bucket`. Set up your python environment: ```shell -virtualenv venv -p python3 +python3 -m venv venv source ./venv/bin/activate pip install -r requirements.txt ``` @@ -113,7 +120,8 @@ Model training can be monitored on Tensorboard using the following command: tensorboard --logdir / ``` Tensorboard's projector, in particular, is very useful for debugging -or analyzing embeddings. +or analyzing embeddings. In the projector tab in Tensorboard, try setting the +label to `name`. 
## Serving Models can be hosted on CAIP, which can be used to make online and batch predictions via JSON requests. diff --git a/examples/cloudml-collaborative-filtering/bin/run.predict.cloud.sh b/examples/cloudml-collaborative-filtering/bin/run.predict.cloud.sh index 8f3ad496f9..a3143af096 100755 --- a/examples/cloudml-collaborative-filtering/bin/run.predict.cloud.sh +++ b/examples/cloudml-collaborative-filtering/bin/run.predict.cloud.sh @@ -22,6 +22,11 @@ # TRIAL (optional): The trial number to use. . ./bin/_common.sh +if [ "$#" -lt 1 ]; then + echo "Illegal number of parameters. Should be >= 1, given $#." + exit 1 +fi + MODEL_OUTPUTS_DIR=$1 TRIAL=${2:-${DEFAULT_TRIAL}} diff --git a/examples/cloudml-collaborative-filtering/bin/run.serve.cloud.sh b/examples/cloudml-collaborative-filtering/bin/run.serve.cloud.sh index f1eab5914c..c5a4c68248 100755 --- a/examples/cloudml-collaborative-filtering/bin/run.serve.cloud.sh +++ b/examples/cloudml-collaborative-filtering/bin/run.serve.cloud.sh @@ -22,6 +22,11 @@ # TRIAL (optional): The trial number to use. . ./bin/_common.sh +if [ "$#" -lt 1 ]; then + echo "Illegal number of parameters. Should be >= 1, given $#." + exit 1 +fi + MODEL_OUTPUTS_DIR=$1 TRIAL=${2:-${DEFAULT_TRIAL}} diff --git a/examples/cloudml-collaborative-filtering/bin/run.train.cloud.sh b/examples/cloudml-collaborative-filtering/bin/run.train.cloud.sh index 2a9ec01b1c..2e8f335923 100755 --- a/examples/cloudml-collaborative-filtering/bin/run.train.cloud.sh +++ b/examples/cloudml-collaborative-filtering/bin/run.train.cloud.sh @@ -21,6 +21,11 @@ # This should just be a timestamp. . ./bin/_common.sh +if [ "$#" -lt 1 ]; then + echo "Illegal number of parameters. Should be >= 1, given $#." 
+ exit 1 +fi + MODEL_INPUTS_DIR=$1 NOW="$(get_date_time)" @@ -37,13 +42,13 @@ gcloud ai-platform jobs submit training "${TRAINING_JOB_NAME}" \ --staging-bucket "${OUTPUT_BUCKET}" \ --package-path trainer \ --region us-east1 \ - --runtime-version 1.13 \ + --runtime-version 1.15 \ --scale-tier "${SCALE_TIER}" \ -- \ --model_dir "${MODEL_PATH}" \ --input_dir "${INPUT_PATH}" \ --tft_dir "${TFT_PATH}" \ - --max_steps 300000 \ + --max_steps 100000 \ --batch_size 512 \ --user_embed_mult 1.5 \ --item_embed_mult 1 \ diff --git a/examples/cloudml-collaborative-filtering/bin/run.train.local.sh b/examples/cloudml-collaborative-filtering/bin/run.train.local.sh index 5eac159b4a..50c133293e 100755 --- a/examples/cloudml-collaborative-filtering/bin/run.train.local.sh +++ b/examples/cloudml-collaborative-filtering/bin/run.train.local.sh @@ -21,6 +21,11 @@ # This should just be a timestamp. . ./bin/_common.sh +if [ "$#" -lt 1 ]; then + echo "Illegal number of parameters. Should be >= 1, given $#." + exit 1 +fi + MODEL_INPUTS_DIR=$1 PROJECT_ID="$(get_project_id)" diff --git a/examples/cloudml-collaborative-filtering/preprocessing/config.example.ini b/examples/cloudml-collaborative-filtering/preprocessing/config.example.ini index 86a9211ef2..c5d746f96d 100644 --- a/examples/cloudml-collaborative-filtering/preprocessing/config.example.ini +++ b/examples/cloudml-collaborative-filtering/preprocessing/config.example.ini @@ -4,7 +4,7 @@ runner = DataflowRunner max_num_workers = 5 defaultWorkerLogLevel = INFO log_level = ERROR -zone = us-east1-b +region = us-east1 [LOCAL] project = [GCP_PROJECT_ID] diff --git a/examples/cloudml-collaborative-filtering/preprocessing/run_preprocess.py b/examples/cloudml-collaborative-filtering/preprocessing/run_preprocess.py index 313d4ca891..67bd5c60b8 100644 --- a/examples/cloudml-collaborative-filtering/preprocessing/run_preprocess.py +++ b/examples/cloudml-collaborative-filtering/preprocessing/run_preprocess.py @@ -107,7 +107,7 @@ def 
get_pipeline_options(args, config): "../setup.py")), "staging_location": os.path.join(args.job_dir, "staging"), "temp_location": os.path.join(args.job_dir, "tmp"), - "zone": config.get("zone") + "region": config.get("region"), }) pipeline_options = beam.pipeline.PipelineOptions(flags=[], **options) return pipeline_options diff --git a/examples/cloudml-collaborative-filtering/requirements.txt b/examples/cloudml-collaborative-filtering/requirements.txt index 018c7fe31e..d700a61a99 100644 --- a/examples/cloudml-collaborative-filtering/requirements.txt +++ b/examples/cloudml-collaborative-filtering/requirements.txt @@ -1,9 +1,9 @@ - apache-beam[gcp]>=2.13.0 - configparser>=3.7.4 - google-api-core>=1.13.0 - google-api-python-client>=1.7.9 - google-cloud-core>=1.0.2 - nose>=1.3.7 - tensorflow>=1.14.0 - tensorflow-transform>=0.13.0 - +apache-beam[gcp]>=2.16.0 +configparser>=3.7.4 +google-api-core>=1.13.0 +google-api-python-client>=1.7.9 +google-cloud-core>=1.0.2 +nose>=1.3.7 +tensorboard==1.15.0 +tensorflow>=1.15.0,<2.0.0 +tensorflow-transform==0.21.2 diff --git a/examples/cloudml-collaborative-filtering/trainer/model.py b/examples/cloudml-collaborative-filtering/trainer/model.py index 88229c4b2c..7656ec3fa1 100644 --- a/examples/cloudml-collaborative-filtering/trainer/model.py +++ b/examples/cloudml-collaborative-filtering/trainer/model.py @@ -20,8 +20,8 @@ import json import os +from tensorboard.plugins import projector import tensorflow as tf -from tensorflow.contrib.tensorboard.plugins import projector import tensorflow_transform as tft # pylint: disable=g-bad-import-order diff --git a/examples/cloudml-collaborative-filtering/trainer/utils.py b/examples/cloudml-collaborative-filtering/trainer/utils.py index 7785bd182c..3b21aed2bf 100644 --- a/examples/cloudml-collaborative-filtering/trainer/utils.py +++ b/examples/cloudml-collaborative-filtering/trainer/utils.py @@ -71,8 +71,8 @@ def write_projector_metadata(metadata_dir, tft_dir): constants.NUM_PROJECTOR_ITEMS) 
metadata = user_metadata + item_metadata metadata_path = os.path.join(metadata_dir, constants.PROJECTOR_PATH) - tf.gfile.MakeDirs(metadata_dir) - with tf.gfile.GFile(metadata_path, "w+") as f: + tf.io.gfile.makedirs(metadata_dir) + with tf.io.gfile.GFile(metadata_path, "w+") as f: f.write("label\tname\n") - f.write("\n".join(["\t".join(sample) for sample in metadata])) + f.write("\n".join(["{}\t{}".format(label, name) for label, name in metadata])) return user_indices, item_indices