-
Notifications
You must be signed in to change notification settings - Fork 16
Mingshan/Adding resnet50 validation script #478
base: master
Are you sure you want to change the base?
Changes from all commits
2ec2dc3
73396a4
d243d23
b86aae2
5fb1681
f5e824f
9dc0d6c
e346e9c
3745b0d
4957bd5
1a89cf1
fc71ed0
e10d87b
398ebf9
cd351b0
3801dd9
ab2ef67
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
diff --git a/scripts/tf_cnn_benchmarks/benchmark_cnn.py b/scripts/tf_cnn_benchmarks/benchmark_cnn.py | ||
index 09b118e..4cf9a12 100644 | ||
--- a/scripts/tf_cnn_benchmarks/benchmark_cnn.py | ||
+++ b/scripts/tf_cnn_benchmarks/benchmark_cnn.py | ||
@@ -34,6 +34,7 @@ import numpy as np | ||
import six | ||
from six.moves import xrange # pylint: disable=redefined-builtin | ||
import tensorflow as tf | ||
+import ngraph_bridge | ||
|
||
from google.protobuf import text_format | ||
|
||
@@ -2479,6 +2480,7 @@ class BenchmarkCNN(object): | ||
fetches = self._build_fetches(global_step, all_logits, losses, device_grads, | ||
enqueue_ops, update_ops, all_accuracy_ops, | ||
phase_train) | ||
+ | ||
if global_input_producer_op: | ||
global_input_producer_op = tf.group(*global_input_producer_op) | ||
else: | ||
diff --git a/scripts/tf_cnn_benchmarks/data_utils.py b/scripts/tf_cnn_benchmarks/data_utils.py | ||
index 0376d0b..992ee75 100644 | ||
--- a/scripts/tf_cnn_benchmarks/data_utils.py | ||
+++ b/scripts/tf_cnn_benchmarks/data_utils.py | ||
@@ -112,7 +112,10 @@ def create_dataset(batch_size, | ||
if not file_names: | ||
raise ValueError('Found no files in --data_dir matching: {}' | ||
.format(glob_pattern)) | ||
- ds = tf.data.TFRecordDataset.list_files(file_names) | ||
+ | ||
+ # ds = tf.data.TFRecordDataset.list_files(file_names) | ||
+ ds = tf.data.TFRecordDataset.list_files(file_names, shuffle=False, seed=10) | ||
+ | ||
ds = ds.apply( | ||
interleave_ops.parallel_interleave( | ||
tf.data.TFRecordDataset, cycle_length=10)) | ||
@@ -122,8 +125,9 @@ def create_dataset(batch_size, | ||
counter = counter.repeat() | ||
ds = tf.data.Dataset.zip((ds, counter)) | ||
ds = ds.prefetch(buffer_size=batch_size) | ||
- if train: | ||
- ds = ds.shuffle(buffer_size=10000) | ||
+ # Make dataset loader deterministic | ||
+ # if train: | ||
+ # ds = ds.shuffle(buffer_size=10000) | ||
ds = ds.repeat() | ||
ds = ds.apply( | ||
batching.map_and_batch( | ||
diff --git a/scripts/tf_cnn_benchmarks/preprocessing.py b/scripts/tf_cnn_benchmarks/preprocessing.py | ||
index 6a270b0..4e84a1a 100644 | ||
--- a/scripts/tf_cnn_benchmarks/preprocessing.py | ||
+++ b/scripts/tf_cnn_benchmarks/preprocessing.py | ||
@@ -335,9 +335,11 @@ def train_image(image_buffer, | ||
else: | ||
image = tf.image.decode_jpeg(image_buffer, channels=3, | ||
dct_method='INTEGER_FAST') | ||
- image = tf.slice(image, bbox_begin, bbox_size) | ||
|
||
- distorted_image = tf.image.random_flip_left_right(image) | ||
+ #image = tf.slice(image, bbox_begin, bbox_size) | ||
+ | ||
+ #distorted_image = tf.image.random_flip_left_right(image) | ||
+ distorted_image = image | ||
|
||
# This resizing operation may distort the images because the aspect | ||
# ratio is not respected. | ||
@@ -361,7 +363,7 @@ def train_image(image_buffer, | ||
distorted_image = distort_color(distorted_image, batch_position, | ||
distort_color_in_yiq=distort_color_in_yiq) | ||
|
||
- # Note: This ensures the scaling matches the output of eval_image | ||
+ #Note: This ensures the scaling matches the output of eval_image | ||
distorted_image *= 255 | ||
|
||
if summary_verbosity >= 3: | ||
@@ -487,10 +489,11 @@ class RecordInputImagePreprocessor(BaseImagePreprocessor): | ||
"""Preprocessing image_buffer as a function of its batch position.""" | ||
if self.train: | ||
image = train_image(image_buffer, self.height, self.width, bbox, | ||
- batch_position, self.resize_method, self.distortions, | ||
+ batch_position, self.resize_method, False, | ||
None, summary_verbosity=self.summary_verbosity, | ||
distort_color_in_yiq=self.distort_color_in_yiq, | ||
- fuse_decode_and_crop=self.fuse_decode_and_crop) | ||
+ #fuse_decode_and_crop=self.fuse_decode_and_crop | ||
+ fuse_decode_and_crop=False) | ||
else: | ||
image = tf.image.decode_jpeg( | ||
image_buffer, channels=3, dct_method='INTEGER_FAST') |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
diff --git a/scripts/tf_cnn_benchmarks/benchmark_cnn.py b/scripts/tf_cnn_benchmarks/benchmark_cnn.py | ||
index 09b118e..d5a4e29 100644 | ||
--- a/scripts/tf_cnn_benchmarks/benchmark_cnn.py | ||
+++ b/scripts/tf_cnn_benchmarks/benchmark_cnn.py | ||
@@ -34,6 +34,7 @@ import numpy as np | ||
import six | ||
from six.moves import xrange # pylint: disable=redefined-builtin | ||
import tensorflow as tf | ||
+import ngraph_bridge | ||
|
||
from google.protobuf import text_format | ||
|
||
@@ -726,13 +727,23 @@ def benchmark_one_step(sess, | ||
summary_str = None | ||
start_time = time.time() | ||
if summary_op is None: | ||
- results = sess.run(fetches, options=run_options, run_metadata=run_metadata) | ||
+ # get a new set of fetch operation | ||
+ new_fetches = {} | ||
+ for f in fetches: | ||
+ if f == "average_loss": | ||
+ continue | ||
+ new_fetches[f] = fetches[f] | ||
+ | ||
+ results = sess.run(new_fetches, options=run_options, run_metadata=run_metadata) | ||
+ #results = sess.run(fetches, options=run_options, run_metadata=run_metadata) | ||
else: | ||
(results, summary_str) = sess.run( | ||
[fetches, summary_op], options=run_options, run_metadata=run_metadata) | ||
|
||
if not params.forward_only: | ||
- lossval = results['average_loss'] | ||
+ # the calculation is removed in the operations to be fetched | ||
+ #lossval = results['average_loss'] | ||
+ lossval = 0 | ||
else: | ||
lossval = 0. | ||
if image_producer is not None: |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,263 @@ | ||
from subprocess import check_output, call, Popen, PIPE | ||
import numpy as np | ||
import os | ||
|
||
# This script runs resnet50 training validation with synthetic data and real data
# and compares the results with the desired reference run.
# If the reference files are not provided, it runs on TF (without nGraph) and uses
# that output as the reference.
# It assumes this validation.py script lives in a tensorflow/benchmarks/ repo
# with git HEAD at commit ab01ecc.
# TODO:
# 1. num_batches is hard-coded to 100
# 2. Makes certain assumptions about the reference file's name and the batch size
# 3. Add arguments to take in the backend, the reference log files, the number of
#    iterations/batches, and the data type (real or synthetic)
# 4. Automate the cloning of the benchmarks repo and the running of the script
|
||
# Shell command lines for the four benchmark runs. Each one is an environment
# variable assignment followed by the tf_cnn_benchmarks.py invocation; the
# *_NG variants select the nGraph GPU backend, the *_TF variants disable
# nGraph so that stock TensorFlow produces the reference numbers.
validate_with_real_data_command_NG = ' '.join([
    'NGRAPH_TF_BACKEND=GPU python tf_cnn_benchmarks.py',
    '--num_inter_threads=2',
    '--data_format=NCHW',
    '--model=resnet50',
    '--batch_size=32',
    '--num_gpus=1',
    '--data_dir /mnt/data/TF_ImageNet_latest/',
    '--data_name=imagenet',
    '--datasets_use_prefetch=False',
    '--print_training_accuracy=True',
    '--num_learning_rate_warmup_epochs=0',
    '--num_batches=100',
])
validate_with_real_data_command_TF = ' '.join([
    'NGRAPH_TF_DISABLE=1 python tf_cnn_benchmarks.py',
    '--num_inter_threads=2',
    '--data_format=NHWC',
    '--model=resnet50',
    '--batch_size=32',
    '--num_gpus=1',
    '--data_dir /mnt/data/TF_ImageNet_latest/',
    '--data_name=imagenet',
    '--datasets_use_prefetch=False',
    '--print_training_accuracy=True',
    '--num_learning_rate_warmup_epochs=0',
    '--num_batches=100',
])
validate_with_synthetic_data_command_NG = ' '.join([
    'NGRAPH_TF_BACKEND=GPU python tf_cnn_benchmarks.py',
    '--num_inter_threads=2',
    '--tf_random_seed=1234',
    '--data_format=NCHW',
    '--model=resnet50',
    '--batch_size=32',
    '--num_gpus=1',
    '--data_name=imagenet',
    '--datasets_use_prefetch=False',
    '--print_training_accuracy=True',
    '--num_learning_rate_warmup_epochs=0',
    '--num_batches=100',
])
validate_with_synthetic_data_command_TF = ' '.join([
    'NGRAPH_TF_DISABLE=1 python tf_cnn_benchmarks.py',
    '--num_inter_threads=2',
    '--tf_random_seed=1234',
    '--data_format=NHWC',
    '--model=resnet50',
    '--batch_size=32',
    '--num_gpus=1',
    '--data_name=imagenet',
    '--datasets_use_prefetch=False',
    '--print_training_accuracy=True',
    '--num_learning_rate_warmup_epochs=0',
    '--num_batches=100',
])
|
||
|
||
def command_executor(cmd, verbose=False, msg=None, stdout=None):
    """Run ``cmd`` through the shell and capture everything it prints.

    Args:
        cmd: Command line, handed verbatim to the shell.
        verbose: When true, echo the command before running it.
        msg: Optional prefix used instead of ``'Running COMMAND: '`` when
            echoing; supplying it also turns the echo on.
        stdout: Unused; kept so existing call sites keep working.

    Returns:
        A ``(stdout, stderr)`` tuple holding the raw ``bytes`` the command
        wrote to each stream.
    """
    if verbose or msg is not None:
        tag = 'Running COMMAND: ' if msg is None else msg
        print(tag + cmd)

    process = Popen(
        cmd,
        shell=True,
        stdin=PIPE,
        stdout=PIPE,
        stderr=PIPE,
        close_fds=True)
    # communicate() drains both pipes concurrently, closes stdin and waits
    # for the child. Reading stdout to EOF first and stderr afterwards
    # deadlocks as soon as the child fills the stderr pipe buffer.
    output, error_output = process.communicate()

    return output, error_output
|
||
|
||
def write_to_file(filename, content):
    """Write ``content`` to ``filename``, replacing any existing file.

    The text is encoded as UTF-8 explicitly rather than with the locale's
    default encoding, so non-ASCII characters in a captured log cannot make
    the write fail on machines with an ASCII/C locale.

    Args:
        filename: Path of the file to create or overwrite.
        content: Text to store.
    """
    with open(filename, "w", encoding="utf-8") as text_file:
        text_file.write(content)
|
||
|
||
def parse_training_output(output):
    """Extract the per-step metrics from captured benchmark output.

    Parsing starts after a line whose first whitespace-separated token is
    ``Step`` and stops at a line beginning with ``-----``. For every line in
    between, the last three whitespace-separated fields are taken as total
    loss, top-1 accuracy and top-5 accuracy.

    Blank lines are ignored, and lines inside the table with fewer than
    three fields are skipped, so stray log output cannot raise an
    ``IndexError``.

    Args:
        output: The benchmark's standard output as a single string.

    Returns:
        A ``(total_loss, top1_acc, top5_acc)`` tuple of lists. The values are
        kept as the strings found in the output.
    """
    total_loss = []
    top1_acc = []
    top5_acc = []
    to_parse = False

    for line in output.splitlines():
        fields = line.split()
        if not fields:
            continue

        if fields[0] == 'Step':
            to_parse = True
        elif line.startswith('-----'):
            to_parse = False
        elif to_parse and len(fields) >= 3:
            total_loss.append(fields[-3])
            top1_acc.append(fields[-2])
            top5_acc.append(fields[-1])

    return total_loss, top1_acc, top5_acc
|
||
|
||
def parse_reference_file(filename):
    """Extract the per-step metrics from a saved benchmark log.

    The file is scanned with the same rules used for live output: parsing
    starts after a line whose first token is ``Step`` and stops at a line
    beginning with ``-----``; the last three fields of each line in between
    are total loss, top-1 accuracy and top-5 accuracy.

    Blank lines are ignored, and lines inside the table with fewer than
    three fields are skipped, so they cannot raise an ``IndexError``.

    Args:
        filename: Path of the reference log to read.

    Returns:
        A ``(total_loss, top1_acc, top5_acc)`` tuple of lists. The values are
        kept as the strings found in the file.
    """
    total_loss = []
    top1_acc = []
    top5_acc = []
    to_parse = False

    with open(filename) as reference_result:
        for line in reference_result:
            fields = line.split()
            if not fields:
                continue

            if fields[0] == 'Step':
                to_parse = True
            elif line.startswith('-----'):
                to_parse = False
            elif to_parse and len(fields) >= 3:
                total_loss.append(fields[-3])
                top1_acc.append(fields[-2])
                top5_acc.append(fields[-1])

    return total_loss, top1_acc, top5_acc
|
||
|
||
def check_validation_results(norm_dict, metric, threshold=0.1):
    """Report whether every norm in ``norm_dict`` is within ``threshold``.

    The comparison is an absolute tolerance on each norm value: any norm
    strictly greater than ``threshold`` fails, regardless of the magnitude
    of the values the norm was computed from. A NaN norm also fails, since
    it would otherwise slip through the ``>`` comparison unnoticed.

    Args:
        norm_dict: Mapping of norm name to norm value.
        metric: Name of the metric, used only in the printed messages.
        threshold: Largest norm value that still passes. Defaults to 0.1.

    Returns:
        True when no norm failed, False otherwise. One line is printed per
        failing norm.
    """
    test_pass = True
    for norm, value in norm_dict.items():
        if value > threshold:
            print(metric + " " + norm + " is greater than the threshold " +
                  str(threshold) + ", validation failed")
            test_pass = False
        elif value != value:  # only NaN is unequal to itself
            print(metric + " " + norm + " is NaN, validation failed")
            test_pass = False
    return test_pass
|
||
|
||
def calculate_norm_values(result1, result2):
    """Return the L1, L2 and infinity norms of ``result1 - result2``.

    Both inputs are converted to float64 arrays first, so sequences of
    numeric strings are accepted. ``np.float64`` is used because the
    ``np.float`` alias no longer exists in current NumPy releases.

    Args:
        result1: Sequence of numbers or numeric strings.
        result2: Sequence of the same length as ``result1``.

    Returns:
        A dict with the keys ``"l1_norm"``, ``"l2_norm"`` and ``"inf_norm"``.
    """
    # The element-wise difference is shared by all three norms.
    difference = (np.asarray(result1, dtype=np.float64) -
                  np.asarray(result2, dtype=np.float64))

    l1_norm = np.linalg.norm(difference, 1)
    l2_norm = np.linalg.norm(difference, 2)
    inf_norm = np.linalg.norm(difference, np.inf)
    return {"l1_norm": l1_norm, "l2_norm": l2_norm, "inf_norm": inf_norm}
|
||
|
||
def _override_batch_size(command, batch_size):
    """Return ``command`` with its ``--batch_size`` flag set to ``batch_size``."""
    flag = '--batch_size=' + str(batch_size)
    tokens = command.split(' ')
    if not any(token.startswith('--batch_size=') for token in tokens):
        return command + ' ' + flag
    return ' '.join(
        flag if token.startswith('--batch_size=') else token
        for token in tokens)


def _run_benchmark(command, log_file_name):
    """Run ``command``, save its stdout to ``log_file_name``, return the metrics."""
    print("Running: ", command)
    output, error_output = command_executor(command)
    if not output:
        # The benchmark logs to stderr even on success, so an empty stdout
        # is the only reliable failure signal available here.
        print("Something went wrong executing the command ", command)
        print(str(error_output, 'utf-8'))
        raise SystemExit(1)

    output_string = str(output, 'utf-8')
    write_to_file(log_file_name, output_string)
    return parse_training_output(output_string)


def run_validation(data_format, reference_file_name, batch_size):
    """Compare an nGraph training run against a reference run.

    The nGraph command for ``data_format`` is executed and its per-step
    total loss, top-1 and top-5 accuracy are compared with reference values.
    The reference comes from ``<cwd>/<reference_file_name>_BS<batch_size>.txt``
    when that file exists; otherwise the matching stock-TensorFlow command
    is executed. L1, L2 and infinity norms of the differences are printed
    and checked for each metric.

    If ``./datasets_make_deterministic.patch`` exists it is applied before
    the runs and reverted afterwards, including when a run fails.

    Args:
        data_format: ``"real_data"`` or ``"synthetic_data"``.
        reference_file_name: Base name of a pre-recorded reference log.
        batch_size: Value substituted for the ``--batch_size`` flag of both
            commands; it also tags the output file names.

    Returns:
        True when all three metrics pass, False otherwise.

    Raises:
        ValueError: If ``data_format`` is not one of the two known values.
        SystemExit: If a benchmark command produced no output.
        AssertionError: If both runs report a different number of steps.
    """
    commands_by_data_format = {
        "real_data": (validate_with_real_data_command_NG,
                      validate_with_real_data_command_TF),
        "synthetic_data": (validate_with_synthetic_data_command_NG,
                           validate_with_synthetic_data_command_TF),
    }
    if data_format not in commands_by_data_format:
        raise ValueError("Unknown data_format %r; expected one of %s" %
                         (data_format, sorted(commands_by_data_format)))
    # Appending str(batch_size) to the command would only corrupt its
    # trailing --num_batches flag; the batch size flag is replaced instead.
    ngraph_command, reference_command = [
        _override_batch_size(command, batch_size)
        for command in commands_by_data_format[data_format]
    ]

    # Apply the patch to make input data loader deterministic for real data
    # validation. Assume the current directory already has the required patch.
    patch_file = './datasets_make_deterministic.patch'
    patch_present = os.path.isfile(patch_file)
    if patch_present:
        output, error_output = command_executor(
            'git apply --check --whitespace=nowarn ' + patch_file)
        if error_output:
            print(
                "Warning: datasets_make_deterministic.patch is already applied")
        else:
            command_executor('git apply --whitespace=nowarn ' + patch_file)

    try:
        # Run the validation command on nGraph.
        (ngraph_outputs_total_loss, ngraph_outputs_top1_acc,
         ngraph_outputs_top5_acc) = _run_benchmark(
             ngraph_command,
             "resnet50_validationResult_NG_" + data_format + "_BS" +
             str(batch_size) + ".txt")

        print("ngraph total loss ", ngraph_outputs_total_loss)
        print("ngraph top1 Accuracy ", ngraph_outputs_top1_acc)
        print("ngraph top5 Accuracy ", ngraph_outputs_top5_acc)

        # Get TF output: either from a reference file or from an actual run.
        # A leading separator is stripped so names that were written as
        # "/name" to work around plain string concatenation still resolve
        # inside the current directory.
        reference_file_path = os.path.join(
            os.getcwd(),
            reference_file_name.lstrip(os.sep) + '_BS' + str(batch_size) +
            ".txt")
        print("Finding reference file ", reference_file_path)
        if os.path.isfile(reference_file_path):
            (reference_outputs_total_loss, reference_outputs_top1_acc,
             reference_outputs_top5_acc) = parse_reference_file(
                 reference_file_path)
        else:
            # Run the validation command on TF.
            # This requires TF to be built with GPU support.
            print("No reference output file found, "
                  "begin running reference command")
            (reference_outputs_total_loss, reference_outputs_top1_acc,
             reference_outputs_top5_acc) = _run_benchmark(
                 reference_command,
                 "resnet50_validaionResultReference" + str(batch_size) +
                 ".txt")

        print("reference total loss ", reference_outputs_total_loss)
        print("reference top1Acc ", reference_outputs_top1_acc)
        print("reference top5Acc ", reference_outputs_top5_acc)

        # Compare the TF output and NG output.
        # TF CPU results and GPU results are not the same, so the TF
        # reference needs to come from a TF GPU run.
        assert len(ngraph_outputs_total_loss) == len(
            reference_outputs_total_loss), "Number of total_loss values mismatch"
        assert len(ngraph_outputs_top1_acc) == len(
            reference_outputs_top1_acc), "Number of top1_accuracy values mismatch"
        assert len(ngraph_outputs_top5_acc) == len(
            reference_outputs_top5_acc), "Number of top5_accuracy values mismatch"

        loss_norms = calculate_norm_values(ngraph_outputs_total_loss,
                                           reference_outputs_total_loss)
        top1Acc_norms = calculate_norm_values(ngraph_outputs_top1_acc,
                                              reference_outputs_top1_acc)
        top5Acc_norms = calculate_norm_values(ngraph_outputs_top5_acc,
                                              reference_outputs_top5_acc)

        print("loss norms are %f %f %f " %
              (loss_norms["l1_norm"], loss_norms["l2_norm"],
               loss_norms["inf_norm"]))
        print("top1Acc norms are %f %f %f " %
              (top1Acc_norms["l1_norm"], top1Acc_norms["l2_norm"],
               top1Acc_norms["inf_norm"]))
        print("top5Acc norms are %f %f %f " %
              (top5Acc_norms["l1_norm"], top5Acc_norms["l2_norm"],
               top5Acc_norms["inf_norm"]))

        # Each metric is checked against its own norms.
        loss_result = check_validation_results(loss_norms, "total_loss")
        top1Acc_result = check_validation_results(top1Acc_norms,
                                                  "top1 Accuracy")
        top5Acc_result = check_validation_results(top5Acc_norms,
                                                  "top5 Accuracy")

        test_pass = bool(loss_result and top1Acc_result and top5Acc_result)
        if test_pass:
            print("Validation test pass")
        else:
            print("Validation test failed")
        return test_pass
    finally:
        # Revert the patch so the checkout is left as it was, even when a
        # run above failed.
        if patch_present:
            command_executor('git apply -R ' + patch_file)
|
||
|
||
# Validation with synthetic data | ||
|
||
if __name__ == "__main__":
    # Base names of pre-recorded reference logs; both are left empty here.
    reference_file_name_realData = ''
    reference_file_name_syntheticData = ''

    # Both validations use the same batch size: real data first, then
    # synthetic data.
    batch_size = 100
    validation_runs = (
        ("real_data", reference_file_name_realData),
        ("synthetic_data", reference_file_name_syntheticData),
    )
    for data_kind, reference_name in validation_runs:
        run_validation(data_kind, reference_name, batch_size)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
parse_reference_file and parse_training_output can be a single function. I think they are separate because one parses a file and the other parses a string. Maybe we keep the string-parsing function, read the file into a string, and reuse it.