LBANN proto integration #76

Open · wants to merge 3 commits into master
36 changes: 36 additions & 0 deletions workflows/lbann/.gitignore
@@ -0,0 +1,36 @@
# Compiled Object files
*.slo
*.lo
*.o
*.x

# Compiled Dynamic libraries
*.so

# Compiled Static libraries
*.lai
*.la
*.a

*~
.DS_Store

*.d
/.cproject
/.project
/.settings

Debug/
Release/

/output/
scratch/

# ipython notebooks
*.ipynb
.ipynb_checkpoints

# dataspace runtime conf file
conf
experiments
test_data/combo_model.h5
67 changes: 67 additions & 0 deletions workflows/lbann/data/mnist_params.json
@@ -0,0 +1,67 @@
[
  {
    "name": "conv",
    "type": "constant",
    "value": "6 5 1 16 5 1",
    "debug_value": "0 0 0"
  },

  {
    "name": "classes",
    "type": "constant",
    "value": 10,
    "comment": "debug: 1000, default: remove this entry"
  },

  {
    "name": "dense",
    "type": "constant",
    "value": "120 84"
  },

  {
    "name": "activation",
    "type": "categorical",
    "element_type": "string",
    "values": ["relu", "elu", "relu", "tanh"]
  },

  {
    "name": "optimizer",
    "type": "categorical",
    "element_type": "string",
    "values": ["adam", "sgd", "adagrad"]
  },

  {
    "name": "pool_mode",
    "type": "categorical",
    "element_type": "string",
    "values": ["max", "average"]
  },

  {
    "name": "lr",
    "type": "float",
    "lower": 0.0001,
    "upper": 0.01,
    "sigma": 0.045
  },

  {
    "name": "batch_size",
    "type": "ordered",
    "element_type": "int",
    "values": [128, 128, 128, 128, 128, 128],
    "sigma": 1
  },

  {
    "name": "epochs",
    "type": "int",
    "lower": 40,
    "upper": 60,
    "sigma": 1
  }
]
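These entries appear to follow the CANDLE/Supervisor hyperparameter-space convention (constant, categorical, ordered, float, and int definitions). Note that under naive choice-with-replacement sampling, repeated entries in a values list weight the draw: "relu" listed twice biases toward ReLU, and six copies of 128 effectively pin the batch size. As a reference, here is a minimal sketch of how a search driver might draw one candidate from this file; the sampling logic is illustrative only, not the workflow's actual sampler (it ignores sigma and element_type, which a real sampler would honor):

```python
import json
import random

def sample_params(path):
    """Draw one candidate from a parameter-definition file like the one above.

    Illustrative only: ignores sigma and element_type.
    """
    with open(path) as f:
        space = json.load(f)
    sample = {}
    for param in space:
        kind = param["type"]
        if kind == "constant":
            sample[param["name"]] = param["value"]
        elif kind in ("categorical", "ordered"):
            sample[param["name"]] = random.choice(param["values"])
        elif kind == "float":
            sample[param["name"]] = random.uniform(param["lower"], param["upper"])
        elif kind == "int":
            sample[param["name"]] = random.randint(param["lower"], param["upper"])
    return sample

print(sample_params("mnist_params.json"))
```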
Empty file.
5 changes: 5 additions & 0 deletions workflows/lbann/models/mnist/data/mnist/.gitignore
@@ -0,0 +1,5 @@
*.gz
train-images-idx3-ubyte
train-labels-idx1-ubyte
t10k-images-idx3-ubyte
t10k-labels-idx1-ubyte
59 changes: 59 additions & 0 deletions workflows/lbann/models/mnist/data/mnist/__init__.py
@@ -0,0 +1,59 @@
import gzip
import os
import os.path
import urllib.request

import google.protobuf.text_format
import lbann

# Paths
data_dir = os.path.dirname(os.path.realpath(__file__))

def download_data():
    """Download MNIST data files, if needed.

    Data files are downloaded from http://yann.lecun.com/exdb/mnist/
    and uncompressed. Does nothing if the files already exist.

    """

    # MNIST data files and associated URLs
    urls = {
        'train-images-idx3-ubyte': 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz',
        'train-labels-idx1-ubyte': 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz',
        't10k-images-idx3-ubyte': 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz',
        't10k-labels-idx1-ubyte': 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz',
    }

    # Download and uncompress MNIST data files, if needed
    for data_file, url in urls.items():
        data_file = os.path.join(data_dir, data_file)
        compressed_file = data_file + '.gz'
        if not os.path.isfile(data_file):
            urllib.request.urlretrieve(url, filename=compressed_file)
            with gzip.open(compressed_file, 'rb') as in_file:
                with open(data_file, 'wb') as out_file:
                    out_file.write(in_file.read())

def make_data_reader():
    """Make Protobuf message for MNIST data reader.

    MNIST data is downloaded if needed.

    """

    # Download MNIST data files
    download_data()

    # Load Protobuf message from file
    protobuf_file = os.path.join(data_dir, 'data_reader.prototext')
    message = lbann.lbann_pb2.LbannPB()
    with open(protobuf_file, 'r') as f:
        google.protobuf.text_format.Merge(f.read(), message)
    message = message.data_reader

    # Set paths
    for reader in message.reader:
        reader.data_filedir = data_dir

    return message
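A quick usage sketch for this module (run from the model directory; the four IDX files download next to the module on first call):

```python
import data.mnist

# First call fetches the MNIST files if absent, then returns the
# DataReader message with data_filedir rewritten to this package.
message = data.mnist.make_data_reader()
for reader in message.reader:
    print(reader.role, reader.data_filedir)
```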
30 changes: 30 additions & 0 deletions workflows/lbann/models/mnist/data/mnist/data_reader.prototext
@@ -0,0 +1,30 @@
data_reader {
  reader {
    name: "mnist"
    role: "train"
    shuffle: true
    data_filedir: "lbann/applications/vision/data/mnist"
    data_filename: "train-images-idx3-ubyte"
    label_filename: "train-labels-idx1-ubyte"
    validation_percent: 0.1
    percent_of_data_to_use: 1.0
    transforms {
      scale {
        scale: 0.003921568627 # 1/255
      }
    }
  }
  reader {
    name: "mnist"
    role: "test"
    data_filedir: "lbann/applications/vision/data/mnist"
    data_filename: "t10k-images-idx3-ubyte"
    label_filename: "t10k-labels-idx1-ubyte"
    percent_of_data_to_use: 1.0
    transforms {
      scale {
        scale: 0.003921568627 # 1/255
      }
    }
  }
}
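The scale transform multiplies every pixel by 0.003921568627 = 1/255, mapping raw bytes into [0, 1]; the hard-coded data_filedir values are overwritten at runtime by make_data_reader() above. For readers unfamiliar with the IDX files named here, a standalone sketch of the equivalent preprocessing (this uses the standard idx3-ubyte header layout and assumes NumPy; it is not part of the PR):

```python
import struct
import numpy as np

def load_mnist_images(path):
    """Read an idx3-ubyte file and apply the same 1/255 scaling."""
    with open(path, 'rb') as f:
        # Big-endian header: magic number, image count, rows, cols
        magic, count, rows, cols = struct.unpack('>IIII', f.read(16))
        assert magic == 2051, "not an idx3-ubyte image file"
        pixels = np.frombuffer(f.read(), dtype=np.uint8)
    return pixels.reshape(count, rows, cols).astype(np.float32) / 255.0

images = load_mnist_images('train-images-idx3-ubyte')
print(images.shape, images.min(), images.max())  # (60000, 28, 28) 0.0 1.0
```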
185 changes: 185 additions & 0 deletions workflows/lbann/models/mnist/mnist_baseline.py
@@ -0,0 +1,185 @@
import os
import argparse
## LBANN stuff
import lbann
import data.mnist
import lbann.contrib.args
import lbann.contrib.launcher

try:
    import configparser
except ImportError:
    import ConfigParser as configparser


file_path = os.path.dirname(os.path.realpath(__file__))

def common_parser(parser):

    parser.add_argument("--config_file", dest='config_file', type=str,
                        default=os.path.join(file_path, 'mnist_default_model.txt'),
                        help="specify model configuration file")
    parser.add_argument("--nodes", type=int, default=8)

    return parser

def get_model_parser():

    parser = argparse.ArgumentParser(prog='mnist_baseline',
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                                     description='MNIST LBANN baseline')

    return common_parser(parser).parse_args()

def read_config_file(file):
    # Values in the config file are Python literals, read with eval()
    #print("Reading default config (param) file : ", file)
    config = configparser.ConfigParser()
    config.read(file)
    section = config.sections()
    fileParams = {}

    fileParams['model_name'] = eval(config.get(section[0], 'model_name'))
    fileParams['conv'] = eval(config.get(section[0], 'conv'))
    fileParams['dense'] = eval(config.get(section[0], 'dense'))
    fileParams['activation'] = eval(config.get(section[0], 'activation'))
    fileParams['pool_mode'] = eval(config.get(section[0], 'pool_mode'))
    #fileParams['optimizer'] = eval(config.get(section[0], 'optimizer'))
    fileParams['epochs'] = eval(config.get(section[0], 'epochs'))
    fileParams['batch_size'] = eval(config.get(section[0], 'batch_size'))
    fileParams['classes'] = eval(config.get(section[0], 'classes'))
    fileParams['save'] = eval(config.get(section[0], 'save'))
    fileParams['lr'] = eval(config.get(section[0], 'lr'))

    return fileParams

def initialize_parameters(args):
    # Get parameters from configuration file
    gParameters = read_config_file(args.config_file)
    return gParameters

def get_activation(name, x):
    if name == 'relu':
        return lbann.Relu(x)
    elif name == 'tanh':
        return lbann.Tanh(x)
    elif name == 'elu':
        return lbann.Elu(x)
    elif name == 'selu':
        return lbann.Selu(x)
    elif name == 'leaky_relu':
        return lbann.LeakyRelu(x)
    elif name == 'softplus':
        return lbann.Softplus(x)


def run(gParameters, run_args, exp_dir=None):

    # conv holds triples of (out_channels, conv_dim, conv_stride)
    conv_outc = []
    conv_dim = []
    conv_stride = []
    for i in range(0, len(gParameters['conv']), 3):
        conv_outc.append(gParameters['conv'][i])
        conv_dim.append(gParameters['conv'][i + 1])
        conv_stride.append(gParameters['conv'][i + 2])

    # Input data
    input_ = lbann.Input(target_mode='classification')
    images = lbann.Identity(input_)
    labels = lbann.Identity(input_)

    # LeNet
    x = lbann.Convolution(images,
                          num_dims=2,
                          num_output_channels=conv_outc[0],
                          num_groups=1,
                          conv_dims_i=conv_dim[0],
                          conv_strides_i=conv_stride[0],
                          conv_dilations_i=1,
                          has_bias=True)
    x = get_activation(gParameters['activation'], x)
    x = lbann.Pooling(x,
                      num_dims=2,
                      pool_dims_i=2,
                      pool_strides_i=2,
                      pool_mode=str(gParameters['pool_mode']))
    x = lbann.Convolution(x,
                          num_dims=2,
                          num_output_channels=conv_outc[1],
                          num_groups=1,
                          conv_dims_i=conv_dim[1],
                          conv_strides_i=conv_stride[1],
                          conv_dilations_i=1,
                          has_bias=True)
    x = get_activation(gParameters['activation'], x)
    x = lbann.Pooling(x,
                      num_dims=2,
                      pool_dims_i=2,
                      pool_strides_i=2,
                      pool_mode=str(gParameters['pool_mode']))
    x = lbann.FullyConnected(x, num_neurons=gParameters['dense'][0], has_bias=True)
    x = get_activation(gParameters['activation'], x)
    x = lbann.FullyConnected(x, num_neurons=gParameters['dense'][1], has_bias=True)
    x = get_activation(gParameters['activation'], x)
    x = lbann.FullyConnected(x, num_neurons=gParameters['classes'], has_bias=True)
    probs = lbann.Softmax(x)

    # Loss function and accuracy
    loss = lbann.CrossEntropy(probs, labels)
    acc = lbann.CategoricalAccuracy(probs, labels)
    lr = gParameters['lr']
    opt = lbann.SGD(learn_rate=lr, momentum=0.9)
    ## Uncomment to support optimizer exchange
    '''
    if gParameters['optimizer'] == 'adam':
        opt = lbann.Adam(learn_rate=lr, beta1=0.9, beta2=0.99, eps=1e-8)
    elif gParameters['optimizer'] == 'adagrad':
        opt = lbann.AdaGrad(learn_rate=lr, eps=1e-8)
    '''
    model = lbann.Model(gParameters['epochs'],
                        layers=lbann.traverse_layer_graph(input_),
                        objective_function=loss,
                        metrics=[lbann.Metric(acc, name='accuracy', unit='%')],
                        callbacks=[lbann.CallbackPrintModelDescription(),
                                   lbann.CallbackPrint(),
                                   lbann.CallbackTimer()])
                                   #lbann.CallbackLTFB(batch_interval=100, metric='accuracy')])

    # Setup data reader
    data_reader = data.mnist.make_data_reader()

    # Setup trainer; run_id is expected to be injected by the workflow driver
    job_name = "t" + str(gParameters['run_id'] - 1)
    trainer = lbann.Trainer(name=job_name, mini_batch_size=gParameters['batch_size'])
    status = lbann.contrib.launcher.run(
        trainer,
        model,
        data_reader,
        opt,
        #work_dir=gParameters['save'],
        work_dir=exp_dir,
        nodes=run_args.nodes,
        #proto_file_name=job_name + "exp.prototext",
        proto_file_name="experiment.prototext.trainer" + str(gParameters['run_id'] - 1),
        job_name=job_name,
        setup_only=True,
        #batch_job=True,
        lbann_args=['--generate_multi_proto --procs_per_trainer=4']
        #lbann_args=['--generate_multi_proto']
    )

def main():

    args = get_model_parser()
    gParameters = initialize_parameters(args)
    # run_id is normally injected by the workflow; default it here so
    # the script can also run standalone
    gParameters.setdefault('run_id', 1)
    run(gParameters, args)

if __name__ == '__main__':
    main()
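Since read_config_file() pulls every value through eval(), mnist_default_model.txt must be an INI-style file whose values are Python literals (lists for conv and dense, quoted strings for names). A hypothetical config consistent with the keys read above, plus the standalone invocation; the actual defaults shipped with the PR may differ:

```python
# Write a hypothetical mnist_default_model.txt. Every key below is one
# that read_config_file() looks up; the section name is arbitrary since
# the code reads config.sections()[0].
config_text = """[mnist]
model_name = 'mnist_lenet'
conv = [6, 5, 1, 16, 5, 1]
dense = [120, 84]
activation = 'relu'
pool_mode = 'max'
epochs = 50
batch_size = 128
classes = 10
save = 'output'
lr = 0.001
"""
with open('mnist_default_model.txt', 'w') as f:
    f.write(config_text)

# Then: python mnist_baseline.py --config_file mnist_default_model.txt --nodes 8
```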