Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unify code structure of training job api #1300

Merged
merged 6 commits into from
Jul 14, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions cmd/tf-operator.v1/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ import (

"github.com/kubeflow/common/pkg/util/signals"
"github.com/kubeflow/tf-operator/cmd/tf-operator.v1/app/options"
v1 "github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1"
tfjobclientset "github.com/kubeflow/tf-operator/pkg/client/clientset/versioned"
"github.com/kubeflow/tf-operator/pkg/client/clientset/versioned/scheme"
tfjobinformers "github.com/kubeflow/tf-operator/pkg/client/informers/externalversions"
"github.com/kubeflow/tf-operator/pkg/common"
controller "github.com/kubeflow/tf-operator/pkg/controller.v1/tensorflow"
"github.com/kubeflow/tf-operator/pkg/version"
)
Expand Down Expand Up @@ -75,7 +75,7 @@ func Run(opt *options.ServerOption) error {
version.PrintVersionAndExit(apiVersion)
}

namespace := os.Getenv(v1.EnvKubeflowNamespace)
namespace := os.Getenv(common.EnvKubeflowNamespace)
if len(namespace) == 0 {
log.Infof("EnvKubeflowNamespace not set, use default namespace %s",
metav1.NamespaceDefault)
Expand Down Expand Up @@ -232,7 +232,7 @@ func createClientSets(config *restclientset.Config) (
func checkCRDExists(clientset apiextensionclientset.Interface, namespace string) bool {
crd, err := clientset.ApiextensionsV1beta1().
CustomResourceDefinitions().
Get(context.TODO(), v1.TFCRD, metav1.GetOptions{})
Get(context.TODO(), "tfjobs.kubeflow.org", metav1.GetOptions{})

if err != nil {
log.Error(err)
Expand Down
75 changes: 72 additions & 3 deletions hack/update-codegen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@
# This shell is used to auto generate some useful tools for k8s, such as lister,
# informer, deepcopy, defaulter and so on.

# Ignore shellcheck for IDEs
# shellcheck disable=SC2116
# shellcheck disable=SC2046
# shellcheck disable=SC2006

set -o errexit
set -o nounset
set -o pipefail
Expand All @@ -35,6 +40,17 @@ fi

echo ">> Using ${CODEGEN_PKG}"

# Grab openapi-gen version from go.mod
OPENAPI_VERSION=$(grep 'k8s.io/kube-openapi' go.sum | awk '{print $2}' | sed 's/\/go.mod//g' | head -1)
OPENAPI_PKG=$(echo `go env GOPATH`"/pkg/mod/k8s.io/kube-openapi@${OPENAPI_VERSION}")

if [[ ! -d ${OPENAPI_PKG} ]]; then
echo "${OPENAPI_PKG} is missing. Running 'go mod download'."
go mod download
fi

echo ">> Using ${OPENAPI_PKG}"

# code-generator does work with go.mod but makes assumptions about
# the project living in `$GOPATH/src`. To work around this and support
# any location; create a temporary directory, use this as an output
Expand All @@ -60,21 +76,74 @@ cd ${SCRIPT_ROOT}
${CODEGEN_PKG}/generate-groups.sh "all" \
github.com/kubeflow/tf-operator/pkg/client github.com/kubeflow/tf-operator/pkg/apis \
tensorflow:v1 \
--output-base "${TEMP_DIR}" \
--output-base "${TEMP_DIR}" \
--go-header-file hack/boilerplate/boilerplate.go.txt

# Notice: The code in code-generator does not generate defaulter by default.
# We need to build binary from vendor cmd folder.
#echo "Building defaulter-gen"
#go build -o defaulter-gen ${CODEGEN_PKG}/cmd/defaulter-gen

# ${GOPATH}/bin/defaulter-gen is automatically built from ${CODEGEN_PKG}/generate-groups.sh
echo "Generating defaulters for tensorflow/v1"
${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1 -O zz_generated.defaults --output-package github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1 --go-header-file hack/boilerplate/boilerplate.go.txt "$@"
${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1 \
Jeffwan marked this conversation as resolved.
Show resolved Hide resolved
-O zz_generated.defaults \
--output-package github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1 \
--go-header-file hack/boilerplate/boilerplate.go.txt "$@"

echo "Generating defaulters for pytorch/v1"
${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1 \
-O zz_generated.defaults \
--output-package github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1 \
--go-header-file hack/boilerplate/boilerplate.go.txt "$@"

echo "Generating defaulters for mxnet/v1"
${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1 \
-O zz_generated.defaults \
--output-package github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1 \
--go-header-file hack/boilerplate/boilerplate.go.txt "$@"

echo "Generating defaulters for xgboost/v1"
${GOPATH}/bin/defaulter-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1 \
-O zz_generated.defaults \
--output-package github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1 \
--go-header-file hack/boilerplate/boilerplate.go.txt "$@"

cd - > /dev/null

# Notice: The code in kube-openapi does not generate defaulter by default.
# We need to build binary from pkg cmd folder.
echo "Building openapi-gen"
go build -o openapi-gen ${OPENAPI_PKG}/cmd/openapi-gen

echo "Generating OpenAPI specification for tensorflow/v1"
${GOPATH}/bin/openapi-gen --report-filename=hack/violation_exception.list --input-dirs github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1,k8s.io/api/core/v1,k8s.io/apimachinery/pkg/apis/meta/v1,k8s.io/apimachinery/pkg/api/resource,k8s.io/apimachinery/pkg/runtime,k8s.io/apimachinery/pkg/util/intstr,k8s.io/apimachinery/pkg/version --output-package github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1 --go-header-file hack/boilerplate/boilerplate.go.txt "$@"
./openapi-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1,k8s.io/api/core/v1,k8s.io/apimachinery/pkg/apis/meta/v1,k8s.io/apimachinery/pkg/api/resource,k8s.io/apimachinery/pkg/runtime,k8s.io/apimachinery/pkg/util/intstr,k8s.io/apimachinery/pkg/version \
--report-filename=hack/violation_exception.list \
--output-package github.com/kubeflow/tf-operator/pkg/apis/tensorflow/v1 \
--go-header-file hack/boilerplate/boilerplate.go.txt "$@"

echo "Generating OpenAPI specification for pytorch/v1"
./openapi-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1,k8s.io/api/core/v1,k8s.io/apimachinery/pkg/apis/meta/v1,k8s.io/apimachinery/pkg/api/resource,k8s.io/apimachinery/pkg/runtime,k8s.io/apimachinery/pkg/util/intstr,k8s.io/apimachinery/pkg/version \
--report-filename=hack/violation_exception.list \
--output-package github.com/kubeflow/tf-operator/pkg/apis/pytorch/v1 \
--go-header-file hack/boilerplate/boilerplate.go.txt "$@"

echo "Generating OpenAPI specification for mxnet/v1"
./openapi-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1,k8s.io/api/core/v1,k8s.io/apimachinery/pkg/apis/meta/v1,k8s.io/apimachinery/pkg/api/resource,k8s.io/apimachinery/pkg/runtime,k8s.io/apimachinery/pkg/util/intstr,k8s.io/apimachinery/pkg/version \
--report-filename=hack/violation_exception.list \
--output-package github.com/kubeflow/tf-operator/pkg/apis/mxnet/v1 \
--go-header-file hack/boilerplate/boilerplate.go.txt "$@"

echo "Generating OpenAPI specification for xgboost/v1"
./openapi-gen --input-dirs github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1,k8s.io/api/core/v1,k8s.io/apimachinery/pkg/apis/meta/v1,k8s.io/apimachinery/pkg/api/resource,k8s.io/apimachinery/pkg/runtime,k8s.io/apimachinery/pkg/util/intstr,k8s.io/apimachinery/pkg/version \
--report-filename=hack/violation_exception.list \
--output-package github.com/kubeflow/tf-operator/pkg/apis/xgboost/v1 \
--go-header-file hack/boilerplate/boilerplate.go.txt "$@"

cd - > /dev/null

# Copy everything back.
cp -a "${TEMP_DIR}/${ROOT_PKG}/." "${SCRIPT_ROOT}/"

# Clean up binaries we build for update codegen
rm ./openapi-gen
8 changes: 4 additions & 4 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,19 +89,19 @@ func main() {
// TODO: We need a general manager. all rest reconciler addsToManager
// Based on the user configuration, we start different controllers
if err = xgboostcontroller.NewReconciler(mgr).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "XGBoostJob")
setupLog.Error(err, "unable to create controller", "controller", xgboostv1.Kind)
os.Exit(1)
}
if err = pytorchcontroller.NewReconciler(mgr).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "PyTorchJob")
setupLog.Error(err, "unable to create controller", "controller", pytorchv1.Kind)
os.Exit(1)
}
if err = tensorflowcontroller.NewReconciler(mgr).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "TFJob")
setupLog.Error(err, "unable to create controller", "controller", tensorflowv1.Kind)
os.Exit(1)
}
if err = mxnetcontroller.NewReconciler(mgr).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "MXJob")
setupLog.Error(err, "unable to create controller", "controller", mxnetv1.Kind)
os.Exit(1)
}
//+kubebuilder:scaffold:builder
Expand Down
5 changes: 2 additions & 3 deletions pkg/apis/mxnet/v1/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,6 @@ package v1
import commonv1 "github.com/kubeflow/common/pkg/apis/common/v1"

const (
// EnvKubeflowNamespace is ENV for kubeflow namespace specified by user.
EnvKubeflowNamespace = "KUBEFLOW_NAMESPACE"

// DefaultPortName is name of the port used to communicate between scheduler and
// servers & workers.
DefaultPortName = "mxjob-port"
Expand All @@ -15,4 +12,6 @@ const (
DefaultPort = 9091
// DefaultRestartPolicy is default RestartPolicy for MXReplicaSpec.
DefaultRestartPolicy = commonv1.RestartPolicyNever
// Kind is the kind name.
Kind = "MXJob"
)
107 changes: 107 additions & 0 deletions pkg/apis/mxnet/v1/defaults.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright 2018 The Kubeflow Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package v1

import (
"strings"

commonv1 "github.com/kubeflow/common/pkg/apis/common/v1"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
)

// Int32 is a helper routine that allocates a new int32 value
// to store v and returns a pointer to it.
func Int32(v int32) *int32 {
return &v
}

func addDefaultingFuncs(scheme *runtime.Scheme) error {
return RegisterDefaults(scheme)
}

// setDefaultPort sets the default ports for mxnet container.
func setDefaultPort(spec *v1.PodSpec) {
index := 0
for i, container := range spec.Containers {
if container.Name == DefaultContainerName {
index = i
break
}
}

hasMXJobPort := false
for _, port := range spec.Containers[index].Ports {
if port.Name == DefaultPortName {
hasMXJobPort = true
break
}
}
if !hasMXJobPort {
spec.Containers[index].Ports = append(spec.Containers[index].Ports, v1.ContainerPort{
Name: DefaultPortName,
ContainerPort: DefaultPort,
})
}
}

func setDefaultReplicas(spec *commonv1.ReplicaSpec) {
if spec.Replicas == nil {
spec.Replicas = Int32(1)
}
if spec.RestartPolicy == "" {
spec.RestartPolicy = DefaultRestartPolicy
}
}

// setTypeNamesToCamelCase sets the name of all replica types from any case to correct case.
func setTypeNamesToCamelCase(mxJob *MXJob) {
setTypeNameToCamelCase(mxJob, MXReplicaTypeScheduler)
setTypeNameToCamelCase(mxJob, MXReplicaTypeServer)
setTypeNameToCamelCase(mxJob, MXReplicaTypeWorker)
}

// setTypeNameToCamelCase sets the name of the replica type from any case to correct case.
// E.g. from server to Server; from WORKER to Worker.
func setTypeNameToCamelCase(mxJob *MXJob, typ commonv1.ReplicaType) {
for t := range mxJob.Spec.MXReplicaSpecs {
if strings.EqualFold(string(t), string(typ)) && t != typ {
spec := mxJob.Spec.MXReplicaSpecs[t]
delete(mxJob.Spec.MXReplicaSpecs, t)
mxJob.Spec.MXReplicaSpecs[typ] = spec
return
}
}
}

// SetDefaults_MXJob sets any unspecified values to defaults.
func SetDefaults_MXJob(mxjob *MXJob) {
// Set default cleanpod policy to All.
if mxjob.Spec.RunPolicy.CleanPodPolicy == nil {
all := commonv1.CleanPodPolicyAll
mxjob.Spec.RunPolicy.CleanPodPolicy = &all
}

// Update the key of MXReplicaSpecs to camel case.
setTypeNamesToCamelCase(mxjob)

for _, spec := range mxjob.Spec.MXReplicaSpecs {
// Set default replicas to 1.
setDefaultReplicas(spec)
// Set default port to mxnet container.
setDefaultPort(&spec.Template.Spec)
}
}
Loading