diff --git a/controllers/ovncontroller_controller.go b/controllers/ovncontroller_controller.go index f2421fc4..a5d2713e 100644 --- a/controllers/ovncontroller_controller.go +++ b/controllers/ovncontroller_controller.go @@ -612,7 +612,6 @@ func (r *OVNControllerReconciler) reconcileNormal(ctx context.Context, instance instance.Status.DesiredNumberScheduled = dset.GetDaemonSet().Status.DesiredNumberScheduled instance.Status.NumberReady = dset.GetDaemonSet().Status.NumberReady - // Define a new DaemonSet object for OVS (ovsdb-server + ovs-vswitchd) ovsdset := daemonset.NewDaemonSet( ovncontroller.CreateOVSDaemonSet(instance, inputHash, ovsServiceLabels, serviceAnnotations, topology), time.Duration(5)*time.Second, @@ -744,6 +743,11 @@ func (r *OVNControllerReconciler) generateServiceConfigMaps( } else { templateParameters["OVNEncapNIC"] = "eth0" } + if instance.Spec.TLS.Enabled() { + templateParameters["TLS"] = "Enabled" + } else { + templateParameters["TLS"] = "Disabled" + } cms := []util.Template{ // ScriptsConfigMap { diff --git a/pkg/ovncontroller/daemonset.go b/pkg/ovncontroller/daemonset.go index aab8d67e..f14ab43d 100644 --- a/pkg/ovncontroller/daemonset.go +++ b/pkg/ovncontroller/daemonset.go @@ -24,6 +24,7 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/utils/ptr" ) @@ -236,6 +237,27 @@ func CreateOVSDaemonSet( envVars := map[string]env.Setter{} envVars["CONFIG_HASH"] = env.SetValue(configHash) + volumes := []corev1.Volume{} + mounts := []corev1.VolumeMount{} + + // add OVN dbs cert and CA + if instance.Spec.TLS.Enabled() { + svc := tls.Service{ + SecretName: *instance.Spec.TLS.GenericService.SecretName, + CertMount: ptr.To(ovn_common.OVNDbCertPath), + KeyMount: ptr.To(ovn_common.OVNDbKeyPath), + CaMount: ptr.To(ovn_common.OVNDbCaCertPath), + } + volumes = append(volumes, svc.CreateVolume(ovnv1.ServiceNameOVS)) + mounts = append(mounts, svc.CreateVolumeMounts(ovnv1.ServiceNameOVS)...) + + // add CA bundle if defined + if instance.Spec.TLS.CaBundleSecretName != "" { + volumes = append(volumes, instance.Spec.TLS.CreateVolume()) + mounts = append(mounts, instance.Spec.TLS.CreateVolumeMounts(nil)...) + } + } + initContainers := []corev1.Container{ { Name: "ovsdb-server-init", @@ -250,7 +272,7 @@ func CreateOVSDaemonSet( Privileged: &privileged, }, Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), - VolumeMounts: GetOVSDbVolumeMounts(), + VolumeMounts: append(GetOVSDbVolumeMounts(), mounts...), }, } @@ -276,7 +298,7 @@ func CreateOVSDaemonSet( Privileged: &privileged, }, Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), - VolumeMounts: GetOVSDbVolumeMounts(), + VolumeMounts: append(GetOVSDbVolumeMounts(), mounts...), // TODO: consider the fact that resources are now double booked Resources: instance.Spec.Resources, LivenessProbe: ovsDbLivenessProbe, @@ -303,7 +325,7 @@ func CreateOVSDaemonSet( Privileged: &privileged, }, Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), - VolumeMounts: GetVswitchdVolumeMounts(), + VolumeMounts: append(GetVswitchdVolumeMounts(), mounts...), // TODO: consider the fact that resources are now double booked Resources: instance.Spec.Resources, LivenessProbe: ovsVswitchdLivenessProbe, @@ -312,6 +334,9 @@ func CreateOVSDaemonSet( }, } + maxUnavailable := intstr.FromInt32(0) + maxSurge := intstr.FromInt32(1) + daemonset := &appsv1.DaemonSet{ ObjectMeta: metav1.ObjectMeta{ Name: ovnv1.ServiceNameOVS, @@ -327,9 +352,17 @@ func CreateOVSDaemonSet( }, Spec: corev1.PodSpec{ ServiceAccountName: instance.RbacResourceName(), + HostPID: true, InitContainers: initContainers, Containers: containers, - Volumes: GetOVSVolumes(instance.Name, instance.Namespace), + Volumes: append(GetOVSVolumes(instance.Name, instance.Namespace), volumes...), + }, + }, + UpdateStrategy: appsv1.DaemonSetUpdateStrategy{ + Type: appsv1.RollingUpdateDaemonSetStrategyType, + RollingUpdate: &appsv1.RollingUpdateDaemonSet{ + MaxUnavailable: &maxUnavailable, + MaxSurge: &maxSurge, }, }, }, diff --git a/templates/ovncontroller/bin/init-ovsdb-server.sh b/templates/ovncontroller/bin/init-ovsdb-server.sh index ef17e3c3..10750eff 100755 --- a/templates/ovncontroller/bin/init-ovsdb-server.sh +++ b/templates/ovncontroller/bin/init-ovsdb-server.sh @@ -18,15 +18,71 @@ set -ex source $(dirname $0)/functions trap wait_for_db_creation EXIT +function init_ovsdb_server { + # Initialize or upgrade database if needed + CTL_ARGS="--system-id=random --no-ovs-vswitchd" + /usr/share/openvswitch/scripts/ovs-ctl start $CTL_ARGS + /usr/share/openvswitch/scripts/ovs-ctl stop $CTL_ARGS + + if [ ! -f /var/lib/openvswitch/already_executed ]; then + # If file was not present, set status INIT + echo "INIT" > /var/lib/openvswitch/already_executed + fi + + wait_for_db_creation + trap - EXIT +} + # If db file is empty, remove it; otherwise service won't start. # See https://issues.redhat.com/browse/FDP-689 for more details. if ! [ -s ${DB_FILE} ]; then rm -f ${DB_FILE} fi -# Initialize or upgrade database if needed -CTL_ARGS="--system-id=random --no-ovs-vswitchd" -/usr/share/openvswitch/scripts/ovs-ctl start $CTL_ARGS -/usr/share/openvswitch/scripts/ovs-ctl stop $CTL_ARGS -wait_for_db_creation -trap - EXIT +# Check if file is created, if not means it's first execution +if [ -f /var/lib/openvswitch/already_executed ]; then + if [ {{ .TLS }} == "Enabled" ]; then + # TLS is used + TLSOptions="--certificate=/etc/pki/tls/certs/ovndb.crt --private-key=/etc/pki/tls/private/ovndb.key --ca-cert=/etc/pki/tls/certs/ovndbca.crt" + DBOptions="--db ssl:ovsdbserver-nb.openstack.svc.cluster.local:6641" + else + # Normal TCP is used + TLSOptions="" + DBOptions="--db tcp:ovsdbserver-nb.openstack.svc.cluster.local:6641" + fi + + # Need to double check that ovsdb-server and vswitchd are actually running + # (That pod was not unhealty and it got destroyed) + # In the following steps we need ovsdb-server to be running, check pid file + if [ ! -f /run/openvswitch/ovsdb-server.pid ]; then + # No PID file, start as normal + echo "No PID file found, init ovsdb_server as it's the only pod" + init_ovsdb_server + exit 0 + fi + # File is created, no need to run ovs-ctl + # Change state to "UPDATE" + echo "UPDATE" > /var/lib/openvswitch/already_executed + # Clear possible leftovers of past executions + ## Need to lower chassis priority + # First get the system-id + chassis_id=$(ovs-vsctl get Open_Vswitch . external_ids:system-id) + nb_output=$(ovn-nbctl --no-leader-only $DBOptions $TLSOptions --columns=_uuid,priority find Gateway_Chassis chassis_name=$chassis_id) + # Check that nbctl was executed correctly + if [ $? -ne 0 ]; then + echo "ERROR: ovn-nbctl find command failed" + exit 1 + fi + row_uuid=$(echo "$nb_output" | grep "_uuid" | cut -d':' -f2 | xargs) + priority=$(echo "$nb_output" | grep "priority" | cut -d':' -f2 | xargs) + # Save priority to be able to restore it later (It's overwritting, not appending, hence no check) + echo $priority > /var/lib/openvswitch/old_priority + # Set lower priority (lowest value possible 0) + ovn-nbctl --no-leader-only $DBOptions $TLSOptions set Gateway_Chassis $row_uuid priority=0 + # Check that nbctl was executed correctly + if [ $? -ne 0 ]; then + echo "ERROR: ovn-nbctl set command failed" + exit 1 + fi + exit 0 +fi diff --git a/templates/ovncontroller/bin/start-ovsdb-server.sh b/templates/ovncontroller/bin/start-ovsdb-server.sh index 6c87f0a4..bf702950 100755 --- a/templates/ovncontroller/bin/start-ovsdb-server.sh +++ b/templates/ovncontroller/bin/start-ovsdb-server.sh @@ -17,12 +17,51 @@ set -ex source $(dirname $0)/functions +echo "start ovsdb-server" + +# Check state +if [ -f /var/lib/openvswitch/already_executed ]; then + if [ $(cat /var/lib/openvswitch/already_executed) == "UPDATE" ]; then + echo "In a middle of an upgrade" + # Need to stop vswitch and dbserver + # First stop vswitchd + vswitchd_pid=$(cat /run/openvswitch/ovs-vswitchd.pid) + # Stop vswitch + echo "stopping vswitchd" + bash /usr/local/bin/container-scripts/stop-vswitchd.sh + echo "Done, stopped vswitchd" + # Wait for vswitchd to end checking status + while true; do + if [ $(cat /var/lib/openvswitch/already_executed) == "RESTART_VSWITCHD" ]; then + break + fi + sleep 0.1 + done + echo "Status is already RESTART_VSWITCHD" + bash /usr/local/bin/container-scripts/stop-ovsdb-server.sh + echo "Done, stopped ovsdb-server" + # vswitchd stopped + # We still need to run the ovsdb-server in this new container, this can be done + # with the flag --overwrite-pidfile but we need to ignore the next SIGTERM that will + # send openshift, creating file to noop the stop-ovsdb-server.sh + echo "setting flag to skip ovsdb-server stop" + touch /var/lib/openvswitch/skip_stop_ovsdbserver + else + # It could happen that ovsdb-server or ovs-vwsitchd pod can't start correctly or can't get to running state + # this would cause this script to be run with already_executed with an state different than "UPDATE" + : + fi +fi + # Remove the obsolete semaphore file in case it still exists. cleanup_ovsdb_server_semaphore +# Set state to "OVSDB_SERVER" +echo "OVSDB_SERVER" > /var/lib/openvswitch/already_executed + # Start the service ovsdb-server ${DB_FILE} \ - --pidfile \ + --pidfile --overwrite-pidfile \ --remote=punix:/var/run/openvswitch/db.sock \ --private-key=db:Open_vSwitch,SSL,private_key \ --certificate=db:Open_vSwitch,SSL,certificate \ diff --git a/templates/ovncontroller/bin/start-vswitchd.sh b/templates/ovncontroller/bin/start-vswitchd.sh index c9ee0a24..108609c7 100755 --- a/templates/ovncontroller/bin/start-vswitchd.sh +++ b/templates/ovncontroller/bin/start-vswitchd.sh @@ -14,6 +14,33 @@ # License for the specific language governing permissions and limitations # under the License. +# Check which connection is used +if [ {{ .TLS }} == "Enabled" ]; then + # TLS is used + TLSOptions="--certificate=/etc/pki/tls/certs/ovndb.crt --private-key=/etc/pki/tls/private/ovndb.key --ca-cert=/etc/pki/tls/certs/ovndbca.crt" + DBOptions="--db ssl:ovsdbserver-nb.openstack.svc.cluster.local:6641" +else + # Normal TCP is used + TLSOptions="" + DBOptions="--db tcp:ovsdbserver-nb.openstack.svc.cluster.local:6641" +fi + + +# Check if we're doing an update +echo "Check if we're doing an update" +if [ -f /var/lib/openvswitch/already_executed ]; then + while true; do + if [ $(cat /var/lib/openvswitch/already_executed) == "OVSDB_SERVER" ]; then + break + fi + if [ $(cat /var/lib/openvswitch/already_executed) == "INIT" ]; then + break + fi + sleep 0.1 + done + echo "OVSDBSERVER Already up, start script" +fi + source $(dirname $0)/functions wait_for_ovsdb_server @@ -30,7 +57,7 @@ ovs-vsctl --no-wait set open_vswitch . other_config:flow-restore-wait=true # It's safe to start vswitchd now. Do it. # --detach to allow the execution to continue to restoring the flows. -/usr/sbin/ovs-vswitchd --pidfile --mlockall --detach +/usr/sbin/ovs-vswitchd --pidfile --overwrite-pidfile --mlockall --detach # Restore saved flows. if [ -f $FLOWS_RESTORE_SCRIPT ]; then @@ -49,6 +76,30 @@ cleanup_flows_backup # Now, inform vswitchd that we are done. ovs-vsctl remove open_vswitch . other_config flow-restore-wait +# Restore the priority if this was changed during the update +if [ -f /var/lib/openvswitch/old_priority ]; then + echo "Using DBOptions: $DBOptions" + echo "Using TLSOptions: $TLSOptions" + priority=$(cat /var/lib/openvswitch/old_priority) + echo "Restoring old priority, which was: $priority" + chassis_id=$(ovs-vsctl get Open_Vswitch . external_ids:system-id) + nb_output=$(ovn-nbctl --no-leader-only $DBOptions $TLSOptions --columns=_uuid,priority find Gateway_Chassis chassis_name=$chassis_id) + err=$? + if [ $err -ne 0 ]; then + echo "Error while getting gateway chassis uuid $err" + fi + row_uuid=$(echo "$nb_output" | grep "_uuid" | cut -d':' -f2 | xargs) + rm /var/lib/openvswitch/old_priority + ovn-nbctl --no-leader-only $DBOptions $TLSOptions set Gateway_Chassis $row_uuid priority=$priority + err=$? + if [ $err -ne 0 ]; then + echo "Error while setting gateway chassis priority ($priority), error: $err" + fi +fi + +# Set state to "RUNNING" +echo "RUNNING" > /var/lib/openvswitch/already_executed + # This is container command script. Block it from exiting, otherwise k8s will # restart the container again. sleep infinity diff --git a/templates/ovncontroller/bin/stop-ovsdb-server.sh b/templates/ovncontroller/bin/stop-ovsdb-server.sh index 2d15c507..0e1923d7 100755 --- a/templates/ovncontroller/bin/stop-ovsdb-server.sh +++ b/templates/ovncontroller/bin/stop-ovsdb-server.sh @@ -14,6 +14,12 @@ # License for the specific language governing permissions and limitations # under the License. +if [ -f /var/lib/openvswitch/skip_stop_ovsdbserver ]; then + echo "Skipping stop script" + rm /var/lib/openvswitch/skip_stop_ovsdbserver + exit 0 +fi + set -ex source $(dirname $0)/functions @@ -26,5 +32,13 @@ while [ ! -f $SAFE_TO_STOP_OVSDB_SERVER_SEMAPHORE ]; do done cleanup_ovsdb_server_semaphore +while [ $(cat /var/log/openvswitch/already_executed) != "RESTART_VSWITCHD" ]; do + # Wait to vswitchd container to finish it's stop process + sleep 0.1 +done + # Now it's safe to stop db server. Do it. /usr/share/openvswitch/scripts/ovs-ctl stop --no-ovs-vswitchd + +# Update state to "RESTART_DBSERVER" +echo "RESTART_DBSERVER" > /var/lib/openvswitch/already_executed diff --git a/templates/ovncontroller/bin/stop-vswitchd.sh b/templates/ovncontroller/bin/stop-vswitchd.sh index d12f74c9..d1a6a74e 100755 --- a/templates/ovncontroller/bin/stop-vswitchd.sh +++ b/templates/ovncontroller/bin/stop-vswitchd.sh @@ -14,6 +14,11 @@ # License for the specific language governing permissions and limitations # under the License. +if [ -f /var/lib/openvswitch/skip_stop_vswitchd ]; then + echo "Skipping stop script" + rm /var/lib/openvswitch/skip_stop_vswitchd + exit 0 +fi set -ex source $(dirname $0)/functions @@ -32,5 +37,16 @@ TMPDIR=$FLOWS_RESTORE_DIR /usr/share/openvswitch/scripts/ovs-save save-flows $br # unlocks the db preStop script, working as a semaphore touch $SAFE_TO_STOP_OVSDB_SERVER_SEMAPHORE +# If it's comming from an update, it means that the +# new pod, hence we're still missing the signal from the openshift. To avoid +# running this script twice, create file to skip the following SIGTERM signal +if [ -f /var/lib/openvswitch/already_executed ]; then + if [ $(cat /var/lib/openvswitch/already_executed) == "UPDATE" ]; then + touch /var/lib/openvswitch/skip_stop_vswitchd + fi +fi +# Update state to "RESTART_VSWITCHD" +echo "RESTART_VSWITCHD" > /var/lib/openvswitch/already_executed + # Finally, stop vswitchd. /usr/share/openvswitch/scripts/ovs-ctl stop --no-ovsdb-server