From d14968b66a7beca87e9c778a522e44fd2988d0cc Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 14 Jun 2019 14:07:49 +0800 Subject: [PATCH] qemu: use x-ignore-shared to implement vm template qemu upstream has x-ignore-shared that works similar to our private bypass-shared-memory. We can use it to implement the vm template feature. Fixes: #1798 Depends-on: github.com/kata-containers/packaging#641 Signed-off-by: Peng Tao --- virtcontainers/qemu.go | 206 +++++++++++++++++++------------ virtcontainers/qemu_amd64.go | 2 - virtcontainers/qemu_arch_base.go | 10 +- virtcontainers/qemu_arm64.go | 6 +- virtcontainers/qemu_ppc64le.go | 2 - virtcontainers/qemu_s390x.go | 5 +- virtcontainers/qemu_test.go | 49 ++++++++ 7 files changed, 189 insertions(+), 91 deletions(-) diff --git a/virtcontainers/qemu.go b/virtcontainers/qemu.go index d395ec0112..fef119cdfa 100644 --- a/virtcontainers/qemu.go +++ b/virtcontainers/qemu.go @@ -421,8 +421,7 @@ func (q *qemu) setupTemplate(knobs *govmmQemu.Knobs, memory *govmmQemu.Memory) g } if q.config.BootFromTemplate { - incoming.MigrationType = govmmQemu.MigrationExec - incoming.Exec = "cat " + q.config.DevicesStatePath + incoming.MigrationType = govmmQemu.MigrationDefer } } @@ -585,6 +584,98 @@ func (q *qemu) vhostFSSocketPath(id string) (string, error) { return utils.BuildSocketPath(store.RunVMStoragePath, id, vhostFSSocket) } +func (q *qemu) virtiofsdArgs(sockPath string) []string { + // The daemon will terminate when the vhost-user socket + // connection with QEMU closes. Therefore we do not keep track + // of this child process after returning from this function. + sourcePath := filepath.Join(kataHostSharedDir, q.id) + args := []string{ + "-o", "vhost_user_socket=" + sockPath, + "-o", "source=" + sourcePath, + "-o", "cache=" + q.config.VirtioFSCache} + if q.config.Debug { + args = append(args, "-d") + } else { + args = append(args, "-f") + } + + return args +} + +func (q *qemu) setupVirtiofsd(timeout int) (remain int, err error) { + sockPath, err := q.vhostFSSocketPath(q.id) + if err != nil { + return 0, err + } + + cmd := exec.Command(q.config.VirtioFSDaemon, q.virtiofsdArgs(sockPath)...) + stderr, err := cmd.StderrPipe() + if err != nil { + return 0, err + } + + if err = cmd.Start(); err != nil { + return 0, err + } + defer func() { + if err != nil { + cmd.Process.Kill() + } + }() + + // Wait for socket to become available + sockReady := make(chan error, 1) + timeStart := time.Now() + go func() { + scanner := bufio.NewScanner(stderr) + var sent bool + for scanner.Scan() { + if q.config.Debug { + q.Logger().WithField("source", "virtiofsd").Debug(scanner.Text()) + } + if !sent && strings.Contains(scanner.Text(), "Waiting for vhost-user socket connection...") { + sockReady <- nil + sent = true + } + } + if !sent { + if err := scanner.Err(); err != nil { + sockReady <- err + } else { + sockReady <- fmt.Errorf("virtiofsd did not announce socket connection") + } + } + q.Logger().Info("virtiofsd quits") + q.stopSandbox() + }() + + return q.waitVirtiofsd(timeStart, timeout, sockReady, + fmt.Sprintf("virtiofsd (pid=%d) socket %s", cmd.Process.Pid, sockPath)) +} + +func (q *qemu) waitVirtiofsd(start time.Time, timeout int, ready chan error, errMsg string) (int, error) { + var err error + + timeoutDuration := time.Duration(timeout) * time.Second + select { + case err = <-ready: + case <-time.After(timeoutDuration): + err = fmt.Errorf("timed out waiting for %s", errMsg) + } + if err != nil { + return 0, err + } + + // Now reduce timeout by the elapsed time + elapsed := time.Since(start) + if elapsed < timeoutDuration { + timeout = timeout - int(elapsed.Seconds()) + } else { + timeout = 0 + } + return timeout, nil +} + // startSandbox will start the Sandbox's VM. func (q *qemu) startSandbox(timeout int) error { span, _ := q.trace("startSandbox") @@ -625,81 +716,10 @@ func (q *qemu) startSandbox(timeout int) error { }() if q.config.SharedFS == config.VirtioFS { - sockPath, err := q.vhostFSSocketPath(q.id) + timeout, err = q.setupVirtiofsd(timeout) if err != nil { return err } - - // The daemon will terminate when the vhost-user socket - // connection with QEMU closes. Therefore we do not keep track - // of this child process after returning from this function. - sourcePath := filepath.Join(kataHostSharedDir, q.id) - args := []string{ - "-o", "vhost_user_socket=" + sockPath, - "-o", "source=" + sourcePath, - "-o", "cache=" + q.config.VirtioFSCache} - if q.config.Debug { - args = append(args, "-d") - } else { - args = append(args, "-f") - } - cmd := exec.Command(q.config.VirtioFSDaemon, args...) - stderr, err := cmd.StderrPipe() - if err != nil { - return err - } - - if err = cmd.Start(); err != nil { - return err - } - defer func() { - if err != nil { - cmd.Process.Kill() - } - }() - - // Wait for socket to become available - sockReady := make(chan error, 1) - timeStart := time.Now() - go func() { - scanner := bufio.NewScanner(stderr) - var sent bool - for scanner.Scan() { - if q.config.Debug { - q.Logger().WithField("source", "virtiofsd").Debug(scanner.Text()) - } - if !sent && strings.Contains(scanner.Text(), "Waiting for vhost-user socket connection...") { - sockReady <- nil - sent = true - } - } - if !sent { - if err := scanner.Err(); err != nil { - sockReady <- err - } else { - sockReady <- fmt.Errorf("virtiofsd did not announce socket connection") - } - } - q.Logger().Info("virtiofsd quits") - q.stopSandbox() - }() - timeoutDuration := time.Duration(timeout) * time.Second - select { - case err = <-sockReady: - case <-time.After(timeoutDuration): - err = fmt.Errorf("timed out waiting for virtiofsd (pid=%d) socket %s", cmd.Process.Pid, sockPath) - } - if err != nil { - return err - } - - // Now reduce timeout by the elapsed time - elapsed := time.Since(timeStart) - if elapsed < timeoutDuration { - timeout = timeout - int(elapsed.Seconds()) - } else { - timeout = 0 - } } var strErr string @@ -709,9 +729,39 @@ func (q *qemu) startSandbox(timeout int) error { } err = q.waitSandbox(timeout) // the virtiofsd deferred checks err's value + if err != nil { + return err + } + + if q.config.BootFromTemplate { + if err = q.bootFromTemplate(); err != nil { + return err + } + } + return err } +func (q *qemu) bootFromTemplate() error { + err := q.qmpSetup() + if err != nil { + return err + } + defer q.qmpShutdown() + + err = q.arch.setIgnoreSharedMemoryMigrationCaps(q.qmpMonitorCh.ctx, q.qmpMonitorCh.qmp) + if err != nil { + q.Logger().WithError(err).Error("set migration ignore shared memory") + return err + } + uri := fmt.Sprintf("exec:cat %s", q.config.DevicesStatePath) + err = q.qmpMonitorCh.qmp.ExecuteMigrationIncoming(q.qmpMonitorCh.ctx, uri) + if err != nil { + return err + } + return q.waitMigration() +} + // waitSandbox will wait for the Sandbox's VM to be up and running. func (q *qemu) waitSandbox(timeout int) error { span, _ := q.trace("waitSandbox") @@ -1482,9 +1532,9 @@ func (q *qemu) saveSandbox() error { // BootToBeTemplate sets the VM to be a template that other VMs can clone from. We would want to // bypass shared memory when saving the VM to a local file through migration exec. if q.config.BootToBeTemplate { - err := q.arch.setBypassSharedMemoryMigrationCaps(q.qmpMonitorCh.ctx, q.qmpMonitorCh.qmp) + err := q.arch.setIgnoreSharedMemoryMigrationCaps(q.qmpMonitorCh.ctx, q.qmpMonitorCh.qmp) if err != nil { - q.Logger().WithError(err).Error("set migration bypass shared memory") + q.Logger().WithError(err).Error("set migration ignore shared memory") return err } } @@ -1495,6 +1545,10 @@ func (q *qemu) saveSandbox() error { return err } + return q.waitMigration() +} + +func (q *qemu) waitMigration() error { t := time.NewTimer(qmpMigrationWaitTimeout) defer t.Stop() for { diff --git a/virtcontainers/qemu_amd64.go b/virtcontainers/qemu_amd64.go index 88cee2f11f..280b94ad65 100644 --- a/virtcontainers/qemu_amd64.go +++ b/virtcontainers/qemu_amd64.go @@ -27,8 +27,6 @@ const defaultQemuMachineType = QemuPC const defaultQemuMachineOptions = "accel=kvm,kernel_irqchip,nvdimm" -const qmpCapMigrationBypassSharedMemory = "bypass-shared-memory" - const qmpMigrationWaitTimeout = 5 * time.Second var qemuPaths = map[string]string{ diff --git a/virtcontainers/qemu_arch_base.go b/virtcontainers/qemu_arch_base.go index 47268c0eb6..4e4ce492ce 100644 --- a/virtcontainers/qemu_arch_base.go +++ b/virtcontainers/qemu_arch_base.go @@ -102,8 +102,8 @@ type qemuArch interface { // supportGuestMemoryHotplug returns if the guest supports memory hotplug supportGuestMemoryHotplug() bool - // setBypassSharedMemoryMigrationCaps set bypass-shared-memory capability for migration - setBypassSharedMemoryMigrationCaps(context.Context, *govmmQemu.QMP) error + // setIgnoreSharedMemoryMigrationCaps set bypass-shared-memory capability for migration + setIgnoreSharedMemoryMigrationCaps(context.Context, *govmmQemu.QMP) error } type qemuArchBase struct { @@ -153,6 +153,8 @@ const ( // QemuCCWVirtio is a QEMU virt machine type for for s390x QemuCCWVirtio = "s390-ccw-virtio" + + qmpCapMigrationIgnoreShared = "x-ignore-shared" ) // kernelParamsNonDebug is a list of the default kernel @@ -579,10 +581,10 @@ func (q *qemuArchBase) supportGuestMemoryHotplug() bool { return true } -func (q *qemuArchBase) setBypassSharedMemoryMigrationCaps(ctx context.Context, qmp *govmmQemu.QMP) error { +func (q *qemuArchBase) setIgnoreSharedMemoryMigrationCaps(ctx context.Context, qmp *govmmQemu.QMP) error { err := qmp.ExecSetMigrationCaps(ctx, []map[string]interface{}{ { - "capability": qmpCapMigrationBypassSharedMemory, + "capability": qmpCapMigrationIgnoreShared, "state": true, }, }) diff --git a/virtcontainers/qemu_arm64.go b/virtcontainers/qemu_arm64.go index 02f19fd18b..6cf3191079 100644 --- a/virtcontainers/qemu_arm64.go +++ b/virtcontainers/qemu_arm64.go @@ -29,8 +29,6 @@ const defaultQemuMachineType = QemuVirt const qmpMigrationWaitTimeout = 10 * time.Second -const qmpCapMigrationBypassSharedMemory = "bypass-shared-memory" - var defaultQemuMachineOptions = "usb=off,accel=kvm,nvdimm,gic-version=" + getGuestGICVersion() var qemuPaths = map[string]string{ @@ -199,7 +197,7 @@ func (q *qemuArm64) appendImage(devices []govmmQemu.Device, path string) ([]govm return devices, nil } -func (q *qemuArm64) setBypassSharedMemoryMigrationCaps(_ context.Context, _ *govmmQemu.QMP) error { - // bypass-shared-memory not support in arm64 for now +func (q *qemuArm64) setIgnoreSharedMemoryMigrationCaps(_ context.Context, _ *govmmQemu.QMP) error { + // x-ignore-shared not support in arm64 for now return nil } diff --git a/virtcontainers/qemu_ppc64le.go b/virtcontainers/qemu_ppc64le.go index 302940434a..afad877815 100644 --- a/virtcontainers/qemu_ppc64le.go +++ b/virtcontainers/qemu_ppc64le.go @@ -30,8 +30,6 @@ const defaultQemuMachineOptions = "accel=kvm,usb=off,cap-cfpc=broken,cap-sbbc=br const defaultMemMaxPPC64le = 32256 // Restrict MemMax to 32Gb on PPC64le -const qmpCapMigrationBypassSharedMemory = "bypass-shared-memory" - const qmpMigrationWaitTimeout = 5 * time.Second var qemuPaths = map[string]string{ diff --git a/virtcontainers/qemu_s390x.go b/virtcontainers/qemu_s390x.go index 5b1f3b0bd5..c1bf90f393 100644 --- a/virtcontainers/qemu_s390x.go +++ b/virtcontainers/qemu_s390x.go @@ -7,10 +7,11 @@ package virtcontainers import ( "fmt" + "time" + govmmQemu "github.com/intel/govmm/qemu" "github.com/kata-containers/runtime/virtcontainers/device/config" "github.com/kata-containers/runtime/virtcontainers/types" - "time" ) type qemuS390x struct { @@ -26,8 +27,6 @@ const defaultQemuMachineOptions = "accel=kvm" const virtioSerialCCW = "virtio-serial-ccw" -const qmpCapMigrationBypassSharedMemory = "bypass-shared-memory" - const qmpMigrationWaitTimeout = 5 * time.Second var qemuPaths = map[string]string{ diff --git a/virtcontainers/qemu_test.go b/virtcontainers/qemu_test.go index 893226dd79..515fbd7132 100644 --- a/virtcontainers/qemu_test.go +++ b/virtcontainers/qemu_test.go @@ -13,7 +13,9 @@ import ( "os" "path/filepath" "reflect" + "strings" "testing" + "time" govmmQemu "github.com/intel/govmm/qemu" "github.com/kata-containers/runtime/virtcontainers/device/config" @@ -561,3 +563,50 @@ func createQemuSandboxConfig() (*Sandbox, error) { return &sandbox, nil } + +func TestQemuVirtiofsdArgs(t *testing.T) { + assert := assert.New(t) + + q := &qemu{ + id: "foo", + config: HypervisorConfig{ + VirtioFSCache: "none", + Debug: true, + }, + } + + savedKataHostSharedDir := kataHostSharedDir + kataHostSharedDir = "test-share-dir" + defer func() { + kataHostSharedDir = savedKataHostSharedDir + }() + + result := "-o vhost_user_socket=bar1 -o source=test-share-dir/foo -o cache=none -d" + args := q.virtiofsdArgs("bar1") + assert.Equal(strings.Join(args, " "), result) + + q.config.Debug = false + result = "-o vhost_user_socket=bar2 -o source=test-share-dir/foo -o cache=none -f" + args = q.virtiofsdArgs("bar2") + assert.Equal(strings.Join(args, " "), result) +} + +func TestQemuWaitVirtiofsd(t *testing.T) { + assert := assert.New(t) + + q := &qemu{} + + ready := make(chan error, 1) + timeout := 5 + + ready <- nil + remain, err := q.waitVirtiofsd(time.Now(), timeout, ready, "") + assert.Nil(err) + assert.True(remain <= timeout) + assert.True(remain >= 0) + + timeout = 0 + remain, err = q.waitVirtiofsd(time.Now(), timeout, ready, "") + assert.NotNil(err) + assert.True(remain == 0) +}