diff --git a/src/Craned/TaskManager.cpp b/src/Craned/TaskManager.cpp index d4b60471..b705b784 100644 --- a/src/Craned/TaskManager.cpp +++ b/src/Craned/TaskManager.cpp @@ -641,16 +641,18 @@ CraneErr TaskManager::SpawnProcessInInstance_( // Move the ownership of ProcessInstance into the TaskInstance. instance->processes.emplace(child_pid, std::move(process)); - close(io_in_sock_pair[1]); - close(io_out_sock_pair[1]); - close(ctrl_sock_pair[1]); int ctrl_fd = ctrl_sock_pair[0]; - bool ok; + close(ctrl_sock_pair[1]); + if (instance->IsCrun()) { + close(io_in_sock_pair[1]); + close(io_out_sock_pair[1]); + } setegid(saved_priv.gid); seteuid(saved_priv.uid); setgroups(0, nullptr); + bool ok; FileInputStream istream(ctrl_fd); FileOutputStream ostream(ctrl_fd); CanStartMessage msg; @@ -691,10 +693,16 @@ CraneErr TaskManager::SpawnProcessInInstance_( // subprocess should continue to exec(). msg.set_ok(true); ok = SerializeDelimitedToZeroCopyStream(msg, &ostream); - ok &= ostream.Flush(); if (!ok) { - CRANE_ERROR("Failed to send ok=true to subprocess {} for task #{}", - child_pid, instance->task.task_id()); + CRANE_ERROR("Failed to serialize msg to ostream: {}", + strerror(ostream.GetErrno())); + } + + if (ok) ok &= ostream.Flush(); + if (!ok) { + CRANE_ERROR("Failed to send ok=true to subprocess {} for task #{}: {}", + child_pid, instance->task.task_id(), + strerror(ostream.GetErrno())); close(ctrl_fd); // Communication failure caused by process crash or grpc error. @@ -707,10 +715,15 @@ CraneErr TaskManager::SpawnProcessInInstance_( return CraneErr::kOk; } - ParseDelimitedFromZeroCopyStream(&child_process_ready, &istream, nullptr); - if (!msg.ok()) { - CRANE_ERROR("Failed to read protobuf from subprocess {} of task #{}", - child_pid, instance->task.task_id()); + ok = ParseDelimitedFromZeroCopyStream(&child_process_ready, &istream, + nullptr); + if (!ok || !msg.ok()) { + if (!ok) + CRANE_ERROR("Socket child endpoint failed: {}", + strerror(istream.GetErrno())); + if (!msg.ok()) + CRANE_ERROR("False from subprocess {} of task #{}", child_pid, + instance->task.task_id()); close(ctrl_fd); // See comments above. @@ -768,8 +781,18 @@ CraneErr TaskManager::SpawnProcessInInstance_( ChildProcessReady child_process_ready; bool ok; - ParseDelimitedFromZeroCopyStream(&msg, &istream, nullptr); - if (!msg.ok()) std::abort(); + ok = ParseDelimitedFromZeroCopyStream(&msg, &istream, nullptr); + if (!ok || !msg.ok()) { + if (!ok) { + int err = istream.GetErrno(); + CRANE_ERROR("Failed to read socket from parent: {}", strerror(err)); + } + + if (!msg.ok()) + CRANE_ERROR("Parent process ask not to start the subprocess."); + + std::abort(); + } if (instance->task.type() == crane::grpc::Batch) { int stdout_fd, stderr_fd;