Skip to content

Commit 62b3c18

Browse files
committed
refactor: move some c code to go
Signed-off-by: lifubang <[email protected]>
1 parent 7cce7e2 commit 62b3c18

File tree

10 files changed

+388
-828
lines changed

10 files changed

+388
-828
lines changed

libcontainer/configs/config.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ type IDMap struct {
2727
Size int64 `json:"size"`
2828
}
2929

30+
// ToString serializes the IDMap to a string of the form
// "<ContainerID> <HostID> <Size>": three decimal integers separated by single
// spaces, with no trailing newline.
31+
func (i IDMap) ToString() string {
32+
return fmt.Sprintf("%d %d %d", i.ContainerID, i.HostID, i.Size)
33+
}
34+
3035
// Seccomp represents syscall restrictions
3136
// By default, only the native architecture of the kernel is allowed to be used
3237
// for syscalls. Additional architectures can be added by specifying them in

libcontainer/container_linux.go

Lines changed: 24 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@ import (
1818
"github.com/opencontainers/runtime-spec/specs-go"
1919
"github.com/sirupsen/logrus"
2020
"github.com/vishvananda/netlink/nl"
21-
"golang.org/x/sys/execabs"
2221
"golang.org/x/sys/unix"
2322

2423
"github.com/opencontainers/runc/libcontainer/cgroups"
@@ -580,6 +579,10 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
580579
cmd.Env = append(cmd.Env,
581580
"_LIBCONTAINER_INITPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
582581
)
582+
cmd.ExtraFiles = append(cmd.ExtraFiles, comm.stage1SockChild)
583+
cmd.Env = append(cmd.Env,
584+
"_LIBCONTAINER_STAGE1PIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
585+
)
583586
cmd.ExtraFiles = append(cmd.ExtraFiles, comm.syncSockChild.File())
584587
cmd.Env = append(cmd.Env,
585588
"_LIBCONTAINER_SYNCPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
@@ -653,14 +656,16 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm)
653656
}
654657

655658
init := &initProcess{
656-
cmd: cmd,
657-
comm: comm,
658-
manager: c.cgroupManager,
659+
containerProcess: containerProcess{
660+
cmd: cmd,
661+
comm: comm,
662+
manager: c.cgroupManager,
663+
config: c.newInitConfig(p),
664+
process: p,
665+
bootstrapData: data,
666+
container: c,
667+
},
659668
intelRdtManager: c.intelRdtManager,
660-
config: c.newInitConfig(p),
661-
container: c,
662-
process: p,
663-
bootstrapData: data,
664669
}
665670
c.initProcess = init
666671
return init, nil
@@ -679,15 +684,18 @@ func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, comm *processComm
679684
return nil, err
680685
}
681686
proc := &setnsProcess{
682-
cmd: cmd,
687+
containerProcess: containerProcess{
688+
cmd: cmd,
689+
comm: comm,
690+
manager: c.cgroupManager,
691+
config: c.newInitConfig(p),
692+
process: p,
693+
bootstrapData: data,
694+
container: c,
695+
},
683696
cgroupPaths: state.CgroupPaths,
684697
rootlessCgroups: c.config.RootlessCgroups,
685698
intelRdtPath: state.IntelRdtPath,
686-
comm: comm,
687-
manager: c.cgroupManager,
688-
config: c.newInitConfig(p),
689-
process: p,
690-
bootstrapData: data,
691699
initProcessPid: state.InitProcessPid,
692700
}
693701
if len(p.SubCgroupPaths) > 0 {
@@ -1041,17 +1049,6 @@ func (c *Container) orderNamespacePaths(namespaces map[configs.NamespaceType]str
10411049
return paths, nil
10421050
}
10431051

1044-
// encodeIDMapping renders every entry of idMap as one
// "<ContainerID> <HostID> <Size>\n" line and returns the concatenated lines
// as a byte slice. An empty idMap produces an empty result.
//
// NOTE(review): every line of this function carries a "-" diff marker, i.e.
// the function is deleted by this commit.
func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
1045-
data := bytes.NewBuffer(nil)
1046-
for _, im := range idMap {
1047-
line := fmt.Sprintf("%d %d %d\n", im.ContainerID, im.HostID, im.Size)
1048-
if _, err := data.WriteString(line); err != nil {
1049-
return nil, err
1050-
}
1051-
}
1052-
return data.Bytes(), nil
1053-
}
1054-
10551052
// netlinkError is an error wrapper type for use by custom netlink message
10561053
// types. Panics with errors are wrapped in netlinkError so that the recover
10571054
// in bootstrapData can distinguish intentional panics.
@@ -1098,59 +1095,6 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa
10981095
})
10991096
}
11001097

1101-
// write namespace paths only when we are not joining an existing user ns
1102-
_, joinExistingUser := nsMaps[configs.NEWUSER]
1103-
if !joinExistingUser {
1104-
// write uid mappings
1105-
if len(c.config.UIDMappings) > 0 {
1106-
if c.config.RootlessEUID {
1107-
// We resolve the paths for new{u,g}idmap from
1108-
// the context of runc to avoid doing a path
1109-
// lookup in the nsexec context.
1110-
if path, err := execabs.LookPath("newuidmap"); err == nil {
1111-
r.AddData(&Bytemsg{
1112-
Type: UidmapPathAttr,
1113-
Value: []byte(path),
1114-
})
1115-
}
1116-
}
1117-
b, err := encodeIDMapping(c.config.UIDMappings)
1118-
if err != nil {
1119-
return nil, err
1120-
}
1121-
r.AddData(&Bytemsg{
1122-
Type: UidmapAttr,
1123-
Value: b,
1124-
})
1125-
}
1126-
1127-
// write gid mappings
1128-
if len(c.config.GIDMappings) > 0 {
1129-
b, err := encodeIDMapping(c.config.GIDMappings)
1130-
if err != nil {
1131-
return nil, err
1132-
}
1133-
r.AddData(&Bytemsg{
1134-
Type: GidmapAttr,
1135-
Value: b,
1136-
})
1137-
if c.config.RootlessEUID {
1138-
if path, err := execabs.LookPath("newgidmap"); err == nil {
1139-
r.AddData(&Bytemsg{
1140-
Type: GidmapPathAttr,
1141-
Value: []byte(path),
1142-
})
1143-
}
1144-
}
1145-
if requiresRootOrMappingTool(c.config) {
1146-
r.AddData(&Boolmsg{
1147-
Type: SetgroupAttr,
1148-
Value: true,
1149-
})
1150-
}
1151-
}
1152-
}
1153-
11541098
if c.config.OomScoreAdj != nil {
11551099
// write oom_score_adj
11561100
r.AddData(&Bytemsg{
@@ -1159,12 +1103,6 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa
11591103
})
11601104
}
11611105

1162-
// write rootless
1163-
r.AddData(&Boolmsg{
1164-
Type: RootlessEUIDAttr,
1165-
Value: c.config.RootlessEUID,
1166-
})
1167-
11681106
// write boottime and monotonic time ns offsets.
11691107
if c.config.TimeOffsets != nil {
11701108
var offsetSpec bytes.Buffer
@@ -1205,9 +1143,9 @@ func ignoreTerminateErrors(err error) error {
12051143
return err
12061144
}
12071145

1208-
// requiresRootOrMappingTool reports whether the given GID mappings differ
// from the single mapping {ContainerID: 0, HostID: <current effective GID>,
// Size: 1}. It returns false only for exactly that one-entry mapping; any
// other value, including a nil or empty slice, yields true.
//
// The commit changes the parameter from the whole *configs.Config to just the
// GID mappings slice (old "-" and new "+" lines are both shown below).
func requiresRootOrMappingTool(c *configs.Config) bool {
1146+
func requiresRootOrMappingTool(gidMappings []configs.IDMap) bool {
12091147
// The reference mapping: container GID 0 mapped to the caller's effective GID.
gidMap := []configs.IDMap{
12101148
{ContainerID: 0, HostID: int64(os.Getegid()), Size: 1},
12111149
}
1212-
return !reflect.DeepEqual(c.GIDMappings, gidMap)
1150+
return !reflect.DeepEqual(gidMappings, gidMap)
12131151
}

libcontainer/container_setup.go

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
package libcontainer
2+
3+
import (
4+
"encoding/binary"
5+
"fmt"
6+
"io"
7+
"os"
8+
9+
"github.com/opencontainers/runc/libcontainer/configs"
10+
"github.com/opencontainers/runc/libcontainer/system"
11+
"github.com/sirupsen/logrus"
12+
"github.com/vishvananda/netlink/nl"
13+
"golang.org/x/sys/execabs"
14+
"golang.org/x/sys/unix"
15+
)
16+
17+
// NsExecSyncMsg is used for communication between the parent and child during
18+
// container setup.
//
// Each message travels as a single bufSize-byte (uint32) value in native byte
// order; see parseNsExecSync and ackSyncMsg.
19+
type NsExecSyncMsg uint32
20+
21+
// Sync message values. They are numbered consecutively starting at 0x40.
//
// NOTE(review): these presumably have to match the sync enum used by the C
// nsexec side of the protocol — confirm the numbering there, in particular
// for the time-offset messages.
const (
22+
syncUsermapPls NsExecSyncMsg = iota + 0x40 // 0x40: child asks for its userns mappings to be set up
23+
syncUsermapAck // 0x41: reply sent after the mappings were set up
24+
syncRecvPidPls // 0x42: child is about to send a pid; also ends the sync loop
25+
syncRecvPidAck // 0x43: reply sent after the pid was read
26+
syncTimeOffsetsPls // 0x44: child asks for timens offsets to be configured
27+
syncTimeOffsetsAck // 0x45: reply sent after the offsets were configured
28+
)
29+
30+
// NsExecSetup services the requests that the nsexec child of process sends
// over the stage-1 socket (see helpDoingNsExec).
type NsExecSetup struct {
31+
// process is the container process whose nsexec child is being set up.
process *containerProcess
32+
}
33+
34+
// bufSize is the size in bytes of one sync message on the wire (a uint32).
const bufSize int = 4
35+
36+
// parseNsExecSync runs the given callback function on each message received
37+
// from the child. It will return once the child sends SYNC_RECVPID_PLS.
38+
func parseNsExecSync(r io.Reader, fn func(NsExecSyncMsg) error) error {
39+
logrus.Debugf("start to communicate with the nsexec\n")
40+
var msg NsExecSyncMsg
41+
var buf [bufSize]byte
42+
native := nl.NativeEndian()
43+
44+
for {
45+
if _, err := io.ReadAtLeast(r, buf[:], bufSize); err != nil {
46+
return err
47+
}
48+
msg = NsExecSyncMsg(native.Uint32(buf[:]))
49+
if err := fn(msg); err != nil {
50+
return err
51+
}
52+
if msg == syncRecvPidPls {
53+
break
54+
}
55+
}
56+
logrus.Debugf("finished communicating with the nsexec\n")
57+
return nil
58+
}
59+
60+
// ackSyncMsg is used to send a message to the child.
61+
func ackSyncMsg(f *os.File, msg NsExecSyncMsg) error {
62+
var buf [bufSize]byte
63+
native := nl.NativeEndian()
64+
native.PutUint32(buf[:], uint32(msg))
65+
if _, err := unix.Write(int(f.Fd()), buf[:]); err != nil {
66+
logrus.Debugf("failed to write message to nsexec: %v", err)
67+
return err
68+
}
69+
return nil
70+
}
71+
72+
// helpDoingNsExec is used to help the process to communicate with the nsexec.
73+
func (s *NsExecSetup) helpDoingNsExec() error {
74+
return parseNsExecSync(s.process.comm.stage1SockParent, func(msg NsExecSyncMsg) error {
75+
switch msg {
76+
case syncUsermapPls:
77+
logrus.Debugf("stage-1 requested userns mappings")
78+
if err := s.setupUsermap(); err != nil {
79+
return err
80+
}
81+
return ackSyncMsg(s.process.comm.stage1SockParent, syncUsermapAck)
82+
case syncRecvPidPls:
83+
logrus.Debugf("stage-1 reports pid")
84+
var pid uint32
85+
if err := binary.Read(s.process.comm.stage1SockParent, nl.NativeEndian(), &pid); err != nil {
86+
return err
87+
}
88+
s.process.childPid = int(pid)
89+
return ackSyncMsg(s.process.comm.stage1SockParent, syncRecvPidAck)
90+
case syncTimeOffsetsPls:
91+
logrus.Debugf("stage-1 requested timens offsets to be configured")
92+
if err := system.UpdateTimeNsOffsets(s.process.cmd.Process.Pid, s.process.container.config.TimeOffsets); err != nil {
93+
return err
94+
}
95+
return ackSyncMsg(s.process.comm.stage1SockParent, syncTimeOffsetsAck)
96+
default:
97+
}
98+
return fmt.Errorf("unexpected message %d", msg)
99+
})
100+
}
101+
102+
// setupUsermap is used to set up the user mappings.
103+
func (s *NsExecSetup) setupUsermap() error {
104+
var uidMapPath, gidMapPath string
105+
/*
106+
* Enable setgroups(2) if we've been asked to. But we also
107+
* have to explicitly disable setgroups(2) if we're
108+
* creating a rootless container for single-entry mapping.
109+
* i.e. config.is_setgroup == false.
110+
* (this is required since Linux 3.19).
111+
*
112+
* For rootless multi-entry mapping, config.is_setgroup shall be true and
113+
* newuidmap/newgidmap shall be used.
114+
*/
115+
if s.process.config.RootlessEUID && !requiresRootOrMappingTool(s.process.config.Config.GIDMappings) {
116+
_ = system.UpdateSetgroups(s.process.cmd.Process.Pid, system.SetgroupsDeny)
117+
}
118+
119+
nsMaps := make(map[configs.NamespaceType]string)
120+
for _, ns := range s.process.container.config.Namespaces {
121+
if ns.Path != "" {
122+
nsMaps[ns.Type] = ns.Path
123+
}
124+
}
125+
_, joinExistingUser := nsMaps[configs.NEWUSER]
126+
if !joinExistingUser {
127+
// write uid mappings
128+
if len(s.process.container.config.UIDMappings) > 0 {
129+
if s.process.container.config.RootlessEUID {
130+
if path, err := execabs.LookPath("newuidmap"); err == nil {
131+
uidMapPath = path
132+
}
133+
}
134+
}
135+
136+
// write gid mappings
137+
if len(s.process.container.config.GIDMappings) > 0 {
138+
if s.process.container.config.RootlessEUID {
139+
if path, err := execabs.LookPath("newgidmap"); err == nil {
140+
gidMapPath = path
141+
}
142+
}
143+
}
144+
}
145+
146+
/* Set up mappings. */
147+
if err := system.UpdateUidmap(uidMapPath, s.process.cmd.Process.Pid, s.process.container.config.UIDMappings); err != nil {
148+
return err
149+
}
150+
return system.UpdateGidmap(gidMapPath, s.process.cmd.Process.Pid, s.process.container.config.GIDMappings)
151+
}

libcontainer/init_linux.go

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"strconv"
1414
"strings"
1515
"syscall"
16+
"unsafe"
1617

1718
"github.com/containerd/console"
1819
"github.com/moby/sys/user"
@@ -35,11 +36,6 @@ const (
3536
initStandard initType = "standard"
3637
)
3738

38-
// pid holds two process IDs, JSON-tagged "stage2_pid" and "stage1_pid".
//
// NOTE(review): every line of this type carries a "-" diff marker, i.e. the
// type is deleted by this commit.
type pid struct {
39-
// Pid is (de)serialized under the JSON key "stage2_pid".
Pid int `json:"stage2_pid"`
40-
// PidFirstChild is (de)serialized under the JSON key "stage1_pid".
PidFirstChild int `json:"stage1_pid"`
41-
}
42-
4339
// network is an internal struct used to setup container networks.
4440
type network struct {
4541
configs.Network
@@ -151,6 +147,11 @@ func startInitialization() (retErr error) {
151147

152148
logrus.SetOutput(logPipe)
153149
logrus.SetFormatter(new(logrus.JSONFormatter))
150+
151+
/* For debugging. */
152+
procName := "runc:[2:INIT]"
153+
_ = unix.Prctl(unix.PR_SET_NAME, uintptr(unsafe.Pointer(&procName)), 0, 0, 0)
154+
154155
logrus.Debug("child process in init()")
155156

156157
// Only init processes have FIFOFD.
@@ -215,6 +216,24 @@ func startInitialization() (retErr error) {
215216
return err
216217
}
217218

219+
if _, err := unix.Setsid(); err != nil {
220+
return fmt.Errorf("setsid failed: %w", err)
221+
}
222+
223+
if err := unix.Setuid(0); err != nil {
224+
return fmt.Errorf("setuid failed %w", err)
225+
}
226+
227+
if err := unix.Setgid(0); err != nil {
228+
return fmt.Errorf("setgid failed %w", err)
229+
}
230+
231+
if !config.RootlessEUID && requiresRootOrMappingTool(config.Config.GIDMappings) {
232+
if err := unix.Setgroups([]int{0}); err != nil {
233+
return fmt.Errorf("setgroups failed %w", err)
234+
}
235+
}
236+
218237
// If init succeeds, it will not return, hence none of the defers will be called.
219238
return containerInit(it, &config, syncPipe, consoleSocket, pidfdSocket, fifoFile, logPipe, dmzExe)
220239
}

0 commit comments

Comments
 (0)