Skip to content

Commit a6d6391

Browse files
committed
add support for containers with multiple network namespaces
1 parent 0fb14e3 commit a6d6391

File tree

1 file changed

+85
-49
lines changed

1 file changed

+85
-49
lines changed

containers/container.go

+85-49
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,27 @@ type L7Stats struct {
8080
Latency prometheus.Histogram
8181
}
8282

83+
type ListenDetails struct { // per-(listen addr, pid) state; stored as the values of the inner map of Container.listens
84+
ClosedAt time.Time // zero while the listen is considered open; set to the close time by onListenClose / revalidateListens
85+
NsIPs []netaddr.IP // result of proc.GetNsIps for the pid's netns; only filled in by onListenOpen when the listen addr is unspecified
86+
}
87+
88+
type Process struct { // one tracked process of a container; stored in Container.processes keyed by pid
89+
Pid uint32 // process id, same value as the key in Container.processes
90+
StartedAt time.Time // stats.BeginTime captured in onProcessStart
91+
NetNsId string // UniqueId() of the netns returned by proc.GetNetNs(pid) in onProcessStart
92+
}
93+
94+
func (p *Process) isHostNs() bool { // reports whether the process's recorded netns id equals hostNetNsId
95+
return p.NetNsId == hostNetNsId // dereferences p: callers must nil-check the result of the c.processes lookup first
96+
}
97+
8398
type Container struct {
8499
id ContainerID
85100
cgroup *cgroup.Cgroup
86101
metadata *ContainerMetadata
87102

88-
pids map[uint32]time.Time // pid -> start time
103+
processes map[uint32]*Process
89104

90105
startedAt time.Time
91106
zombieAt time.Time
@@ -95,7 +110,7 @@ type Container struct {
95110
delaysByPid map[uint32]Delays
96111
delaysLock sync.Mutex
97112

98-
listens map[netaddr.IPPort]map[uint32]time.Time // listen addr -> pid -> close time
113+
listens map[netaddr.IPPort]map[uint32]*ListenDetails
99114

100115
connectsSuccessful map[AddrPair]int64 // dst:actual_dst -> count
101116
connectsFailed map[netaddr.IPPort]int64 // dst -> count
@@ -109,11 +124,8 @@ type Container struct {
109124

110125
mounts map[string]proc.MountInfo
111126

112-
nsIPs []netaddr.IP
113-
114127
logParsers map[string]*LogParser
115128

116-
isHostNs bool
117129
hostConntrack *Conntrack
118130
nsConntrack *Conntrack
119131
lbConntracks []*Conntrack
@@ -134,11 +146,11 @@ func NewContainer(id ContainerID, cg *cgroup.Cgroup, md *ContainerMetadata, host
134146
cgroup: cg,
135147
metadata: md,
136148

137-
pids: map[uint32]time.Time{},
149+
processes: map[uint32]*Process{},
138150

139151
delaysByPid: map[uint32]Delays{},
140152

141-
listens: map[netaddr.IPPort]map[uint32]time.Time{},
153+
listens: map[netaddr.IPPort]map[uint32]*ListenDetails{},
142154

143155
connectsSuccessful: map[AddrPair]int64{},
144156
connectsFailed: map[netaddr.IPPort]int64{},
@@ -151,7 +163,6 @@ func NewContainer(id ContainerID, cg *cgroup.Cgroup, md *ContainerMetadata, host
151163

152164
logParsers: map[string]*LogParser{},
153165

154-
isHostNs: hostNetNsId == netNs.UniqueId(),
155166
hostConntrack: hostConntrack,
156167

157168
done: make(chan struct{}),
@@ -304,7 +315,7 @@ func (c *Container) Collect(ch chan<- prometheus.Metric) {
304315

305316
appTypes := map[string]struct{}{}
306317
seenJvms := map[string]bool{}
307-
for pid := range c.pids {
318+
for pid := range c.processes {
308319
cmdline := proc.GetCmdline(pid)
309320
if len(cmdline) == 0 {
310321
continue
@@ -352,15 +363,21 @@ func (c *Container) onProcessStart(pid uint32) {
352363
if err != nil {
353364
return
354365
}
366+
ns, err := proc.GetNetNs(pid)
367+
if err != nil {
368+
return
369+
}
370+
defer ns.Close()
355371
c.zombieAt = time.Time{}
356-
c.pids[pid] = stats.BeginTime
372+
c.processes[pid] = &Process{Pid: pid, StartedAt: stats.BeginTime, NetNsId: ns.UniqueId()}
373+
357374
if c.startedAt.IsZero() {
358375
c.startedAt = stats.BeginTime
359376
} else {
360377
min := stats.BeginTime
361-
for _, t := range c.pids {
362-
if t.Before(min) {
363-
min = t
378+
for _, p := range c.processes {
379+
if p.StartedAt.Before(min) {
380+
min = p.StartedAt
364381
}
365382
}
366383
if min.After(c.startedAt) {
@@ -373,8 +390,8 @@ func (c *Container) onProcessStart(pid uint32) {
373390
func (c *Container) onProcessExit(pid uint32, oomKill bool) {
374391
c.lock.Lock()
375392
defer c.lock.Unlock()
376-
delete(c.pids, pid)
377-
if len(c.pids) == 0 {
393+
delete(c.processes, pid)
394+
if len(c.processes) == 0 {
378395
c.zombieAt = time.Now()
379396
}
380397
delete(c.delaysByPid, pid)
@@ -418,9 +435,10 @@ func (c *Container) onListenOpen(pid uint32, addr netaddr.IPPort, safe bool) {
418435
defer c.lock.Unlock()
419436
}
420437
if _, ok := c.listens[addr]; !ok {
421-
c.listens[addr] = map[uint32]time.Time{}
438+
c.listens[addr] = map[uint32]*ListenDetails{}
422439
}
423-
c.listens[addr][pid] = time.Time{}
440+
details := &ListenDetails{}
441+
c.listens[addr][pid] = details
424442

425443
if addr.IP().IsUnspecified() {
426444
ns, err := proc.GetNetNs(pid)
@@ -434,7 +452,7 @@ func (c *Container) onListenOpen(pid uint32, addr netaddr.IPPort, safe bool) {
434452
if ips, err := proc.GetNsIps(ns); err != nil {
435453
klog.Warningln(err)
436454
} else {
437-
c.nsIPs = ips
455+
details.NsIPs = ips
438456
}
439457
}
440458
}
@@ -444,13 +462,19 @@ func (c *Container) onListenClose(pid uint32, addr netaddr.IPPort) {
444462
defer c.lock.Unlock()
445463
if _, byAddr := c.listens[addr]; byAddr {
446464
if _, byPid := c.listens[addr][pid]; byPid {
447-
c.listens[addr][pid] = time.Now()
465+
if details := c.listens[addr][pid]; details != nil {
466+
details.ClosedAt = time.Now()
467+
}
448468
}
449469
}
450470
}
451471

452472
func (c *Container) onConnectionOpen(pid uint32, fd uint64, src, dst netaddr.IPPort, timestamp uint64, failed bool) {
453-
if dst.IP().IsLoopback() && !c.isHostNs {
473+
p := c.processes[pid]
474+
if p == nil {
475+
return
476+
}
477+
if dst.IP().IsLoopback() && !p.isHostNs() {
454478
return
455479
}
456480
whitelisted := false
@@ -468,7 +492,7 @@ func (c *Container) onConnectionOpen(pid uint32, fd uint64, src, dst netaddr.IPP
468492
if failed {
469493
c.connectsFailed[dst]++
470494
} else {
471-
actualDst, err := c.getActualDestination(pid, src, dst)
495+
actualDst, err := c.getActualDestination(p, src, dst)
472496
if err != nil {
473497
if !common.IsNotExist(err) {
474498
klog.Warningf("cannot open NetNs for pid %d: %s", pid, err)
@@ -478,7 +502,7 @@ func (c *Container) onConnectionOpen(pid uint32, fd uint64, src, dst netaddr.IPP
478502
switch {
479503
case actualDst == nil:
480504
actualDst = &dst
481-
case actualDst.IP().IsLoopback() && !c.isHostNs:
505+
case actualDst.IP().IsLoopback() && !p.isHostNs():
482506
return
483507
}
484508
c.connectsSuccessful[AddrPair{src: dst, dst: *actualDst}]++
@@ -493,7 +517,7 @@ func (c *Container) onConnectionOpen(pid uint32, fd uint64, src, dst netaddr.IPP
493517
c.connectLastAttempt[dst] = time.Now()
494518
}
495519

496-
func (c *Container) getActualDestination(pid uint32, src, dst netaddr.IPPort) (*netaddr.IPPort, error) {
520+
func (c *Container) getActualDestination(p *Process, src, dst netaddr.IPPort) (*netaddr.IPPort, error) {
497521
if actualDst := lookupCiliumConntrackTable(src, dst); actualDst != nil {
498522
return actualDst, nil
499523
}
@@ -506,9 +530,9 @@ func (c *Container) getActualDestination(pid uint32, src, dst netaddr.IPPort) (*
506530
if actualDst != nil {
507531
return actualDst, nil
508532
}
509-
if !c.isHostNs {
533+
if !p.isHostNs() {
510534
if c.nsConntrack == nil {
511-
netNs, err := proc.GetNetNs(pid)
535+
netNs, err := proc.GetNetNs(p.Pid)
512536
if err != nil {
513537
return nil, err
514538
}
@@ -608,7 +632,7 @@ func (c *Container) onRetransmit(srcDst AddrPair) bool {
608632
func (c *Container) updateDelays() {
609633
c.delaysLock.Lock()
610634
defer c.delaysLock.Unlock()
611-
for pid := range c.pids {
635+
for pid := range c.processes {
612636
stats, err := TaskstatsTGID(pid)
613637
if err != nil {
614638
continue
@@ -629,7 +653,7 @@ func (c *Container) getMounts() map[string]map[string]*proc.FSStat {
629653
res := map[string]map[string]*proc.FSStat{}
630654
for _, mi := range c.mounts {
631655
var stat *proc.FSStat
632-
for pid := range c.pids {
656+
for pid := range c.processes {
633657
s, err := proc.StatFS(proc.Path(pid, "root", mi.MountPoint))
634658
if err == nil {
635659
stat = &s
@@ -651,20 +675,28 @@ func (c *Container) getListens() map[netaddr.IPPort]int {
651675
res := map[netaddr.IPPort]int{}
652676
for addr, byPid := range c.listens {
653677
open := 0
654-
for _, closedAt := range byPid {
655-
if closedAt.IsZero() {
678+
isHostNs := false
679+
ips := map[netaddr.IP]bool{}
680+
for pid, details := range byPid {
681+
p := c.processes[pid]
682+
if p == nil {
683+
continue
684+
}
685+
if p.isHostNs() {
686+
isHostNs = true
687+
}
688+
if details.ClosedAt.IsZero() {
656689
open = 1
657-
break
690+
}
691+
for _, ip := range details.NsIPs {
692+
ips[ip] = true
658693
}
659694
}
660-
var ips []netaddr.IP
661-
if addr.IP().IsUnspecified() {
662-
ips = c.nsIPs
663-
} else {
664-
ips = []netaddr.IP{addr.IP()}
695+
if !addr.IP().IsUnspecified() {
696+
ips = map[netaddr.IP]bool{addr.IP(): true}
665697
}
666-
for _, ip := range ips {
667-
if ip.IsLoopback() && !c.isHostNs {
698+
for ip := range ips {
699+
if ip.IsLoopback() && !isHostNs {
668700
continue
669701
}
670702
res[netaddr.IPPortFrom(ip, addr.Port())] = open
@@ -723,7 +755,7 @@ func (c *Container) getProxiedListens() map[string]map[netaddr.IPPort]struct{} {
723755

724756
func (c *Container) ping() map[netaddr.IP]float64 {
725757
netNs := netns.None()
726-
for pid := range c.pids {
758+
for pid := range c.processes {
727759
if pid == agentPid {
728760
netNs = selfNetNs
729761
break
@@ -829,8 +861,12 @@ func (c *Container) gc(now time.Time) {
829861
established := map[AddrPair]struct{}{}
830862
establishedDst := map[netaddr.IPPort]struct{}{}
831863
listens := map[netaddr.IPPort]string{}
832-
for pid := range c.pids {
833-
sockets, err := proc.GetSockets(pid)
864+
seenNamespaces := map[string]bool{}
865+
for _, p := range c.processes {
866+
if seenNamespaces[p.NetNsId] {
867+
continue
868+
}
869+
sockets, err := proc.GetSockets(p.Pid)
834870
if err != nil {
835871
continue
836872
}
@@ -842,7 +878,7 @@ func (c *Container) gc(now time.Time) {
842878
establishedDst[s.DAddr] = struct{}{}
843879
}
844880
}
845-
break
881+
seenNamespaces[p.NetNsId] = true
846882
}
847883

848884
c.revalidateListens(now, listens)
@@ -888,9 +924,9 @@ func (c *Container) revalidateListens(now time.Time, actualListens map[netaddr.I
888924
continue
889925
}
890926
klog.Warningln("deleting the outdated listen:", addr)
891-
for pid, closedAt := range byPid {
892-
if closedAt.IsZero() {
893-
byPid[pid] = now
927+
for _, details := range byPid {
928+
if details.ClosedAt.IsZero() {
929+
details.ClosedAt = now
894930
}
895931
}
896932
}
@@ -903,8 +939,8 @@ func (c *Container) revalidateListens(now time.Time, actualListens map[netaddr.I
903939
continue
904940
}
905941
open := false
906-
for _, closedAt := range byPids {
907-
if closedAt.IsZero() {
942+
for _, details := range byPids {
943+
if details.ClosedAt.IsZero() {
908944
open = true
909945
break
910946
}
@@ -916,7 +952,7 @@ func (c *Container) revalidateListens(now time.Time, actualListens map[netaddr.I
916952

917953
if len(missingListens) > 0 {
918954
inodeToPid := map[string]uint32{}
919-
for pid := range c.pids {
955+
for pid := range c.processes {
920956
fds, err := proc.ReadFds(pid)
921957
if err != nil {
922958
continue
@@ -938,8 +974,8 @@ func (c *Container) revalidateListens(now time.Time, actualListens map[netaddr.I
938974
}
939975

940976
for addr, pids := range c.listens {
941-
for pid, closedAt := range pids {
942-
if !closedAt.IsZero() && now.Sub(closedAt) > gcInterval {
977+
for pid, details := range pids {
978+
if !details.ClosedAt.IsZero() && now.Sub(details.ClosedAt) > gcInterval {
943979
delete(c.listens[addr], pid)
944980
}
945981
}

0 commit comments

Comments
 (0)