Skip to content

Commit c4ad3c8

Browse files
ayushr2gvisor-bot
authored andcommitted
overlayfs: Do not SetStat() UID/GID for newly created file.
Instead, use a credentials object which has the target UID/GID set as the EffectiveKUID/EffectiveKGID. This will ensure that the file is created with the right owners. This is consistent with what Linux does. This avoids making unnecessary SetStatAt() calls to the VFS, which may incur additional RPCs. This commit additionally fixes the following bugs: - When parent directory has the SGID bit set, newly created symlinks should inherit the GID from the parent directory. This was not happening earlier. - There was a bug in newChildOwnerStat() where it was checking an uninitialized `stat.Mode` to check if the newly created file is a directory or not. That would always evaluate to false, so child directories would not have the SGID bit set. PiperOrigin-RevId: 767693990
1 parent 7232b29 commit c4ad3c8

File tree

4 files changed

+65
-86
lines changed

4 files changed

+65
-86
lines changed

pkg/sentry/fsimpl/overlay/BUILD

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,13 @@ declare_mutex(
5353
prefix = "directoryFD",
5454
)
5555

56+
declare_mutex(
57+
name = "create_creds_mutex",
58+
out = "create_creds_mutex.go",
59+
package = "overlay",
60+
prefix = "createCreds",
61+
)
62+
5663
declare_rwmutex(
5764
name = "rename_rwmutex",
5865
out = "rename_rwmutex.go",
@@ -91,9 +98,10 @@ go_library(
9198
srcs = [
9299
"ancestry_rwmutex.go",
93100
"copy_up.go",
101+
"create_creds_mutex.go",
94102
"data_rwmutex.go",
95103
"dev_mutex.go",
96-
"dir_cache_mutex",
104+
"dir_cache_mutex.go",
97105
"dir_fd_mutex.go",
98106
"dir_mutex.go",
99107
"directory.go",

pkg/sentry/fsimpl/overlay/directory.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@ func (d *dentry) isDir() bool {
2626
return d.mode.Load()&linux.S_IFMT == linux.S_IFDIR
2727
}
2828

29+
func (d *dentry) isSGIDSet() bool {
30+
return d.mode.Load()&linux.ModeSetGID != 0
31+
}
32+
2933
// Preconditions:
3034
// - d.dirMu must be locked.
3135
// - d.isDir().

pkg/sentry/fsimpl/overlay/filesystem.go

Lines changed: 15 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -693,7 +693,8 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
693693
return err
694694
}
695695
}
696-
if err := vfsObj.LinkAt(ctx, fs.creds, &vfs.PathOperation{
696+
createCreds := parent.credsForCreate(rp.Credentials(), parent.isSGIDSet())
697+
if err := vfsObj.LinkAt(ctx, createCreds, &vfs.PathOperation{
697698
Root: old.upperVD,
698699
Start: old.upperVD,
699700
}, &newpop); err != nil {
@@ -702,21 +703,6 @@ func (fs *filesystem) LinkAt(ctx context.Context, rp *vfs.ResolvingPath, vd vfs.
702703
}
703704
return err
704705
}
705-
creds := rp.Credentials()
706-
if err := vfsObj.SetStatAt(ctx, fs.creds, &newpop, &vfs.SetStatOptions{
707-
Stat: linux.Statx{
708-
Mask: linux.STATX_UID | linux.STATX_GID,
709-
UID: uint32(creds.EffectiveKUID),
710-
GID: uint32(creds.EffectiveKGID),
711-
},
712-
}); err != nil {
713-
if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &newpop); cleanupErr != nil {
714-
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after LinkAt metadata update failure: %v", cleanupErr))
715-
} else if haveUpperWhiteout {
716-
fs.cleanupRecreateWhiteout(ctx, vfsObj, &newpop)
717-
}
718-
return err
719-
}
720706
old.watches.Notify(ctx, "", linux.IN_ATTRIB, 0 /* cookie */, vfs.InodeEvent, false /* unlinked */)
721707
return nil
722708
})
@@ -740,23 +726,19 @@ func (fs *filesystem) MkdirAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
740726
return err
741727
}
742728
}
743-
if err := vfsObj.MkdirAt(ctx, fs.creds, &pop, &opts); err != nil {
729+
sgidSet := parent.isSGIDSet()
730+
if sgidSet {
731+
// Directories inherit the SGID bit.
732+
opts.Mode |= linux.ModeSetGID
733+
}
734+
createCreds := parent.credsForCreate(rp.Credentials(), sgidSet)
735+
if err := vfsObj.MkdirAt(ctx, createCreds, &pop, &opts); err != nil {
744736
if haveUpperWhiteout {
745737
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
746738
}
747739
return err
748740
}
749741

750-
if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
751-
Stat: parent.newChildOwnerStat(opts.Mode, rp.Credentials()),
752-
}); err != nil {
753-
if cleanupErr := vfsObj.RmdirAt(ctx, fs.creds, &pop); cleanupErr != nil {
754-
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer directory after MkdirAt metadata update failure: %v", cleanupErr))
755-
} else if haveUpperWhiteout {
756-
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
757-
}
758-
return err
759-
}
760742
if haveUpperWhiteout {
761743
// A whiteout is being replaced with this new directory. There may be
762744
// directories on lower layers (previously hidden by the whiteout) that
@@ -807,23 +789,13 @@ func (fs *filesystem) MknodAt(ctx context.Context, rp *vfs.ResolvingPath, opts v
807789
return err
808790
}
809791
}
810-
if err := vfsObj.MknodAt(ctx, fs.creds, &pop, &opts); err != nil {
792+
createCreds := parent.credsForCreate(rp.Credentials(), parent.isSGIDSet())
793+
if err := vfsObj.MknodAt(ctx, createCreds, &pop, &opts); err != nil {
811794
if haveUpperWhiteout {
812795
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
813796
}
814797
return err
815798
}
816-
creds := rp.Credentials()
817-
if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
818-
Stat: parent.newChildOwnerStat(opts.Mode, creds),
819-
}); err != nil {
820-
if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
821-
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after MknodAt metadata update failure: %v", cleanupErr))
822-
} else if haveUpperWhiteout {
823-
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
824-
}
825-
return err
826-
}
827799
return nil
828800
})
829801
}
@@ -1027,7 +999,8 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving
1027999
}
10281000
}
10291001
// Create the file on the upper layer, and get an FD representing it.
1030-
upperFD, err := vfsObj.OpenAt(ctx, fs.creds, &pop, &vfs.OpenOptions{
1002+
createCreds := parent.credsForCreate(creds, parent.isSGIDSet())
1003+
upperFD, err := vfsObj.OpenAt(ctx, createCreds, &pop, &vfs.OpenOptions{
10311004
Flags: opts.Flags&^vfs.FileCreationFlags | linux.O_CREAT | linux.O_EXCL,
10321005
Mode: opts.Mode,
10331006
})
@@ -1038,18 +1011,6 @@ func (fs *filesystem) createAndOpenLocked(ctx context.Context, rp *vfs.Resolving
10381011
return nil, err
10391012
}
10401013

1041-
// Change the file's owner to the caller. We can't use upperFD.SetStat()
1042-
// because it will pick up creds from ctx.
1043-
if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
1044-
Stat: parent.newChildOwnerStat(opts.Mode, creds),
1045-
}); err != nil {
1046-
if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
1047-
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after OpenAt(O_CREAT) metadata update failure: %v", cleanupErr))
1048-
} else if haveUpperWhiteout {
1049-
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
1050-
}
1051-
return nil, err
1052-
}
10531014
// Re-lookup to get a dentry representing the new file, which is needed for
10541015
// the returned FD.
10551016
child, _, err := fs.getChildLocked(ctx, parent, childName, ds)
@@ -1609,27 +1570,13 @@ func (fs *filesystem) SymlinkAt(ctx context.Context, rp *vfs.ResolvingPath, targ
16091570
return err
16101571
}
16111572
}
1612-
if err := vfsObj.SymlinkAt(ctx, fs.creds, &pop, target); err != nil {
1573+
createCreds := parent.credsForCreate(rp.Credentials(), parent.isSGIDSet())
1574+
if err := vfsObj.SymlinkAt(ctx, createCreds, &pop, target); err != nil {
16131575
if haveUpperWhiteout {
16141576
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
16151577
}
16161578
return err
16171579
}
1618-
creds := rp.Credentials()
1619-
if err := vfsObj.SetStatAt(ctx, fs.creds, &pop, &vfs.SetStatOptions{
1620-
Stat: linux.Statx{
1621-
Mask: linux.STATX_UID | linux.STATX_GID,
1622-
UID: uint32(creds.EffectiveKUID),
1623-
GID: uint32(creds.EffectiveKGID),
1624-
},
1625-
}); err != nil {
1626-
if cleanupErr := vfsObj.UnlinkAt(ctx, fs.creds, &pop); cleanupErr != nil {
1627-
panic(fmt.Sprintf("unrecoverable overlayfs inconsistency: failed to delete upper layer file after SymlinkAt metadata update failure: %v", cleanupErr))
1628-
} else if haveUpperWhiteout {
1629-
fs.cleanupRecreateWhiteout(ctx, vfsObj, &pop)
1630-
}
1631-
return err
1632-
}
16331580
return nil
16341581
})
16351582
}

pkg/sentry/fsimpl/overlay/overlay.go

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,10 @@ type filesystem struct {
9999
// used for accesses to the filesystem's layers. creds is immutable.
100100
creds *auth.Credentials
101101

102+
// createCreds is a cache of credentials that is used for create operations.
103+
createCredsMu createCredsMutex `state:"nosave"`
104+
createCreds map[createCredsKey]*auth.Credentials
105+
102106
// dirDevMinor is the device minor number used for directories. dirDevMinor
103107
// is immutable.
104108
dirDevMinor uint32
@@ -148,6 +152,12 @@ type layerDevNoAndIno struct {
148152
ino uint64
149153
}
150154

155+
// +stateify savable
156+
type createCredsKey struct {
157+
uid auth.KUID
158+
gid auth.KGID
159+
}
160+
151161
// GetFilesystem implements vfs.FilesystemType.GetFilesystem.
152162
func (fstype FilesystemType) GetFilesystem(ctx context.Context, vfsObj *vfs.VirtualFilesystem, creds *auth.Credentials, source string, opts vfs.GetFilesystemOptions) (*vfs.Filesystem, *vfs.Dentry, error) {
153163
mopts := vfs.GenericParseMountOptions(opts.Data)
@@ -876,25 +886,35 @@ func (d *dentry) mayDelete(creds *auth.Credentials, child *dentry) error {
876886
)
877887
}
878888

879-
// newChildOwnerStat returns a Statx for configuring the UID, GID, and mode of
880-
// children.
881-
func (d *dentry) newChildOwnerStat(mode linux.FileMode, creds *auth.Credentials) linux.Statx {
882-
stat := linux.Statx{
883-
Mask: uint32(linux.STATX_UID | linux.STATX_GID),
884-
UID: uint32(creds.EffectiveKUID),
885-
GID: uint32(creds.EffectiveKGID),
886-
}
887-
// Set GID and possibly the SGID bit if the parent is an SGID directory.
888-
d.copyMu.RLock()
889-
defer d.copyMu.RUnlock()
890-
if d.mode.Load()&linux.ModeSetGID == linux.ModeSetGID {
891-
stat.GID = d.gid.Load()
892-
if stat.Mode&linux.ModeDirectory == linux.ModeDirectory {
893-
stat.Mode = uint16(mode) | linux.ModeSetGID
894-
stat.Mask |= linux.STATX_MODE
889+
// credsForCreate returns the creds to use for creation operations.
890+
func (d *dentry) credsForCreate(creds *auth.Credentials, isSGIDSet bool) *auth.Credentials {
891+
// During creation operations, Linux uses a modified version of fs.creds. It
892+
// sets the fsuid/fsgid to the caller's fsuid/fsgid. Note that gVisor doesn't
893+
// support fsuid/fsgid and just uses euid/egid to determine file ownership
894+
// for new files. So we just update euid/egid with the caller's euid/egid.
895+
key := createCredsKey{
896+
uid: creds.EffectiveKUID,
897+
gid: creds.EffectiveKGID,
898+
}
899+
if isSGIDSet {
900+
key.gid = auth.KGID(d.gid.Load())
901+
}
902+
if d.fs.creds.EffectiveKUID == key.uid && d.fs.creds.EffectiveKGID == key.gid {
903+
return d.fs.creds
904+
}
905+
d.fs.createCredsMu.Lock()
906+
defer d.fs.createCredsMu.Unlock()
907+
newCreds, ok := d.fs.createCreds[key]
908+
if !ok {
909+
newCreds = d.fs.creds.Fork()
910+
newCreds.EffectiveKUID = key.uid
911+
newCreds.EffectiveKGID = key.gid
912+
if d.fs.createCreds == nil {
913+
d.fs.createCreds = make(map[createCredsKey]*auth.Credentials)
895914
}
915+
d.fs.createCreds[key] = newCreds
896916
}
897-
return stat
917+
return newCreds
898918
}
899919

900920
// fileDescription is embedded by overlay implementations of

0 commit comments

Comments
 (0)