Message ID | 20230919081900.1096840-1-max.kellermann@ionos.com |
---|---|
State | New |
Headers | show |
Series | fs: apply umask if POSIX ACL support is disabled | expand |
On 9/19/23 16:18, Max Kellermann wrote: > Signed-off-by: Max Kellermann <max.kellermann@ionos.com> > --- > fs/ceph/super.h | 1 + > fs/ext2/acl.h | 1 + > fs/jfs/jfs_acl.h | 1 + > include/linux/posix_acl.h | 1 + > 4 files changed, 4 insertions(+) > > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index 51c7f2b14f6f..e7e2f264acf4 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -1194,6 +1194,7 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode) > static inline int ceph_pre_init_acls(struct inode *dir, umode_t *mode, > struct ceph_acl_sec_ctx *as_ctx) > { > + *mode &= ~current_umask(); > return 0; > } This LGTM. Shouldn't we also do this in 'ceph_pre_init_acls()' when we couldn't get 'acl' from 'posix_acl_create()' ? Thanks! - Xiubo > static inline void ceph_init_inode_acls(struct inode *inode, > diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h > index 4a8443a2b8ec..694af789c614 100644 > --- a/fs/ext2/acl.h > +++ b/fs/ext2/acl.h > @@ -67,6 +67,7 @@ extern int ext2_init_acl (struct inode *, struct inode *); > > static inline int ext2_init_acl (struct inode *inode, struct inode *dir) > { > + inode->i_mode &= ~current_umask(); > return 0; > } > #endif > diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h > index f892e54d0fcd..10791e97a46f 100644 > --- a/fs/jfs/jfs_acl.h > +++ b/fs/jfs/jfs_acl.h > @@ -17,6 +17,7 @@ int jfs_init_acl(tid_t, struct inode *, struct inode *); > static inline int jfs_init_acl(tid_t tid, struct inode *inode, > struct inode *dir) > { > + inode->i_mode &= ~current_umask(); > return 0; > } > > diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h > index 0e65b3d634d9..54bc9b1061ca 100644 > --- a/include/linux/posix_acl.h > +++ b/include/linux/posix_acl.h > @@ -128,6 +128,7 @@ static inline void cache_no_acl(struct inode *inode) > static inline int posix_acl_create(struct inode *inode, umode_t *mode, > struct posix_acl **default_acl, struct posix_acl **acl) > { > + *mode &= ~current_umask(); > *default_acl = *acl = NULL; > return 0; > }
I think this is sane, but the patch needs a description of why this is necessary for these specific file systems. Thanks, Shaggy On 9/19/23 3:18AM, Max Kellermann wrote: > Signed-off-by: Max Kellermann <max.kellermann@ionos.com> > --- > fs/ceph/super.h | 1 + > fs/ext2/acl.h | 1 + > fs/jfs/jfs_acl.h | 1 + > include/linux/posix_acl.h | 1 + > 4 files changed, 4 insertions(+) > > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index 51c7f2b14f6f..e7e2f264acf4 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -1194,6 +1194,7 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode) > static inline int ceph_pre_init_acls(struct inode *dir, umode_t *mode, > struct ceph_acl_sec_ctx *as_ctx) > { > + *mode &= ~current_umask(); > return 0; > } > static inline void ceph_init_inode_acls(struct inode *inode, > diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h > index 4a8443a2b8ec..694af789c614 100644 > --- a/fs/ext2/acl.h > +++ b/fs/ext2/acl.h > @@ -67,6 +67,7 @@ extern int ext2_init_acl (struct inode *, struct inode *); > > static inline int ext2_init_acl (struct inode *inode, struct inode *dir) > { > + inode->i_mode &= ~current_umask(); > return 0; > } > #endif > diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h > index f892e54d0fcd..10791e97a46f 100644 > --- a/fs/jfs/jfs_acl.h > +++ b/fs/jfs/jfs_acl.h > @@ -17,6 +17,7 @@ int jfs_init_acl(tid_t, struct inode *, struct inode *); > static inline int jfs_init_acl(tid_t tid, struct inode *inode, > struct inode *dir) > { > + inode->i_mode &= ~current_umask(); > return 0; > } > > diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h > index 0e65b3d634d9..54bc9b1061ca 100644 > --- a/include/linux/posix_acl.h > +++ b/include/linux/posix_acl.h > @@ -128,6 +128,7 @@ static inline void cache_no_acl(struct inode *inode) > static inline int posix_acl_create(struct inode *inode, umode_t *mode, > struct posix_acl **default_acl, struct posix_acl **acl) > { > + *mode &= ~current_umask(); > *default_acl = *acl = NULL; > return 0; > }
On 10/3/23 23:32, Dave Kleikamp wrote: > I think this is sane, but the patch needs a description of why this is > necessary for these specific file systems. > Sounds reasonable. Thanks - Xiubo > Thanks, > Shaggy > > On 9/19/23 3:18AM, Max Kellermann wrote: >> Signed-off-by: Max Kellermann <max.kellermann@ionos.com> >> --- >> fs/ceph/super.h | 1 + >> fs/ext2/acl.h | 1 + >> fs/jfs/jfs_acl.h | 1 + >> include/linux/posix_acl.h | 1 + >> 4 files changed, 4 insertions(+) >> >> diff --git a/fs/ceph/super.h b/fs/ceph/super.h >> index 51c7f2b14f6f..e7e2f264acf4 100644 >> --- a/fs/ceph/super.h >> +++ b/fs/ceph/super.h >> @@ -1194,6 +1194,7 @@ static inline void >> ceph_forget_all_cached_acls(struct inode *inode) >> static inline int ceph_pre_init_acls(struct inode *dir, umode_t *mode, >> struct ceph_acl_sec_ctx *as_ctx) >> { >> + *mode &= ~current_umask(); >> return 0; >> } >> static inline void ceph_init_inode_acls(struct inode *inode, >> diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h >> index 4a8443a2b8ec..694af789c614 100644 >> --- a/fs/ext2/acl.h >> +++ b/fs/ext2/acl.h >> @@ -67,6 +67,7 @@ extern int ext2_init_acl (struct inode *, struct >> inode *); >> static inline int ext2_init_acl (struct inode *inode, struct >> inode *dir) >> { >> + inode->i_mode &= ~current_umask(); >> return 0; >> } >> #endif >> diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h >> index f892e54d0fcd..10791e97a46f 100644 >> --- a/fs/jfs/jfs_acl.h >> +++ b/fs/jfs/jfs_acl.h >> @@ -17,6 +17,7 @@ int jfs_init_acl(tid_t, struct inode *, struct >> inode *); >> static inline int jfs_init_acl(tid_t tid, struct inode *inode, >> struct inode *dir) >> { >> + inode->i_mode &= ~current_umask(); >> return 0; >> } >> diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h >> index 0e65b3d634d9..54bc9b1061ca 100644 >> --- a/include/linux/posix_acl.h >> +++ b/include/linux/posix_acl.h >> @@ -128,6 +128,7 @@ static inline void cache_no_acl(struct inode *inode) >> static inline int posix_acl_create(struct inode *inode, umode_t *mode, >> struct posix_acl **default_acl, struct posix_acl **acl) >> { >> + *mode &= ~current_umask(); >> *default_acl = *acl = NULL; >> return 0; >> } >
On Tue, Oct 3, 2023 at 5:32 PM Dave Kleikamp <dave.kleikamp@oracle.com> wrote: > I think this is sane, but the patch needs a description of why this is > necessary for these specific file systems. Indeed the patch description was lacking, sorry. I sent v2 with a better description. Max
On 10/9/23 9:43AM, Max Kellermann wrote: > One important implementation detail of the posix_acl_create() function > is that it applies the umask to the "mode" parameter. If > CONFIG_FS_POSIX_ACL is disabled, this detail is missing and the umask > may not get applied. > > This patch adds the missing code to posix_acl_create() and to three > filesystems that omit the posix_acl_create() call if their individual > ACL support is disabled (CONFIG_EXT2_FS_POSIX_ACL, > CONFIG_JFS_POSIX_ACL, CONFIG_CEPH_FS_POSIX_ACL). If > posix_acl_create() never gets called, the umask needs to be applied > anyway. > > This bug used to be exploitable easily with O_TMPFILE (see > https://bugzilla.kernel.org/show_bug.cgi?id=203625) but that part was > fixed by commit ac6800e279a2 ("fs: Add missing umask strip in > vfs_tmpfile") last year. The bug may not be reachable by userspace > anymore, but since it is apparently still necessary to apply the umask > again in posix_acl_create(), there is no reason to assume it's not > necessary with ACL support is disabled. > > Signed-off-by: Max Kellermann <max.kellermann@ionos.com> Acked-by: Dave Kleikamp <dave.kleikamp@oracle.com> > --- > fs/ceph/super.h | 6 ++++++ > fs/ext2/acl.h | 6 ++++++ > fs/jfs/jfs_acl.h | 6 ++++++ > include/linux/posix_acl.h | 1 + > 4 files changed, 19 insertions(+) > > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index 51c7f2b14f6f..58349639bd57 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -1194,6 +1194,12 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode) > static inline int ceph_pre_init_acls(struct inode *dir, umode_t *mode, > struct ceph_acl_sec_ctx *as_ctx) > { > + /* usually, the umask is applied by posix_acl_create(), but if > + * ACL support is disabled at compile time, we need to do it > + * here, because posix_acl_create() will never be called > + */ > + *mode &= ~current_umask(); > + > return 0; > } > static inline void ceph_init_inode_acls(struct inode *inode, > diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h > index 4a8443a2b8ec..0ecaa9c20c0c 100644 > --- a/fs/ext2/acl.h > +++ b/fs/ext2/acl.h > @@ -67,6 +67,12 @@ extern int ext2_init_acl (struct inode *, struct inode *); > > static inline int ext2_init_acl (struct inode *inode, struct inode *dir) > { > + /* usually, the umask is applied by posix_acl_create(), but if > + * ACL support is disabled at compile time, we need to do it > + * here, because posix_acl_create() will never be called > + */ > + inode->i_mode &= ~current_umask(); > + > return 0; > } > #endif > diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h > index f892e54d0fcd..64a05e663a45 100644 > --- a/fs/jfs/jfs_acl.h > +++ b/fs/jfs/jfs_acl.h > @@ -17,6 +17,12 @@ int jfs_init_acl(tid_t, struct inode *, struct inode *); > static inline int jfs_init_acl(tid_t tid, struct inode *inode, > struct inode *dir) > { > + /* usually, the umask is applied by posix_acl_create(), but if > + * ACL support is disabled at compile time, we need to do it > + * here, because posix_acl_create() will never be called > + */ > + inode->i_mode &= ~current_umask(); > + > return 0; > } > > diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h > index 0e65b3d634d9..54bc9b1061ca 100644 > --- a/include/linux/posix_acl.h > +++ b/include/linux/posix_acl.h > @@ -128,6 +128,7 @@ static inline void cache_no_acl(struct inode *inode) > static inline int posix_acl_create(struct inode *inode, umode_t *mode, > struct posix_acl **default_acl, struct posix_acl **acl) > { > + *mode &= ~current_umask(); > *default_acl = *acl = NULL; > return 0; > }
On Mon 09-10-23 16:43:39, Max Kellermann wrote: > One important implementation detail of the posix_acl_create() function > is that it applies the umask to the "mode" parameter. If > CONFIG_FS_POSIX_ACL is disabled, this detail is missing and the umask > may not get applied. > > This patch adds the missing code to posix_acl_create() and to three > filesystems that omit the posix_acl_create() call if their individual > ACL support is disabled (CONFIG_EXT2_FS_POSIX_ACL, > CONFIG_JFS_POSIX_ACL, CONFIG_CEPH_FS_POSIX_ACL). If > posix_acl_create() never gets called, the umask needs to be applied > anyway. > > This bug used to be exploitable easily with O_TMPFILE (see > https://bugzilla.kernel.org/show_bug.cgi?id=203625) but that part was > fixed by commit ac6800e279a2 ("fs: Add missing umask strip in > vfs_tmpfile") last year. The bug may not be reachable by userspace > anymore, but since it is apparently still necessary to apply the umask > again in posix_acl_create(), there is no reason to assume it's not > necessary with ACL support is disabled. > > Signed-off-by: Max Kellermann <max.kellermann@ionos.com> Thanks for the updated changelog! But as I'm looking into VFS code isn't this already handled by mode_strip_umask() / vfs_prepare_mode() in fs/namei.c? Because posix_acl_create() doesn't do anything to 'mode' for !IS_POSIXACL() filesystems either. So at least ext2 (where I've checked the mount option handling) does seem to have umask properly applied in all the cases. But I might be missing something... Honza > --- > fs/ceph/super.h | 6 ++++++ > fs/ext2/acl.h | 6 ++++++ > fs/jfs/jfs_acl.h | 6 ++++++ > include/linux/posix_acl.h | 1 + > 4 files changed, 19 insertions(+) > > diff --git a/fs/ceph/super.h b/fs/ceph/super.h > index 51c7f2b14f6f..58349639bd57 100644 > --- a/fs/ceph/super.h > +++ b/fs/ceph/super.h > @@ -1194,6 +1194,12 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode) > static inline int ceph_pre_init_acls(struct inode *dir, umode_t *mode, > struct ceph_acl_sec_ctx *as_ctx) > { > + /* usually, the umask is applied by posix_acl_create(), but if > + * ACL support is disabled at compile time, we need to do it > + * here, because posix_acl_create() will never be called > + */ > + *mode &= ~current_umask(); > + > return 0; > } > static inline void ceph_init_inode_acls(struct inode *inode, > diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h > index 4a8443a2b8ec..0ecaa9c20c0c 100644 > --- a/fs/ext2/acl.h > +++ b/fs/ext2/acl.h > @@ -67,6 +67,12 @@ extern int ext2_init_acl (struct inode *, struct inode *); > > static inline int ext2_init_acl (struct inode *inode, struct inode *dir) > { > + /* usually, the umask is applied by posix_acl_create(), but if > + * ACL support is disabled at compile time, we need to do it > + * here, because posix_acl_create() will never be called > + */ > + inode->i_mode &= ~current_umask(); > + > return 0; > } > #endif > diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h > index f892e54d0fcd..64a05e663a45 100644 > --- a/fs/jfs/jfs_acl.h > +++ b/fs/jfs/jfs_acl.h > @@ -17,6 +17,12 @@ int jfs_init_acl(tid_t, struct inode *, struct inode *); > static inline int jfs_init_acl(tid_t tid, struct inode *inode, > struct inode *dir) > { > + /* usually, the umask is applied by posix_acl_create(), but if > + * ACL support is disabled at compile time, we need to do it > + * here, because posix_acl_create() will never be called > + */ > + inode->i_mode &= ~current_umask(); > + > return 0; > } > > diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h > index 0e65b3d634d9..54bc9b1061ca 100644 > --- a/include/linux/posix_acl.h > +++ b/include/linux/posix_acl.h > @@ -128,6 +128,7 @@ static inline void cache_no_acl(struct inode *inode) > static inline int posix_acl_create(struct inode *inode, umode_t *mode, > struct posix_acl **default_acl, struct posix_acl **acl) > { > + *mode &= ~current_umask(); > *default_acl = *acl = NULL; > return 0; > } > -- > 2.39.2 >
On Tue, Oct 10, 2023 at 3:11 PM Jan Kara <jack@suse.cz> wrote: > Thanks for the updated changelog! But as I'm looking into VFS code isn't > this already handled by mode_strip_umask() / vfs_prepare_mode() in > fs/namei.c? Because posix_acl_create() doesn't do anything to 'mode' for > !IS_POSIXACL() filesystems either. So at least ext2 (where I've checked > the mount option handling) does seem to have umask properly applied in all > the cases. But I might be missing something... I'm not sure either. I was hoping the VFS experts could tell something about how this API is supposed to be used and whose responsibility it is to apply the umask. There used to be some confusion in the code, to the point it was missing completely for O_TMPFILE. I'm still somewhat confused. Maybe this is a chance to clear this confusion up and then document it? I wish there was one central place to apply the umask, and not spread it around two (or more?) different code locations, depending on whether there's an ACL. For my taste, that sort of policy is too error prone for something as sensitive as umasks. After we already had the O_TMPFILE vulnerability (which was only fixed last year, three years(!) after I reported it).
On Tue 10-10-23 15:17:17, Max Kellermann wrote: > On Tue, Oct 10, 2023 at 3:11 PM Jan Kara <jack@suse.cz> wrote: > > Thanks for the updated changelog! But as I'm looking into VFS code isn't > > this already handled by mode_strip_umask() / vfs_prepare_mode() in > > fs/namei.c? Because posix_acl_create() doesn't do anything to 'mode' for > > !IS_POSIXACL() filesystems either. So at least ext2 (where I've checked > > the mount option handling) does seem to have umask properly applied in all > > the cases. But I might be missing something... > > I'm not sure either. I was hoping the VFS experts could tell something > about how this API is supposed to be used and whose responsibility it > is to apply the umask. There used to be some confusion in the code, to > the point it was missing completely for O_TMPFILE. I'm still somewhat > confused. Maybe this is a chance to clear this confusion up and then > document it? So I've checked some more and the kernel doc comments before mode_strip_umask() and vfs_prepare_mode() make it pretty obvious - all paths creating new inodes must be calling vfs_prepare_mode(). As a result mode_strip_umask() which handles umask stripping for filesystems not supporting posix ACLs. For filesystems that do support ACLs, posix_acl_create() must be call and that handles umask stripping. So your fix should not be needed. CCed some relevant people for confirmation. > I wish there was one central place to apply the umask, and not spread > it around two (or more?) different code locations, depending on > whether there's an ACL. For my taste, that sort of policy is too error > prone for something as sensitive as umasks. After we already had the > O_TMPFILE vulnerability (which was only fixed last year, three > years(!) after I reported it). I agree having umask stripping in two places is not great but it's difficult to avoid with how posix ACLs are implemented and intertwined in various filesystem implementations. At least the current design made it quite a bit harder to forget to strip the umask. Honza
On Wed, Oct 11, 2023 at 12:05 PM Jan Kara <jack@suse.cz> wrote: > So I've checked some more and the kernel doc comments before > mode_strip_umask() and vfs_prepare_mode() make it pretty obvious - all > paths creating new inodes must be calling vfs_prepare_mode(). As a result > mode_strip_umask() which handles umask stripping for filesystems not > supporting posix ACLs. For filesystems that do support ACLs, > posix_acl_create() must be call and that handles umask stripping. So your > fix should not be needed. CCed some relevant people for confirmation. Thanks, Jan. Do you think the documentation is obvious enough, or shall I look around and try to improve the documentation? I'm not a FS expert, so it may be just my fault that it confused me.... I just analyzed the O_TMPFILE vulnerability four years ago (because it was reported to me as the maintainer of a userspace software). Apart from my doubts that this API contract is too error prone, I'm not quite sure if all filesystems really implement it properly. For example, overlayfs unconditionally sets SB_POSIXACL, even if the kernel has no ACL support. Would this ignore the umask? I'm not sure, overlayfs is a special beast. Then there's orangefs which allows setting the "acl" mount option (and thus SB_POSIXACL) even if the kernel has no ACL support. Same for gfs2 and maybe cifs, maybe more, I didn't check them all. The "mainstream" filesystems like ext4 seem to be implemented properly, though this is still too fragile for my taste... ext4 has the SB_POSIXACL code even if there's no kernel ACL support, but it is not reachable because EXT4_MOUNT_POSIX_ACL cannot be set from userspace in that case. The code looks suspicious, but is okay in the end - still not my taste. I see so much redundant code regarding the "acl" mount option in all filesystems. I believe the API should be designed in a way that it is safe-by-default, and shouldn't need very careful considerations in each and every filesystem, or else all filesystems repeat the same mistakes until the last one gets fixed. Max
On Wed 11-10-23 12:51:12, Max Kellermann wrote: > On Wed, Oct 11, 2023 at 12:05 PM Jan Kara <jack@suse.cz> wrote: > > So I've checked some more and the kernel doc comments before > > mode_strip_umask() and vfs_prepare_mode() make it pretty obvious - all > > paths creating new inodes must be calling vfs_prepare_mode(). As a result > > mode_strip_umask() which handles umask stripping for filesystems not > > supporting posix ACLs. For filesystems that do support ACLs, > > posix_acl_create() must be call and that handles umask stripping. So your > > fix should not be needed. CCed some relevant people for confirmation. > > Thanks, Jan. Do you think the documentation is obvious enough, or > shall I look around and try to improve the documentation? I'm not a FS > expert, so it may be just my fault that it confused me.... I just > analyzed the O_TMPFILE vulnerability four years ago (because it was > reported to me as the maintainer of a userspace software). > > Apart from my doubts that this API contract is too error prone, I'm > not quite sure if all filesystems really implement it properly. > > For example, overlayfs unconditionally sets SB_POSIXACL, even if the > kernel has no ACL support. Would this ignore the umask? I'm not sure, > overlayfs is a special beast. > Then there's orangefs which allows setting the "acl" mount option (and > thus SB_POSIXACL) even if the kernel has no ACL support. Same for gfs2 > and maybe cifs, maybe more, I didn't check them all. Indeed, *that* looks like a bug. Good spotting! I'd say posix_acl_create() defined in include/linux/posix_acl.h for the !CONFIG_FS_POSIX_ACL case should be stripping mode using umask. Care to send a patch for this? > The "mainstream" filesystems like ext4 seem to be implemented > properly, though this is still too fragile for my taste... ext4 has > the SB_POSIXACL code even if there's no kernel ACL support, but it is > not reachable because EXT4_MOUNT_POSIX_ACL cannot be set from > userspace in that case. The code looks suspicious, but is okay in the > end - still not my taste. > > I see so much redundant code regarding the "acl" mount option in all > filesystems. I believe the API should be designed in a way that it is > safe-by-default, and shouldn't need very careful considerations in > each and every filesystem, or else all filesystems repeat the same > mistakes until the last one gets fixed. So I definitely agree that we should handle as many things as possible in VFS without relying on filesystems to get it right. Thus I agree VFS should do the right thing even if the filesystem sets SB_POSIXACl when !CONFIG_FS_POSIX_ACL. Honza
On Wed, Oct 11, 2023 at 2:07 PM Jan Kara <jack@suse.cz> wrote: > Indeed, *that* looks like a bug. Good spotting! I'd say posix_acl_create() > defined in include/linux/posix_acl.h for the !CONFIG_FS_POSIX_ACL case > should be stripping mode using umask. Care to send a patch for this? You mean like the patch you're commenting on right now? ;-) But without the other filesystems. I'll resend it with just the posix_acl.h hunk.
On Wed 11-10-23 14:18:45, Max Kellermann wrote: > On Wed, Oct 11, 2023 at 2:07 PM Jan Kara <jack@suse.cz> wrote: > > Indeed, *that* looks like a bug. Good spotting! I'd say posix_acl_create() > > defined in include/linux/posix_acl.h for the !CONFIG_FS_POSIX_ACL case > > should be stripping mode using umask. Care to send a patch for this? > > You mean like the patch you're commenting on right now? ;-) Yeah, OK, that was a bit silly ;) I was too concentrated on the filesystem bits. > But without the other filesystems. I'll resend it with just the > posix_acl.h hunk. Yup, and a bit massaged changelog... Thanks a lot! Honza
On Wed, Oct 11, 2023 at 2:18 PM Max Kellermann <max.kellermann@ionos.com> wrote: > But without the other filesystems. I'll resend it with just the > posix_acl.h hunk. Thinking again, I don't think this is the proper solution. This may server as a workaround so those broken filesystems don't suffer from this bug, but it's not proper. posix_acl_create() is only supposed to appy the umask if the inode supports ACLs; if not, the VFS is supposed to do it. But if the filesystem pretends to have ACL support but the kernel does not, it's really a filesystem bug. Hacking the umask code into posix_acl_create() for that inconsistent case doesn't sound right. A better workaround would be this patch: https://patchwork.kernel.org/project/linux-nfs/patch/151603744662.29035.4910161264124875658.stgit@rabbit.intern.cm-ag/ I submitted it more than 5 years ago, it got one positive review, but was never merged. This patch enables the VFS's umask code even if the filesystem prerents to support ACLs. This still doesn't fix the filesystem bug, but makes VFS's behavior consistent. Max
On Wed 11-10-23 14:27:49, Max Kellermann wrote: > On Wed, Oct 11, 2023 at 2:18 PM Max Kellermann <max.kellermann@ionos.com> wrote: > > But without the other filesystems. I'll resend it with just the > > posix_acl.h hunk. > > Thinking again, I don't think this is the proper solution. This may > server as a workaround so those broken filesystems don't suffer from > this bug, but it's not proper. > > posix_acl_create() is only supposed to appy the umask if the inode > supports ACLs; if not, the VFS is supposed to do it. But if the > filesystem pretends to have ACL support but the kernel does not, it's > really a filesystem bug. Hacking the umask code into > posix_acl_create() for that inconsistent case doesn't sound right. > > A better workaround would be this patch: > https://patchwork.kernel.org/project/linux-nfs/patch/151603744662.29035.4910161264124875658.stgit@rabbit.intern.cm-ag/ > I submitted it more than 5 years ago, it got one positive review, but > was never merged. > > This patch enables the VFS's umask code even if the filesystem > prerents to support ACLs. This still doesn't fix the filesystem bug, > but makes VFS's behavior consistent. OK, that solution works for me as well. I agree it seems a tad bit cleaner. Christian, which one would you prefer? Honza
On Wed, Oct 11, 2023 at 03:59:22PM +0200, Jan Kara wrote: > On Wed 11-10-23 14:27:49, Max Kellermann wrote: > > On Wed, Oct 11, 2023 at 2:18 PM Max Kellermann <max.kellermann@ionos.com> wrote: > > > But without the other filesystems. I'll resend it with just the > > > posix_acl.h hunk. > > > > Thinking again, I don't think this is the proper solution. This may > > server as a workaround so those broken filesystems don't suffer from > > this bug, but it's not proper. > > > > posix_acl_create() is only supposed to appy the umask if the inode > > supports ACLs; if not, the VFS is supposed to do it. But if the > > filesystem pretends to have ACL support but the kernel does not, it's > > really a filesystem bug. Hacking the umask code into > > posix_acl_create() for that inconsistent case doesn't sound right. > > > > A better workaround would be this patch: > > https://patchwork.kernel.org/project/linux-nfs/patch/151603744662.29035.4910161264124875658.stgit@rabbit.intern.cm-ag/ > > I submitted it more than 5 years ago, it got one positive review, but > > was never merged. > > > > This patch enables the VFS's umask code even if the filesystem > > prerents to support ACLs. This still doesn't fix the filesystem bug, > > but makes VFS's behavior consistent. > > OK, that solution works for me as well. I agree it seems a tad bit cleaner. > Christian, which one would you prefer? So it always bugged me that POSIX ACLs push umask stripping down into the individual filesystems but it's hard to get rid of this. And we tried to improve the situation during the POSIX ACL rework by introducing vfs_prepare_umask(). Aside from that, the problem had been that filesystems like nfs v4 intentionally raised SB_POSIXACL to prevent umask stripping in the VFS. IOW, for them SB_POSIXACL was equivalent to "don't apply any umask". And afaict nfs v4 has it's own thing going on how and where umasks are applied. However, since we now have the following commit in vfs.misc: commit f61b9bb3f8386a5e59b49bf1310f5b34f47bcef9 Author: Jeff Layton <jlayton@kernel.org> AuthorDate: Mon Sep 11 20:25:50 2023 -0400 Commit: Christian Brauner <brauner@kernel.org> CommitDate: Thu Sep 21 15:37:47 2023 +0200 fs: add a new SB_I_NOUMASK flag SB_POSIXACL must be set when a filesystem supports POSIX ACLs, but NFSv4 also sets this flag to prevent the VFS from applying the umask on newly-created files. NFSv4 doesn't support POSIX ACLs however, which causes confusion when other subsystems try to test for them. Add a new SB_I_NOUMASK flag that allows filesystems to opt-in to umask stripping without advertising support for POSIX ACLs. Set the new flag on NFSv4 instead of SB_POSIXACL. Also, move mode_strip_umask to namei.h and convert init_mknod and init_mkdir to use it. Signed-off-by: Jeff Layton <jlayton@kernel.org> Message-Id: <20230911-acl-fix-v3-1-b25315333f6c@kernel.org> Signed-off-by: Christian Brauner <brauner@kernel.org> I think it's possible to pick up the first patch linked above: fix umask on NFS with CONFIG_FS_POSIX_ACL=n doesn't lead to any and see whether we see any regressions from this. The second patch I can't easily judge that should go through nfs if at all. So proposal/question: should we take the first patch into vfs.misc?
On Wed 11-10-23 17:27:37, Christian Brauner wrote: > On Wed, Oct 11, 2023 at 03:59:22PM +0200, Jan Kara wrote: > > On Wed 11-10-23 14:27:49, Max Kellermann wrote: > > > On Wed, Oct 11, 2023 at 2:18 PM Max Kellermann <max.kellermann@ionos.com> wrote: > > > > But without the other filesystems. I'll resend it with just the > > > > posix_acl.h hunk. > > > > > > Thinking again, I don't think this is the proper solution. This may > > > server as a workaround so those broken filesystems don't suffer from > > > this bug, but it's not proper. > > > > > > posix_acl_create() is only supposed to appy the umask if the inode > > > supports ACLs; if not, the VFS is supposed to do it. But if the > > > filesystem pretends to have ACL support but the kernel does not, it's > > > really a filesystem bug. Hacking the umask code into > > > posix_acl_create() for that inconsistent case doesn't sound right. > > > > > > A better workaround would be this patch: > > > https://patchwork.kernel.org/project/linux-nfs/patch/151603744662.29035.4910161264124875658.stgit@rabbit.intern.cm-ag/ > > > I submitted it more than 5 years ago, it got one positive review, but > > > was never merged. > > > > > > This patch enables the VFS's umask code even if the filesystem > > > prerents to support ACLs. This still doesn't fix the filesystem bug, > > > but makes VFS's behavior consistent. > > > > OK, that solution works for me as well. I agree it seems a tad bit cleaner. > > Christian, which one would you prefer? > > So it always bugged me that POSIX ACLs push umask stripping down into > the individual filesystems but it's hard to get rid of this. And we > tried to improve the situation during the POSIX ACL rework by > introducing vfs_prepare_umask(). > > Aside from that, the problem had been that filesystems like nfs v4 > intentionally raised SB_POSIXACL to prevent umask stripping in the VFS. > IOW, for them SB_POSIXACL was equivalent to "don't apply any umask". Ah, what a hack... > And afaict nfs v4 has it's own thing going on how and where umasks are > applied. However, since we now have the following commit in vfs.misc: > > commit f61b9bb3f8386a5e59b49bf1310f5b34f47bcef9 > Author: Jeff Layton <jlayton@kernel.org> > AuthorDate: Mon Sep 11 20:25:50 2023 -0400 > Commit: Christian Brauner <brauner@kernel.org> > CommitDate: Thu Sep 21 15:37:47 2023 +0200 > > fs: add a new SB_I_NOUMASK flag > > SB_POSIXACL must be set when a filesystem supports POSIX ACLs, but NFSv4 > also sets this flag to prevent the VFS from applying the umask on > newly-created files. NFSv4 doesn't support POSIX ACLs however, which > causes confusion when other subsystems try to test for them. > > Add a new SB_I_NOUMASK flag that allows filesystems to opt-in to umask > stripping without advertising support for POSIX ACLs. Set the new flag > on NFSv4 instead of SB_POSIXACL. > > Also, move mode_strip_umask to namei.h and convert init_mknod and > init_mkdir to use it. > > Signed-off-by: Jeff Layton <jlayton@kernel.org> > Message-Id: <20230911-acl-fix-v3-1-b25315333f6c@kernel.org> > Signed-off-by: Christian Brauner <brauner@kernel.org> > > I think it's possible to pick up the first patch linked above: > > fix umask on NFS with CONFIG_FS_POSIX_ACL=n doesn't lead to any > > and see whether we see any regressions from this. > > The second patch I can't easily judge that should go through nfs if at > all. > > So proposal/question: should we take the first patch into vfs.misc? Sounds good to me. I have checked whether some other filesystem does not try to play similar games as NFS and it appears not although overlayfs does seem to play some games with umasks. Honza
On Wed, Oct 11, 2023 at 05:27:37PM +0200, Christian Brauner wrote: > Aside from that, the problem had been that filesystems like nfs v4 > intentionally raised SB_POSIXACL to prevent umask stripping in the VFS. > IOW, for them SB_POSIXACL was equivalent to "don't apply any umask". > > And afaict nfs v4 has it's own thing going on how and where umasks are > applied. However, since we now have the following commit in vfs.misc: > > fs: add a new SB_I_NOUMASK flag To summarize, just to make sure I understand where we're going. Since normally (excepting unusual cases like NFS), it's fine to strip the umask bits twice (once in the VFS, and once in the file system, for those file systems that are doing it), once we have SB_I_NOUMASK and NFS starts using it, then the VFS can just unconditionally strip the umask bits, and then we can gradually clean up the file system umask handling (which would then be harmlessly duplicative). Did I get this right? - Ted
On Wed 11-10-23 13:00:42, Theodore Ts'o wrote: > On Wed, Oct 11, 2023 at 05:27:37PM +0200, Christian Brauner wrote: > > Aside from that, the problem had been that filesystems like nfs v4 > > intentionally raised SB_POSIXACL to prevent umask stripping in the VFS. > > IOW, for them SB_POSIXACL was equivalent to "don't apply any umask". > > > > And afaict nfs v4 has it's own thing going on how and where umasks are > > applied. However, since we now have the following commit in vfs.misc: > > > > fs: add a new SB_I_NOUMASK flag > > To summarize, just to make sure I understand where we're going. Since > normally (excepting unusual cases like NFS), it's fine to strip the > umask bits twice (once in the VFS, and once in the file system, for > those file systems that are doing it), once we have SB_I_NOUMASK and > NFS starts using it, then the VFS can just unconditionally strip the > umask bits, and then we can gradually clean up the file system umask > handling (which would then be harmlessly duplicative). > > Did I get this right? I don't think this is accurate. posix_acl_create() needs unmasked 'mode' because instead of using current_umask() for masking it wants to use whatever is stored in the ACLs as an umask. So I still think we need to keep umask handling in both posix_acl_create() and vfs_prepare_mode(). But filesystem's only obligation would be to call posix_acl_create() if the inode is IS_POSIXACL. No more caring about when to apply umask and when not based on config or mount options. Honza
On Wed, Oct 11, 2023 at 06:29:04PM +0200, Jan Kara wrote: > On Wed 11-10-23 17:27:37, Christian Brauner wrote: > > On Wed, Oct 11, 2023 at 03:59:22PM +0200, Jan Kara wrote: > > > On Wed 11-10-23 14:27:49, Max Kellermann wrote: > > > > On Wed, Oct 11, 2023 at 2:18 PM Max Kellermann <max.kellermann@ionos.com> wrote: > > > > > But without the other filesystems. I'll resend it with just the > > > > > posix_acl.h hunk. > > > > > > > > Thinking again, I don't think this is the proper solution. This may > > > > server as a workaround so those broken filesystems don't suffer from > > > > this bug, but it's not proper. > > > > > > > > posix_acl_create() is only supposed to appy the umask if the inode > > > > supports ACLs; if not, the VFS is supposed to do it. But if the > > > > filesystem pretends to have ACL support but the kernel does not, it's > > > > really a filesystem bug. Hacking the umask code into > > > > posix_acl_create() for that inconsistent case doesn't sound right. > > > > > > > > A better workaround would be this patch: > > > > https://patchwork.kernel.org/project/linux-nfs/patch/151603744662.29035.4910161264124875658.stgit@rabbit.intern.cm-ag/ > > > > I submitted it more than 5 years ago, it got one positive review, but > > > > was never merged. > > > > > > > > This patch enables the VFS's umask code even if the filesystem > > > > prerents to support ACLs. This still doesn't fix the filesystem bug, > > > > but makes VFS's behavior consistent. > > > > > > OK, that solution works for me as well. I agree it seems a tad bit cleaner. > > > Christian, which one would you prefer? > > > > So it always bugged me that POSIX ACLs push umask stripping down into > > the individual filesystems but it's hard to get rid of this. And we > > tried to improve the situation during the POSIX ACL rework by > > introducing vfs_prepare_umask(). > > > > Aside from that, the problem had been that filesystems like nfs v4 > > intentionally raised SB_POSIXACL to prevent umask stripping in the VFS. > > IOW, for them SB_POSIXACL was equivalent to "don't apply any umask". > > Ah, what a hack... > > > And afaict nfs v4 has it's own thing going on how and where umasks are > > applied. However, since we now have the following commit in vfs.misc: > > > > commit f61b9bb3f8386a5e59b49bf1310f5b34f47bcef9 > > Author: Jeff Layton <jlayton@kernel.org> > > AuthorDate: Mon Sep 11 20:25:50 2023 -0400 > > Commit: Christian Brauner <brauner@kernel.org> > > CommitDate: Thu Sep 21 15:37:47 2023 +0200 > > > > fs: add a new SB_I_NOUMASK flag > > > > SB_POSIXACL must be set when a filesystem supports POSIX ACLs, but NFSv4 > > also sets this flag to prevent the VFS from applying the umask on > > newly-created files. NFSv4 doesn't support POSIX ACLs however, which > > causes confusion when other subsystems try to test for them. > > > > Add a new SB_I_NOUMASK flag that allows filesystems to opt-in to umask > > stripping without advertising support for POSIX ACLs. Set the new flag > > on NFSv4 instead of SB_POSIXACL. > > > > Also, move mode_strip_umask to namei.h and convert init_mknod and > > init_mkdir to use it. > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org> > > Message-Id: <20230911-acl-fix-v3-1-b25315333f6c@kernel.org> > > Signed-off-by: Christian Brauner <brauner@kernel.org> > > > > I think it's possible to pick up the first patch linked above: > > > > fix umask on NFS with CONFIG_FS_POSIX_ACL=n doesn't lead to any > > > > and see whether we see any regressions from this. > > > > The second patch I can't easily judge that should go through nfs if at > > all. > > > > So proposal/question: should we take the first patch into vfs.misc? > > Sounds good to me. I have checked whether some other filesystem does not > try to play similar games as NFS and it appears not although overlayfs does > seem to play some games with umasks. I think that overlayfs sets SB_POSIXACL unconditionally to ensure that the upper filesystem can decide where the umask needs to be stripped. If the upper filesystem doesn't have SB_POSIXACL then the umask will be stripped directly in e.g., vfs_create(), and vfs_tmpfile(). If it does then it will be done in the upper filesystems. So with the patch I linked above that we have in vfs.misc we should be able to change overlayfs to behave similar to NFS: diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 9f43f0d303ad..361189b676b0 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1489,8 +1489,16 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers : ovl_trusted_xattr_handlers; sb->s_fs_info = ofs; +#ifdef CONFIG_FS_POSIX_ACL sb->s_flags |= SB_POSIXACL; +#endif sb->s_iflags |= SB_I_SKIP_SYNC | SB_I_IMA_UNVERIFIABLE_SIGNATURE; + /* + * Ensure that umask handling is done by the filesystems used + * for the the upper layer instead of overlayfs as that would + * lead to unexpected results. + */ + sb->s_iflags |= SB_I_NOUMASK; err = -ENOMEM; root_dentry = ovl_get_root(sb, ctx->upper.dentry, oe); Which means that umask handling will be done by the upper filesystems just as is done right now and overlayfs can stop advertising SB_POSIXACL support on a kernel that doesn't have support for it compiled in. How does that sound?
On Thu 12-10-23 11:22:29, Christian Brauner wrote: > On Wed, Oct 11, 2023 at 06:29:04PM +0200, Jan Kara wrote: > > On Wed 11-10-23 17:27:37, Christian Brauner wrote: > > > On Wed, Oct 11, 2023 at 03:59:22PM +0200, Jan Kara wrote: > > > > On Wed 11-10-23 14:27:49, Max Kellermann wrote: > > > > > On Wed, Oct 11, 2023 at 2:18 PM Max Kellermann <max.kellermann@ionos.com> wrote: > > > > > > But without the other filesystems. I'll resend it with just the > > > > > > posix_acl.h hunk. > > > > > > > > > > Thinking again, I don't think this is the proper solution. This may > > > > > server as a workaround so those broken filesystems don't suffer from > > > > > this bug, but it's not proper. > > > > > > > > > > posix_acl_create() is only supposed to appy the umask if the inode > > > > > supports ACLs; if not, the VFS is supposed to do it. But if the > > > > > filesystem pretends to have ACL support but the kernel does not, it's > > > > > really a filesystem bug. Hacking the umask code into > > > > > posix_acl_create() for that inconsistent case doesn't sound right. > > > > > > > > > > A better workaround would be this patch: > > > > > https://patchwork.kernel.org/project/linux-nfs/patch/151603744662.29035.4910161264124875658.stgit@rabbit.intern.cm-ag/ > > > > > I submitted it more than 5 years ago, it got one positive review, but > > > > > was never merged. > > > > > > > > > > This patch enables the VFS's umask code even if the filesystem > > > > > prerents to support ACLs. This still doesn't fix the filesystem bug, > > > > > but makes VFS's behavior consistent. > > > > > > > > OK, that solution works for me as well. I agree it seems a tad bit cleaner. > > > > Christian, which one would you prefer? > > > > > > So it always bugged me that POSIX ACLs push umask stripping down into > > > the individual filesystems but it's hard to get rid of this. And we > > > tried to improve the situation during the POSIX ACL rework by > > > introducing vfs_prepare_umask(). > > > > > > Aside from that, the problem had been that filesystems like nfs v4 > > > intentionally raised SB_POSIXACL to prevent umask stripping in the VFS. > > > IOW, for them SB_POSIXACL was equivalent to "don't apply any umask". > > > > Ah, what a hack... > > > > > And afaict nfs v4 has it's own thing going on how and where umasks are > > > applied. However, since we now have the following commit in vfs.misc: > > > > > > commit f61b9bb3f8386a5e59b49bf1310f5b34f47bcef9 > > > Author: Jeff Layton <jlayton@kernel.org> > > > AuthorDate: Mon Sep 11 20:25:50 2023 -0400 > > > Commit: Christian Brauner <brauner@kernel.org> > > > CommitDate: Thu Sep 21 15:37:47 2023 +0200 > > > > > > fs: add a new SB_I_NOUMASK flag > > > > > > SB_POSIXACL must be set when a filesystem supports POSIX ACLs, but NFSv4 > > > also sets this flag to prevent the VFS from applying the umask on > > > newly-created files. NFSv4 doesn't support POSIX ACLs however, which > > > causes confusion when other subsystems try to test for them. > > > > > > Add a new SB_I_NOUMASK flag that allows filesystems to opt-in to umask > > > stripping without advertising support for POSIX ACLs. Set the new flag > > > on NFSv4 instead of SB_POSIXACL. > > > > > > Also, move mode_strip_umask to namei.h and convert init_mknod and > > > init_mkdir to use it. > > > > > > Signed-off-by: Jeff Layton <jlayton@kernel.org> > > > Message-Id: <20230911-acl-fix-v3-1-b25315333f6c@kernel.org> > > > Signed-off-by: Christian Brauner <brauner@kernel.org> > > > > > > I think it's possible to pick up the first patch linked above: > > > > > > fix umask on NFS with CONFIG_FS_POSIX_ACL=n doesn't lead to any > > > > > > and see whether we see any regressions from this. > > > > > > The second patch I can't easily judge that should go through nfs if at > > > all. > > > > > > So proposal/question: should we take the first patch into vfs.misc? > > > > Sounds good to me. I have checked whether some other filesystem does not > > try to play similar games as NFS and it appears not although overlayfs does > > seem to play some games with umasks. > > I think that overlayfs sets SB_POSIXACL unconditionally to ensure that > the upper filesystem can decide where the umask needs to be stripped. If > the upper filesystem doesn't have SB_POSIXACL then the umask will be > stripped directly in e.g., vfs_create(), and vfs_tmpfile(). If it does > then it will be done in the upper filesystems. > > So with the patch I linked above that we have in vfs.misc we should be > able to change overlayfs to behave similar to NFS: Yep, I was thinking that this might be what overlayfs wants. But I know far to few about overlayfs to be sure ;) That's why I've CCed Amir in my previous email... Honza > > diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c > index 9f43f0d303ad..361189b676b0 100644 > --- a/fs/overlayfs/super.c > +++ b/fs/overlayfs/super.c > @@ -1489,8 +1489,16 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc) > sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers : > ovl_trusted_xattr_handlers; > sb->s_fs_info = ofs; > +#ifdef CONFIG_FS_POSIX_ACL > sb->s_flags |= SB_POSIXACL; > +#endif > sb->s_iflags |= SB_I_SKIP_SYNC | SB_I_IMA_UNVERIFIABLE_SIGNATURE; > + /* > + * Ensure that umask handling is done by the filesystems used > + * for the the upper layer instead of overlayfs as that would > + * lead to unexpected results. > + */ > + sb->s_iflags |= SB_I_NOUMASK; > > err = -ENOMEM; > root_dentry = ovl_get_root(sb, ctx->upper.dentry, oe); > > Which means that umask handling will be done by the upper filesystems > just as is done right now and overlayfs can stop advertising SB_POSIXACL > support on a kernel that doesn't have support for it compiled in. > > How does that sound?
On Wed, Oct 11, 2023 at 07:26:06PM +0200, Jan Kara wrote: > I don't think this is accurate. posix_acl_create() needs unmasked 'mode' > because instead of using current_umask() for masking it wants to use > whatever is stored in the ACLs as an umask. > > So I still think we need to keep umask handling in both posix_acl_create() > and vfs_prepare_mode(). But filesystem's only obligation would be to call > posix_acl_create() if the inode is IS_POSIXACL. No more caring about when > to apply umask and when not based on config or mount options. Ah, right, thanks for the clarification. I *think* the following patch in the ext4 dev branch (not yet in Linus's tree, but it should be in linux-next) should be harmless, though, right? And once we get the changes in vfs_prepare_mode() we can revert in ext4 --- or do folks I think I should just drop it from the ext4 dev branch now? Thanks, - Ted commit 484fd6c1de13b336806a967908a927cc0356e312 Author: Max Kellermann <max.kellermann@ionos.com> Date: Tue Sep 19 10:18:23 2023 +0200 ext4: apply umask if ACL support is disabled The function ext4_init_acl() calls posix_acl_create() which is responsible for applying the umask. But without CONFIG_EXT4_FS_POSIX_ACL, ext4_init_acl() is an empty inline function, and nobody applies the umask. This fixes a bug which causes the umask to be ignored with O_TMPFILE on ext4: https://github.com/MusicPlayerDaemon/MPD/issues/558 https://bugs.gentoo.org/show_bug.cgi?id=686142#c3 https://bugzilla.kernel.org/show_bug.cgi?id=203625 Reviewed-by: "J. Bruce Fields" <bfields@redhat.com> Cc: stable@vger.kernel.org Signed-off-by: Max Kellermann <max.kellermann@ionos.com> Link: https://lore.kernel.org/r/20230919081824.1096619-1-max.kellermann@ionos.com Signed-off-by: Theodore Ts'o <tytso@mit.edu> diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index 0c5a79c3b5d4..ef4c19e5f570 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -68,6 +68,11 @@ extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); static inline int ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) { + /* usually, the umask is applied by posix_acl_create(), but if + ext4 ACL support is disabled at compile time, we need to do + it here, because posix_acl_create() will never be called */ + inode->i_mode &= ~current_umask(); + return 0; } #endif /* CONFIG_EXT4_FS_POSIX_ACL */
On Thu 12-10-23 10:29:18, Theodore Ts'o wrote: > On Wed, Oct 11, 2023 at 07:26:06PM +0200, Jan Kara wrote: > > I don't think this is accurate. posix_acl_create() needs unmasked 'mode' > > because instead of using current_umask() for masking it wants to use > > whatever is stored in the ACLs as an umask. > > > > So I still think we need to keep umask handling in both posix_acl_create() > > and vfs_prepare_mode(). But filesystem's only obligation would be to call > > posix_acl_create() if the inode is IS_POSIXACL. No more caring about when > > to apply umask and when not based on config or mount options. > > Ah, right, thanks for the clarification. I *think* the following > patch in the ext4 dev branch (not yet in Linus's tree, but it should > be in linux-next) should be harmless, though, right? And once we get > the changes in vfs_prepare_mode() we can revert in ext4 --- or do > folks I think I should just drop it from the ext4 dev branch now? It definitely does no harm. As you say, you can revert it once the VFS changes land if you want. Honza > commit 484fd6c1de13b336806a967908a927cc0356e312 > Author: Max Kellermann <max.kellermann@ionos.com> > Date: Tue Sep 19 10:18:23 2023 +0200 > > ext4: apply umask if ACL support is disabled > > The function ext4_init_acl() calls posix_acl_create() which is > responsible for applying the umask. But without > CONFIG_EXT4_FS_POSIX_ACL, ext4_init_acl() is an empty inline function, > and nobody applies the umask. > > This fixes a bug which causes the umask to be ignored with O_TMPFILE > on ext4: > > https://github.com/MusicPlayerDaemon/MPD/issues/558 > https://bugs.gentoo.org/show_bug.cgi?id=686142#c3 > https://bugzilla.kernel.org/show_bug.cgi?id=203625 > > Reviewed-by: "J. Bruce Fields" <bfields@redhat.com> > Cc: stable@vger.kernel.org > Signed-off-by: Max Kellermann <max.kellermann@ionos.com> > Link: https://lore.kernel.org/r/20230919081824.1096619-1-max.kellermann@ionos.com > Signed-off-by: Theodore Ts'o <tytso@mit.edu> > > diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h > index 0c5a79c3b5d4..ef4c19e5f570 100644 > --- a/fs/ext4/acl.h > +++ b/fs/ext4/acl.h > @@ -68,6 +68,11 @@ extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); > static inline int > ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) > { > + /* usually, the umask is applied by posix_acl_create(), but if > + ext4 ACL support is disabled at compile time, we need to do > + it here, because posix_acl_create() will never be called */ > + inode->i_mode &= ~current_umask(); > + > return 0; > } > #endif /* CONFIG_EXT4_FS_POSIX_ACL */
Jan Kara <jack@suse.cz> wrote: > On Thu 12-10-23 10:29:18, Theodore Ts'o wrote: > > On Wed, Oct 11, 2023 at 07:26:06PM +0200, Jan Kara wrote: > > > I don't think this is accurate. posix_acl_create() needs unmasked 'mode' > > > because instead of using current_umask() for masking it wants to use > > > whatever is stored in the ACLs as an umask. > > > > > > So I still think we need to keep umask handling in both posix_acl_create() > > > and vfs_prepare_mode(). But filesystem's only obligation would be to call > > > posix_acl_create() if the inode is IS_POSIXACL. No more caring about when > > > to apply umask and when not based on config or mount options. > > > > Ah, right, thanks for the clarification. I *think* the following > > patch in the ext4 dev branch (not yet in Linus's tree, but it should > > be in linux-next) should be harmless, though, right? And once we get > > the changes in vfs_prepare_mode() we can revert in ext4 --- or do > > folks I think I should just drop it from the ext4 dev branch now? > > It definitely does no harm. As you say, you can revert it once the VFS > changes land if you want. I've been debugging why flatpak was always considering its database corrupted, and found this commit to be the source of the issue. $ ostree --repo=repo --mode=bare-user-only init $ mkdir tree && umask 0 && ln -s target tree/symlink && umask 022 $ ostree --repo=repo commit --branch=foo tree/ c508e0564267b376661889b9016f8438bd6d39412078838f78856383fdd8ac2f $ ostree --repo=repo fsck Validating refs... Validating refs in collections... Enumerating commits... Verifying content integrity of 1 commit objects... fsck objects (1/4) [=== ] 25% error: In commits c508e0564267b376661889b9016f8438bd6d39412078838f78856383fdd8ac2f: fsck content object a6b40a5400ed082fbe067d2c8397aab54046a089768651c392a36db46d24c1cd: Corrupted file object; checksum expected='a6b40a5400ed082fbe067d2c8397aab54046a089768651c392a36db46d24c1cd' actual='6bdc88f9722f96dbd51735e381f8a1b0e01363e1d7ee2edbb474c091f83c3987' $ Turns out that symlinks are inheriting umask on my system (which has CONFIG_EXT4_FS_POSIX_ACL=n): $ umask 022 $ ln -s target symlink $ ls -l symlink lrwxr-xr-x 1 michael michael 6 Mar 13 13:28 symlink -> target $ Looking at the referenced functions, posix_acl_create() returns early before applying umask for symlinks, but ext4_init_acl() now applies the umask unconditionally. After reverting this commit, it works correctly. I am also unable to reproduce the mentioned issue with O_TMPFILE after reverting the commit. It seems that the bug was fixed properly in ac6800e279a2 ('fs: Add missing umask strip in vfs_tmpfile'), and all branches that have this ext4_init_acl patch already had ac6800e279a2 backported. So I think this patch should be reverted, since the bug was already fixed and it breaks symlink modes. If not, it should at least be changed to not to apply the umask to symlinks. > > commit 484fd6c1de13b336806a967908a927cc0356e312 > > Author: Max Kellermann <max.kellermann@ionos.com> > > Date: Tue Sep 19 10:18:23 2023 +0200 > > > > ext4: apply umask if ACL support is disabled > > > > The function ext4_init_acl() calls posix_acl_create() which is > > responsible for applying the umask. But without > > CONFIG_EXT4_FS_POSIX_ACL, ext4_init_acl() is an empty inline function, > > and nobody applies the umask. > > > > This fixes a bug which causes the umask to be ignored with O_TMPFILE > > on ext4: > > > > https://github.com/MusicPlayerDaemon/MPD/issues/558 > > https://bugs.gentoo.org/show_bug.cgi?id=686142#c3 > > https://bugzilla.kernel.org/show_bug.cgi?id=203625 > > > > Reviewed-by: "J. Bruce Fields" <bfields@redhat.com> > > Cc: stable@vger.kernel.org > > Signed-off-by: Max Kellermann <max.kellermann@ionos.com> > > Link: https://lore.kernel.org/r/20230919081824.1096619-1-max.kellermann@ionos.com > > Signed-off-by: Theodore Ts'o <tytso@mit.edu> > > > > diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h > > index 0c5a79c3b5d4..ef4c19e5f570 100644 > > --- a/fs/ext4/acl.h > > +++ b/fs/ext4/acl.h > > @@ -68,6 +68,11 @@ extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); > > static inline int > > ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) > > { > > + /* usually, the umask is applied by posix_acl_create(), but if > > + ext4 ACL support is disabled at compile time, we need to do > > + it here, because posix_acl_create() will never be called */ > > + inode->i_mode &= ~current_umask(); > > + > > return 0; > > } > > #endif /* CONFIG_EXT4_FS_POSIX_ACL */
On Wed, Mar 13, 2024 at 9:39 PM Michael Forney <mforney@mforney.org> wrote: > Turns out that symlinks are inheriting umask on my system (which > has CONFIG_EXT4_FS_POSIX_ACL=n): > > $ umask 022 > $ ln -s target symlink > $ ls -l symlink > lrwxr-xr-x 1 michael michael 6 Mar 13 13:28 symlink -> target > $ > > Looking at the referenced functions, posix_acl_create() returns > early before applying umask for symlinks, but ext4_init_acl() now > applies the umask unconditionally. Indeed, I forgot to exclude symlinks from this - sorry for the breakage. > After reverting this commit, it works correctly. I am also unable > to reproduce the mentioned issue with O_TMPFILE after reverting the > commit. It seems that the bug was fixed properly in ac6800e279a2 > ('fs: Add missing umask strip in vfs_tmpfile'), and all branches > that have this ext4_init_acl patch already had ac6800e279a2 backported. I can post a patch that adds the missing check or a revert - what do the FS maintainers prefer? (There was a bug with O_TMPFILE ignoring umasks years ago - I first posted the patch in 2018 or so - but by the time my patch actually got merged, the bug had already been fixed somewhere else IIRC.) Max
On Thu, Mar 14, 2024 at 02:08:04PM +0100, Max Kellermann wrote: > On Wed, Mar 13, 2024 at 9:39 PM Michael Forney <mforney@mforney.org> wrote: > > Turns out that symlinks are inheriting umask on my system (which > > has CONFIG_EXT4_FS_POSIX_ACL=n): > > > > $ umask 022 > > $ ln -s target symlink > > $ ls -l symlink > > lrwxr-xr-x 1 michael michael 6 Mar 13 13:28 symlink -> target > > $ > > > > Looking at the referenced functions, posix_acl_create() returns > > early before applying umask for symlinks, but ext4_init_acl() now > > applies the umask unconditionally. > > Indeed, I forgot to exclude symlinks from this - sorry for the breakage. > > > After reverting this commit, it works correctly. I am also unable > > to reproduce the mentioned issue with O_TMPFILE after reverting the > > commit. It seems that the bug was fixed properly in ac6800e279a2 > > ('fs: Add missing umask strip in vfs_tmpfile'), and all branches > > that have this ext4_init_acl patch already had ac6800e279a2 backported. > > I can post a patch that adds the missing check or a revert - what do > the FS maintainers prefer? If it works correctly with a revert we should remove the code rather than adding more code to handle a special case. > > (There was a bug with O_TMPFILE ignoring umasks years ago - I first > posted the patch in 2018 or so - but by the time my patch actually got > merged, the bug had already been fixed somewhere else IIRC.) Yeah, we fixed it a while ago and then I added generic VFS level umask handling but POSIX ACL are hurting us because they're a massive layering violation on that front.
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 51c7f2b14f6f..e7e2f264acf4 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -1194,6 +1194,7 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode) static inline int ceph_pre_init_acls(struct inode *dir, umode_t *mode, struct ceph_acl_sec_ctx *as_ctx) { + *mode &= ~current_umask(); return 0; } static inline void ceph_init_inode_acls(struct inode *inode, diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index 4a8443a2b8ec..694af789c614 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -67,6 +67,7 @@ extern int ext2_init_acl (struct inode *, struct inode *); static inline int ext2_init_acl (struct inode *inode, struct inode *dir) { + inode->i_mode &= ~current_umask(); return 0; } #endif diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index f892e54d0fcd..10791e97a46f 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -17,6 +17,7 @@ int jfs_init_acl(tid_t, struct inode *, struct inode *); static inline int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) { + inode->i_mode &= ~current_umask(); return 0; } diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h index 0e65b3d634d9..54bc9b1061ca 100644 --- a/include/linux/posix_acl.h +++ b/include/linux/posix_acl.h @@ -128,6 +128,7 @@ static inline void cache_no_acl(struct inode *inode) static inline int posix_acl_create(struct inode *inode, umode_t *mode, struct posix_acl **default_acl, struct posix_acl **acl) { + *mode &= ~current_umask(); *default_acl = *acl = NULL; return 0; }
Signed-off-by: Max Kellermann <max.kellermann@ionos.com> --- fs/ceph/super.h | 1 + fs/ext2/acl.h | 1 + fs/jfs/jfs_acl.h | 1 + include/linux/posix_acl.h | 1 + 4 files changed, 4 insertions(+)