mbox series

[0/6] Change ->mkdir() and vfs_mkdir() to return a dentry.

Message ID 20250220234630.983190-1-neilb@suse.de
Headers show
Series Change ->mkdir() and vfs_mkdir() to return a dentry. | expand

Message

NeilBrown Feb. 20, 2025, 11:36 p.m. UTC
I'm posting this to a wider audience now as I think it is close to its final form.
I have not included every fs maintainer explicitly (though this patch touches every writable FS)
but hope that fsdevel will catch enough of those).  I have included the affected clients
of vfs_mkdir: nfsd, smb/server, cachefiles, and the filesystems with non-trivial changes:
nfs, cephfs, hostfs, fuse.

mkdir is unique among object creation interfaces as there can only be
one dentry for an directory inode.  There is a possibilty of races which
could result in the inode created by mkdir already having a dentry when
mkdir comes to attach one.  To cope with this, three users of
vfs_mkdir() sometimes do a lookup to find the correct dentry when the
one that was passed in wasn't used.  This lookup is clumsy and racy.

This patch set changes mkdir interface so that the filesystem can
provide the correct dentry.  Some times this still requires a look-up
which can be racey, but having the filesystem do it limits this to only
when it is absolutely necessary.

So this series changes ->mkdir and vfs_mkdir() to allow a dentry to be
returned, changes a few filesystems to actually return a dentry
sometimes, and changes the callers of vfs_mkdir() to use the returned dentry.

I think it best if this could all land through the VFS tree as ask maitainers of:
 cachefiles nfsd smb/server
 hostfs ceph nfs fuse
to provide a Reviewed-by.

Thanks,
NeilBrown

Comments

Viacheslav Dubeyko Feb. 21, 2025, 1:48 a.m. UTC | #1
On Fri, 2025-02-21 at 10:36 +1100, NeilBrown wrote:
> ceph already splices the correct dentry (in splice_dentry()) from the
> result of mkdir but does nothing more with it.
> 
> Now that ->mkdir can return a dentry, return the correct dentry.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/ceph/dir.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> index 39e0f240de06..c1a1c168bb27 100644
> --- a/fs/ceph/dir.c
> +++ b/fs/ceph/dir.c
> @@ -1099,6 +1099,7 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
>  	struct ceph_client *cl = mdsc->fsc->client;
>  	struct ceph_mds_request *req;
>  	struct ceph_acl_sec_ctx as_ctx = {};
> +	struct dentry *ret = NULL;

I believe that it makes sense to initialize pointer by error here and always
return ret as output. If something goes wrong in the logic, then we already have
error.

>  	int err;
>  	int op;
>  
> @@ -1166,14 +1167,20 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
>  	    !req->r_reply_info.head->is_dentry)
>  		err = ceph_handle_notrace_create(dir, dentry);
>  out_req:
> +	if (!err && req->r_dentry != dentry)
> +		/* Some other dentry was spliced in */
> +		ret = dget(req->r_dentry);
>  	ceph_mdsc_put_request(req);
>  out:
>  	if (!err)
> +		/* Should this use 'ret' ?? */

Could we make a decision should or shouldn't? :)
It looks not good to leave this comment instead of proper implementation. Do we
have some obstacles to make this decision?

>  		ceph_init_inode_acls(d_inode(dentry), &as_ctx);
>  	else
>  		d_drop(dentry);
>  	ceph_release_acl_sec_ctx(&as_ctx);
> -	return ERR_PTR(err);
> +	if (err)
> +		return ERR_PTR(err);
> +	return ret;

What's about this?

return err ? ERR_PTR(err) : ret;

Thanks,
Slava.

>  }
>  
>  static int ceph_link(struct dentry *old_dentry, struct inode *dir,
Jeff Layton Feb. 21, 2025, 1:31 p.m. UTC | #2
On Fri, 2025-02-21 at 10:36 +1100, NeilBrown wrote:
> ceph already splices the correct dentry (in splice_dentry()) from the
> result of mkdir but does nothing more with it.
> 
> Now that ->mkdir can return a dentry, return the correct dentry.
> 
> Signed-off-by: NeilBrown <neilb@suse.de>
> ---
>  fs/ceph/dir.c | 9 ++++++++-
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> index 39e0f240de06..c1a1c168bb27 100644
> --- a/fs/ceph/dir.c
> +++ b/fs/ceph/dir.c
> @@ -1099,6 +1099,7 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
>  	struct ceph_client *cl = mdsc->fsc->client;
>  	struct ceph_mds_request *req;
>  	struct ceph_acl_sec_ctx as_ctx = {};
> +	struct dentry *ret = NULL;
>  	int err;
>  	int op;
>  
> @@ -1166,14 +1167,20 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
>  	    !req->r_reply_info.head->is_dentry)
>  		err = ceph_handle_notrace_create(dir, dentry);
>  out_req:
> +	if (!err && req->r_dentry != dentry)
> +		/* Some other dentry was spliced in */
> +		ret = dget(req->r_dentry);
>  	ceph_mdsc_put_request(req);
>  out:
>  	if (!err)
> +		/* Should this use 'ret' ?? */

Probably? Is there a guarantee that "dentry" will even have an inode
attached if it got replaced by an disconnected one in the dcache?

>  		ceph_init_inode_acls(d_inode(dentry), &as_ctx);
>  	else
>  		d_drop(dentry);
>  	ceph_release_acl_sec_ctx(&as_ctx);
> -	return ERR_PTR(err);
> +	if (err)
> +		return ERR_PTR(err);
> +	return ret;
>  }
>  
>  static int ceph_link(struct dentry *old_dentry, struct inode *dir,
Al Viro Feb. 22, 2025, 4:19 a.m. UTC | #3
On Fri, Feb 21, 2025 at 10:36:30AM +1100, NeilBrown wrote:

> +In general, filesystems which use d_instantiate_new() to install the new
> +inode can safely return NULL.  Filesystems which may not have an I_NEW inode
> +should use d_drop();d_splice_alias() and return the result of the latter.

IMO that's a bad pattern, _especially_ if you want to go for "in-update"
kind of stuff later.

That's pretty much the same thing as d_drop()/d_rehash() window.

We'd be better off dropping that BUG_ON() in d_splice_alias() and teaching
__d_add() to handle the "it's a hashed negative" case.
Al Viro Feb. 22, 2025, 4:41 a.m. UTC | #4
On Fri, Feb 21, 2025 at 10:36:34AM +1100, NeilBrown wrote:

>  nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
>  {
>  	struct posix_acl *default_acl, *acl;
> @@ -612,15 +612,18 @@ nfs3_proc_mkdir(struct inode *dir, struct dentry *dentry, struct iattr *sattr)
>  		dentry = d_alias;
>  
>  	status = nfs3_proc_setacls(d_inode(dentry), acl, default_acl);
> +	if (status && d_alias)
> +		dput(d_alias);
>  
> -	dput(d_alias);
>  out_release_acls:
>  	posix_acl_release(acl);
>  	posix_acl_release(default_acl);
>  out:
>  	nfs3_free_createdata(data);
>  	dprintk("NFS reply mkdir: %d\n", status);
> -	return status;
> +	if (status)
> +		return ERR_PTR(status);
> +	return d_alias;

Ugh...  That's really hard to follow - you are leaving a dangling
reference in d_alias textually upstream of using that variable.
The only reason it's not a bug is that dput() is reachable only
with status && d_alias and that guarantees that we'll
actually go away on if (status) return ERR_PTR(status).

Worse, you can reach 'out:' with d_alias uninitialized.  Yes,
all such branches happen with status either still unmodified
since it's initialization (which is non-zero) or under
if (status), so again, that return d_alias; is unreachable.

So the code is correct, but it's really asking for trouble down
the road.

BTW, dput(NULL) is guaranteed to be a no-op...
Al Viro Feb. 22, 2025, 4:56 a.m. UTC | #5
On Fri, Feb 21, 2025 at 10:36:30AM +1100, NeilBrown wrote:

> Not all filesystems reliably result in a positive hashed dentry:
> 
> - NFS, cifs, hostfs will sometimes need to perform a lookup of
>   the name to get inode information.  Races could result in this
>   returning something different. Note that this lookup is
>   non-atomic which is what we are trying to avoid.  Placing the
>   lookup in filesystem code means it only happens when the filesystem
>   has no other option.

At least in case of cifs I don't see that lookup anywhere in your
series.  Have I missed it, or...?
NeilBrown Feb. 24, 2025, 2:15 a.m. UTC | #6
On Fri, 21 Feb 2025, Viacheslav Dubeyko wrote:
> On Fri, 2025-02-21 at 10:36 +1100, NeilBrown wrote:
> > ceph already splices the correct dentry (in splice_dentry()) from the
> > result of mkdir but does nothing more with it.
> > 
> > Now that ->mkdir can return a dentry, return the correct dentry.
> > 
> > Signed-off-by: NeilBrown <neilb@suse.de>
> > ---
> >  fs/ceph/dir.c | 9 ++++++++-
> >  1 file changed, 8 insertions(+), 1 deletion(-)
> > 
> > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > index 39e0f240de06..c1a1c168bb27 100644
> > --- a/fs/ceph/dir.c
> > +++ b/fs/ceph/dir.c
> > @@ -1099,6 +1099,7 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
> >  	struct ceph_client *cl = mdsc->fsc->client;
> >  	struct ceph_mds_request *req;
> >  	struct ceph_acl_sec_ctx as_ctx = {};
> > +	struct dentry *ret = NULL;
> 
> I believe that it makes sense to initialize pointer by error here and always
> return ret as output. If something goes wrong in the logic, then we already have
> error.

I'm not certain that I understand, but I have made a change which seems
to be consistent with the above and included it below.  Please let me
know if it is what you intended.

> 
> >  	int err;
> >  	int op;
> >  
> > @@ -1166,14 +1167,20 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
> >  	    !req->r_reply_info.head->is_dentry)
> >  		err = ceph_handle_notrace_create(dir, dentry);
> >  out_req:
> > +	if (!err && req->r_dentry != dentry)
> > +		/* Some other dentry was spliced in */
> > +		ret = dget(req->r_dentry);
> >  	ceph_mdsc_put_request(req);
> >  out:
> >  	if (!err)
> > +		/* Should this use 'ret' ?? */
> 
> Could we make a decision should or shouldn't? :)
> It looks not good to leave this comment instead of proper implementation. Do we
> have some obstacles to make this decision?

I suspect we should use ret, but I didn't want to make a change which
wasn't directly required by my needed.  So I highlighted this which
looks to me like a possible bug, hoping that someone more familiar with
the code would give an opinion.  Do you agree that 'ret' (i.e.
->r_dentry) should be used when ret is not NULL?

> 
> >  		ceph_init_inode_acls(d_inode(dentry), &as_ctx);
> >  	else
> >  		d_drop(dentry);
> >  	ceph_release_acl_sec_ctx(&as_ctx);
> > -	return ERR_PTR(err);
> > +	if (err)
> > +		return ERR_PTR(err);
> > +	return ret;
> 
> What's about this?
> 
> return err ? ERR_PTR(err) : ret;

We could do that, but you said above that you thought we should always
return 'ret' - which does make some sense.

What do you think of the following alternate patch?

Thanks,
NeilBrown

diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 39e0f240de06..d2e5c557df83 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1099,6 +1099,7 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
 	struct ceph_client *cl = mdsc->fsc->client;
 	struct ceph_mds_request *req;
 	struct ceph_acl_sec_ctx as_ctx = {};
+	struct dentry *ret;
 	int err;
 	int op;
 
@@ -1116,32 +1117,32 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
 		      ceph_vinop(dir), dentry, dentry, mode);
 		op = CEPH_MDS_OP_MKDIR;
 	} else {
-		err = -EROFS;
+		ret = ERR_PTR(-EROFS);
 		goto out;
 	}
 
 	if (op == CEPH_MDS_OP_MKDIR &&
 	    ceph_quota_is_max_files_exceeded(dir)) {
-		err = -EDQUOT;
+		ret = ERR_PTR(-EDQUOT);
 		goto out;
 	}
 	if ((op == CEPH_MDS_OP_MKSNAP) && IS_ENCRYPTED(dir) &&
 	    !fscrypt_has_encryption_key(dir)) {
-		err = -ENOKEY;
+		ret = ERR_PTR(-ENOKEY);
 		goto out;
 	}
 
 
 	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
 	if (IS_ERR(req)) {
-		err = PTR_ERR(req);
+		ret = ERR_CAST(req);
 		goto out;
 	}
 
 	mode |= S_IFDIR;
 	req->r_new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
 	if (IS_ERR(req->r_new_inode)) {
-		err = PTR_ERR(req->r_new_inode);
+		ret = ERR_CAST(req->r_new_inode);
 		req->r_new_inode = NULL;
 		goto out_req;
 	}
@@ -1165,15 +1166,23 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
 	    !req->r_reply_info.head->is_target &&
 	    !req->r_reply_info.head->is_dentry)
 		err = ceph_handle_notrace_create(dir, dentry);
+	ret = ERR_PTR(err);
 out_req:
+	if (!IS_ERR(ret) && req->r_dentry != dentry)
+		/* Some other dentry was spliced in */
+		ret = dget(req->r_dentry);
 	ceph_mdsc_put_request(req);
 out:
-	if (!err)
-		ceph_init_inode_acls(d_inode(dentry), &as_ctx);
-	else
+	if (!IS_ERR(ret)) {
+		if (ret)
+			ceph_init_inode_acls(d_inode(ret), &as_ctx);
+		else
+			ceph_init_inode_acls(d_inode(dentry), &as_ctx);
+	} else {
 		d_drop(dentry);
+	}
 	ceph_release_acl_sec_ctx(&as_ctx);
-	return ERR_PTR(err);
+	return ret;
 }
 
 static int ceph_link(struct dentry *old_dentry, struct inode *dir,
Viacheslav Dubeyko Feb. 24, 2025, 10:09 p.m. UTC | #7
On Mon, 2025-02-24 at 13:15 +1100, NeilBrown wrote:
> On Fri, 21 Feb 2025, Viacheslav Dubeyko wrote:
> > On Fri, 2025-02-21 at 10:36 +1100, NeilBrown wrote:
> > > ceph already splices the correct dentry (in splice_dentry()) from the
> > > result of mkdir but does nothing more with it.
> > > 
> > > Now that ->mkdir can return a dentry, return the correct dentry.
> > > 
> > > Signed-off-by: NeilBrown <neilb@suse.de>
> > > ---
> > >  fs/ceph/dir.c | 9 ++++++++-
> > >  1 file changed, 8 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > > index 39e0f240de06..c1a1c168bb27 100644
> > > --- a/fs/ceph/dir.c
> > > +++ b/fs/ceph/dir.c
> > > @@ -1099,6 +1099,7 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
> > >  	struct ceph_client *cl = mdsc->fsc->client;
> > >  	struct ceph_mds_request *req;
> > >  	struct ceph_acl_sec_ctx as_ctx = {};
> > > +	struct dentry *ret = NULL;
> > 
> > I believe that it makes sense to initialize pointer by error here and always
> > return ret as output. If something goes wrong in the logic, then we already have
> > error.
> 
> I'm not certain that I understand, but I have made a change which seems
> to be consistent with the above and included it below.  Please let me
> know if it is what you intended.
> 
> > 
> > >  	int err;
> > >  	int op;
> > >  
> > > @@ -1166,14 +1167,20 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
> > >  	    !req->r_reply_info.head->is_dentry)
> > >  		err = ceph_handle_notrace_create(dir, dentry);
> > >  out_req:
> > > +	if (!err && req->r_dentry != dentry)
> > > +		/* Some other dentry was spliced in */
> > > +		ret = dget(req->r_dentry);
> > >  	ceph_mdsc_put_request(req);
> > >  out:
> > >  	if (!err)
> > > +		/* Should this use 'ret' ?? */
> > 
> > Could we make a decision should or shouldn't? :)
> > It looks not good to leave this comment instead of proper implementation. Do we
> > have some obstacles to make this decision?
> 
> I suspect we should use ret, but I didn't want to make a change which
> wasn't directly required by my needed.  So I highlighted this which
> looks to me like a possible bug, hoping that someone more familiar with
> the code would give an opinion.  Do you agree that 'ret' (i.e.
> ->r_dentry) should be used when ret is not NULL?
> 

I think if we are going to return ret as a dentry, then it makes sense to call
the ceph_init_inode_acls() for d_inode(ret). I don't see the point to call
ceph_init_inode_acls() for d_inode(dentry) then.

> > 
> > >  		ceph_init_inode_acls(d_inode(dentry), &as_ctx);
> > >  	else
> > >  		d_drop(dentry);
> > >  	ceph_release_acl_sec_ctx(&as_ctx);
> > > -	return ERR_PTR(err);
> > > +	if (err)
> > > +		return ERR_PTR(err);
> > > +	return ret;
> > 
> > What's about this?
> > 
> > return err ? ERR_PTR(err) : ret;
> 
> We could do that, but you said above that you thought we should always
> return 'ret' - which does make some sense.
> 
> What do you think of the following alternate patch?
> 

Patch looks good to me. Thanks.

Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>

> Thanks,
> NeilBrown
> 
> diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> index 39e0f240de06..d2e5c557df83 100644
> --- a/fs/ceph/dir.c
> +++ b/fs/ceph/dir.c
> @@ -1099,6 +1099,7 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
>  	struct ceph_client *cl = mdsc->fsc->client;
>  	struct ceph_mds_request *req;
>  	struct ceph_acl_sec_ctx as_ctx = {};
> +	struct dentry *ret;
>  	int err;
>  	int op;
>  
> @@ -1116,32 +1117,32 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
>  		      ceph_vinop(dir), dentry, dentry, mode);
>  		op = CEPH_MDS_OP_MKDIR;
>  	} else {
> -		err = -EROFS;
> +		ret = ERR_PTR(-EROFS);
>  		goto out;
>  	}
>  
>  	if (op == CEPH_MDS_OP_MKDIR &&
>  	    ceph_quota_is_max_files_exceeded(dir)) {
> -		err = -EDQUOT;
> +		ret = ERR_PTR(-EDQUOT);
>  		goto out;
>  	}
>  	if ((op == CEPH_MDS_OP_MKSNAP) && IS_ENCRYPTED(dir) &&
>  	    !fscrypt_has_encryption_key(dir)) {
> -		err = -ENOKEY;
> +		ret = ERR_PTR(-ENOKEY);
>  		goto out;
>  	}
>  
>  
>  	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
>  	if (IS_ERR(req)) {
> -		err = PTR_ERR(req);
> +		ret = ERR_CAST(req);
>  		goto out;
>  	}
>  
>  	mode |= S_IFDIR;
>  	req->r_new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
>  	if (IS_ERR(req->r_new_inode)) {
> -		err = PTR_ERR(req->r_new_inode);
> +		ret = ERR_CAST(req->r_new_inode);
>  		req->r_new_inode = NULL;
>  		goto out_req;
>  	}
> @@ -1165,15 +1166,23 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
>  	    !req->r_reply_info.head->is_target &&
>  	    !req->r_reply_info.head->is_dentry)
>  		err = ceph_handle_notrace_create(dir, dentry);
> +	ret = ERR_PTR(err);
>  out_req:
> +	if (!IS_ERR(ret) && req->r_dentry != dentry)
> +		/* Some other dentry was spliced in */
> +		ret = dget(req->r_dentry);
>  	ceph_mdsc_put_request(req);
>  out:
> -	if (!err)
> -		ceph_init_inode_acls(d_inode(dentry), &as_ctx);
> -	else
> +	if (!IS_ERR(ret)) {
> +		if (ret)
> +			ceph_init_inode_acls(d_inode(ret), &as_ctx);
> +		else
> +			ceph_init_inode_acls(d_inode(dentry), &as_ctx);
> +	} else {
>  		d_drop(dentry);
> +	}
>  	ceph_release_acl_sec_ctx(&as_ctx);
> -	return ERR_PTR(err);
> +	return ret;
>  }
>  
>  static int ceph_link(struct dentry *old_dentry, struct inode *dir,
> 

Thanks,
Slava.
Jeff Layton Feb. 24, 2025, 10:53 p.m. UTC | #8
On Mon, 2025-02-24 at 22:09 +0000, Viacheslav Dubeyko wrote:
> On Mon, 2025-02-24 at 13:15 +1100, NeilBrown wrote:
> > On Fri, 21 Feb 2025, Viacheslav Dubeyko wrote:
> > > On Fri, 2025-02-21 at 10:36 +1100, NeilBrown wrote:
> > > > ceph already splices the correct dentry (in splice_dentry()) from the
> > > > result of mkdir but does nothing more with it.
> > > > 
> > > > Now that ->mkdir can return a dentry, return the correct dentry.
> > > > 
> > > > Signed-off-by: NeilBrown <neilb@suse.de>
> > > > ---
> > > >  fs/ceph/dir.c | 9 ++++++++-
> > > >  1 file changed, 8 insertions(+), 1 deletion(-)
> > > > 
> > > > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > > > index 39e0f240de06..c1a1c168bb27 100644
> > > > --- a/fs/ceph/dir.c
> > > > +++ b/fs/ceph/dir.c
> > > > @@ -1099,6 +1099,7 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
> > > >  	struct ceph_client *cl = mdsc->fsc->client;
> > > >  	struct ceph_mds_request *req;
> > > >  	struct ceph_acl_sec_ctx as_ctx = {};
> > > > +	struct dentry *ret = NULL;
> > > 
> > > I believe that it makes sense to initialize pointer by error here and always
> > > return ret as output. If something goes wrong in the logic, then we already have
> > > error.
> > 
> > I'm not certain that I understand, but I have made a change which seems
> > to be consistent with the above and included it below.  Please let me
> > know if it is what you intended.
> > 
> > > 
> > > >  	int err;
> > > >  	int op;
> > > >  
> > > > @@ -1166,14 +1167,20 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
> > > >  	    !req->r_reply_info.head->is_dentry)
> > > >  		err = ceph_handle_notrace_create(dir, dentry);
> > > >  out_req:
> > > > +	if (!err && req->r_dentry != dentry)
> > > > +		/* Some other dentry was spliced in */
> > > > +		ret = dget(req->r_dentry);
> > > >  	ceph_mdsc_put_request(req);
> > > >  out:
> > > >  	if (!err)
> > > > +		/* Should this use 'ret' ?? */
> > > 
> > > Could we make a decision should or shouldn't? :)
> > > It looks not good to leave this comment instead of proper implementation. Do we
> > > have some obstacles to make this decision?
> > 
> > I suspect we should use ret, but I didn't want to make a change which
> > wasn't directly required by my needed.  So I highlighted this which
> > looks to me like a possible bug, hoping that someone more familiar with
> > the code would give an opinion.  Do you agree that 'ret' (i.e.
> > ->r_dentry) should be used when ret is not NULL?
> > 
> 
> I think if we are going to return ret as a dentry, then it makes sense to call
> the ceph_init_inode_acls() for d_inode(ret). I don't see the point to call
> ceph_init_inode_acls() for d_inode(dentry) then.
> 

My assumption when looking at this was that they should point to the
same inode. That said, working with d_inode(ret) after that point is
less confusing to the casual reader.

> > > 
> > > >  		ceph_init_inode_acls(d_inode(dentry), &as_ctx);
> > > >  	else
> > > >  		d_drop(dentry);
> > > >  	ceph_release_acl_sec_ctx(&as_ctx);
> > > > -	return ERR_PTR(err);
> > > > +	if (err)
> > > > +		return ERR_PTR(err);
> > > > +	return ret;
> > > 
> > > What's about this?
> > > 
> > > return err ? ERR_PTR(err) : ret;
> > 
> > We could do that, but you said above that you thought we should always
> > return 'ret' - which does make some sense.
> > 
> > What do you think of the following alternate patch?
> > 
> 
> Patch looks good to me. Thanks.
> 
> Reviewed-by: Viacheslav Dubeyko <Slava.Dubeyko@ibm.com>
> 
> > Thanks,
> > NeilBrown
> > 
> > diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
> > index 39e0f240de06..d2e5c557df83 100644
> > --- a/fs/ceph/dir.c
> > +++ b/fs/ceph/dir.c
> > @@ -1099,6 +1099,7 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
> >  	struct ceph_client *cl = mdsc->fsc->client;
> >  	struct ceph_mds_request *req;
> >  	struct ceph_acl_sec_ctx as_ctx = {};
> > +	struct dentry *ret;
> >  	int err;
> >  	int op;
> >  
> > @@ -1116,32 +1117,32 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
> >  		      ceph_vinop(dir), dentry, dentry, mode);
> >  		op = CEPH_MDS_OP_MKDIR;
> >  	} else {
> > -		err = -EROFS;
> > +		ret = ERR_PTR(-EROFS);
> >  		goto out;
> >  	}
> >  
> >  	if (op == CEPH_MDS_OP_MKDIR &&
> >  	    ceph_quota_is_max_files_exceeded(dir)) {
> > -		err = -EDQUOT;
> > +		ret = ERR_PTR(-EDQUOT);
> >  		goto out;
> >  	}
> >  	if ((op == CEPH_MDS_OP_MKSNAP) && IS_ENCRYPTED(dir) &&
> >  	    !fscrypt_has_encryption_key(dir)) {
> > -		err = -ENOKEY;
> > +		ret = ERR_PTR(-ENOKEY);
> >  		goto out;
> >  	}
> >  
> >  
> >  	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
> >  	if (IS_ERR(req)) {
> > -		err = PTR_ERR(req);
> > +		ret = ERR_CAST(req);
> >  		goto out;
> >  	}
> >  
> >  	mode |= S_IFDIR;
> >  	req->r_new_inode = ceph_new_inode(dir, dentry, &mode, &as_ctx);
> >  	if (IS_ERR(req->r_new_inode)) {
> > -		err = PTR_ERR(req->r_new_inode);
> > +		ret = ERR_CAST(req->r_new_inode);
> >  		req->r_new_inode = NULL;
> >  		goto out_req;
> >  	}
> > @@ -1165,15 +1166,23 @@ static struct dentry *ceph_mkdir(struct mnt_idmap *idmap, struct inode *dir,
> >  	    !req->r_reply_info.head->is_target &&
> >  	    !req->r_reply_info.head->is_dentry)
> >  		err = ceph_handle_notrace_create(dir, dentry);
> > +	ret = ERR_PTR(err);
> >  out_req:
> > +	if (!IS_ERR(ret) && req->r_dentry != dentry)
> > +		/* Some other dentry was spliced in */
> > +		ret = dget(req->r_dentry);
> >  	ceph_mdsc_put_request(req);
> >  out:
> > -	if (!err)
> > -		ceph_init_inode_acls(d_inode(dentry), &as_ctx);
> > -	else
> > +	if (!IS_ERR(ret)) {
> > +		if (ret)
> > +			ceph_init_inode_acls(d_inode(ret), &as_ctx);
> > +		else
> > +			ceph_init_inode_acls(d_inode(dentry), &as_ctx);
> > +	} else {
> >  		d_drop(dentry);
> > +	}
> >  	ceph_release_acl_sec_ctx(&as_ctx);
> > -	return ERR_PTR(err);
> > +	return ret;
> >  }
> >  
> >  static int ceph_link(struct dentry *old_dentry, struct inode *dir,
> > 
> 
> Thanks,
> Slava.
>