diff mbox series

[v7,8/9] ceph: add object version support for sync read

Message ID 20211208124528.679831-2-xiubli@redhat.com
State New
Headers show
Series ceph: size handling for the fscrypt | expand

Commit Message

Xiubo Li Dec. 8, 2021, 12:45 p.m. UTC
From: Xiubo Li <xiubli@redhat.com>

Always return the last object's version.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
---
 fs/ceph/file.c  | 8 ++++++--
 fs/ceph/super.h | 3 ++-
 2 files changed, 8 insertions(+), 3 deletions(-)

Comments

Jeff Layton Dec. 8, 2021, 3:33 p.m. UTC | #1
On Wed, 2021-12-08 at 20:45 +0800, xiubli@redhat.com wrote:
> From: Xiubo Li <xiubli@redhat.com>
> 
> Always return the last object's version.
> 
> Signed-off-by: Xiubo Li <xiubli@redhat.com>
> ---
>  fs/ceph/file.c  | 8 ++++++--
>  fs/ceph/super.h | 3 ++-
>  2 files changed, 8 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index b42158c9aa16..9279b8642add 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -883,7 +883,8 @@ enum {
>   * only return a short read to the caller if we hit EOF.
>   */
>  ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
> -			 struct iov_iter *to, int *retry_op)
> +			 struct iov_iter *to, int *retry_op,
> +			 u64 *last_objver)
>  {
>  	struct ceph_inode_info *ci = ceph_inode(inode);
>  	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> @@ -950,6 +951,9 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>  					 req->r_end_latency,
>  					 len, ret);
>  
> +		if (last_objver)
> +			*last_objver = req->r_version;
> +

Much better! That said, this might be unreliable if (say) the first OSD
read was successful and then the second one failed on a long read that
spans objects. We'd want to return a short read in that case, but the
last_objver would end up being set to 0.

I think you shouldn't set last_objver unless the call is going to return
>0, and then you want to set it to the object version of the last
successful read in the series.




>  		ceph_osdc_put_request(req);
>  
>  		i_size = i_size_read(inode);
> @@ -1020,7 +1024,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
>  	     (unsigned)iov_iter_count(to),
>  	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
>  
> -	return __ceph_sync_read(inode, &iocb->ki_pos, to, retry_op);
> +	return __ceph_sync_read(inode, &iocb->ki_pos, to, retry_op, NULL);
>  }
>  
>  struct ceph_aio_request {
> diff --git a/fs/ceph/super.h b/fs/ceph/super.h
> index 84fec46308b0..a7bdb28af595 100644
> --- a/fs/ceph/super.h
> +++ b/fs/ceph/super.h
> @@ -1258,7 +1258,8 @@ extern int ceph_open(struct inode *inode, struct file *file);
>  extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
>  			    struct file *file, unsigned flags, umode_t mode);
>  extern ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
> -				struct iov_iter *to, int *retry_op);
> +				struct iov_iter *to, int *retry_op,
> +				u64 *last_objver);
>  extern int ceph_release(struct inode *inode, struct file *filp);
>  extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
>  				  char *data, size_t len);
Xiubo Li Dec. 9, 2021, 12:56 a.m. UTC | #2
On 12/9/21 2:22 AM, Jeff Layton wrote:
> On Wed, 2021-12-08 at 10:33 -0500, Jeff Layton wrote:
>> On Wed, 2021-12-08 at 20:45 +0800, xiubli@redhat.com wrote:
>>> From: Xiubo Li <xiubli@redhat.com>
>>>
>>> Always return the last object's version.
>>>
>>> Signed-off-by: Xiubo Li <xiubli@redhat.com>
>>> ---
>>>   fs/ceph/file.c  | 8 ++++++--
>>>   fs/ceph/super.h | 3 ++-
>>>   2 files changed, 8 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
>>> index b42158c9aa16..9279b8642add 100644
>>> --- a/fs/ceph/file.c
>>> +++ b/fs/ceph/file.c
>>> @@ -883,7 +883,8 @@ enum {
>>>    * only return a short read to the caller if we hit EOF.
>>>    */
>>>   ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>>> -			 struct iov_iter *to, int *retry_op)
>>> +			 struct iov_iter *to, int *retry_op,
>>> +			 u64 *last_objver)
>>>   {
>>>   	struct ceph_inode_info *ci = ceph_inode(inode);
>>>   	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
>>> @@ -950,6 +951,9 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>>>   					 req->r_end_latency,
>>>   					 len, ret);
>>>   
>>> +		if (last_objver)
>>> +			*last_objver = req->r_version;
>>> +
>> Much better! That said, this might be unreliable if (say) the first OSD
>> read was successful and then the second one failed on a long read that
>> spans objects. We'd want to return a short read in that case, but the
>> last_objver would end up being set to 0.
>>
>> I think you shouldn't set last_objver unless the call is going to return
>>> 0, and then you want to set it to the object version of the last
>> successful read in the series.
>>
Make sense to me.


> Since this was a simple change, I went ahead and folded the patch below
> into this patch and updated wip-fscrypt-size. Let me know if you have
> any objections:
>
> [PATCH] SQUASH: only set last_objver iff we're returning success
>
> Signed-off-by: Jeff Layton <jlayton@kernel.org>
> ---
>   fs/ceph/file.c | 8 +++++---
>   1 file changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/fs/ceph/file.c b/fs/ceph/file.c
> index 9279b8642add..ee6fb642cf05 100644
> --- a/fs/ceph/file.c
> +++ b/fs/ceph/file.c
> @@ -893,6 +893,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>   	u64 off = *ki_pos;
>   	u64 len = iov_iter_count(to);
>   	u64 i_size = i_size_read(inode);
> +	u64 objver = 0;
>   
>   	dout("sync_read on inode %p %llu~%u\n", inode, *ki_pos, (unsigned)len);
>   
> @@ -951,9 +952,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>   					 req->r_end_latency,
>   					 len, ret);
>   
> -		if (last_objver)
> -			*last_objver = req->r_version;
> -
> +		objver = req->r_version;
>   		ceph_osdc_put_request(req);
>   
>   		i_size = i_size_read(inode);
> @@ -1010,6 +1009,9 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
>   		}
>   	}
>   
> +	if (ret > 0)
> +		*last_objver = objver;
> +
>   	dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
>   	return ret;
>   }

This also looks good to me :-)

Thanks.

BRs
diff mbox series

Patch

diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index b42158c9aa16..9279b8642add 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -883,7 +883,8 @@  enum {
  * only return a short read to the caller if we hit EOF.
  */
 ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
-			 struct iov_iter *to, int *retry_op)
+			 struct iov_iter *to, int *retry_op,
+			 u64 *last_objver)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
@@ -950,6 +951,9 @@  ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
 					 req->r_end_latency,
 					 len, ret);
 
+		if (last_objver)
+			*last_objver = req->r_version;
+
 		ceph_osdc_put_request(req);
 
 		i_size = i_size_read(inode);
@@ -1020,7 +1024,7 @@  static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
 	     (unsigned)iov_iter_count(to),
 	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
 
-	return __ceph_sync_read(inode, &iocb->ki_pos, to, retry_op);
+	return __ceph_sync_read(inode, &iocb->ki_pos, to, retry_op, NULL);
 }
 
 struct ceph_aio_request {
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 84fec46308b0..a7bdb28af595 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1258,7 +1258,8 @@  extern int ceph_open(struct inode *inode, struct file *file);
 extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
 			    struct file *file, unsigned flags, umode_t mode);
 extern ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
-				struct iov_iter *to, int *retry_op);
+				struct iov_iter *to, int *retry_op,
+				u64 *last_objver);
 extern int ceph_release(struct inode *inode, struct file *filp);
 extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
 				  char *data, size_t len);