@@ -934,7 +934,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
u64 off = *ki_pos;
u64 len = iov_iter_count(to);
u64 i_size = i_size_read(inode);
- bool sparse = ceph_test_mount_opt(fsc, SPARSEREAD);
+ bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
u64 objver = 0;
dout("sync_read on inode %p %llx~%llx\n", inode, *ki_pos, len);
@@ -962,10 +962,19 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
int idx;
size_t left;
struct ceph_osd_req_op *op;
+ u64 read_off = off;
+ u64 read_len = len;
+
+ /* determine new offset/length if encrypted */
+ ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len);
+
+ dout("sync_read orig %llu~%llu reading %llu~%llu",
+ off, len, read_off, read_len);
req = ceph_osdc_new_request(osdc, &ci->i_layout,
- ci->i_vino, off, &len, 0, 1,
- sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ,
+ ci->i_vino, read_off, &read_len, 0, 1,
+ sparse ? CEPH_OSD_OP_SPARSE_READ :
+ CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ,
NULL, ci->i_truncate_seq,
ci->i_truncate_size, false);
@@ -974,10 +983,13 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
break;
}
+ /* adjust len downward if the request truncated the len */
+ if (off + len > read_off + read_len)
+ len = read_off + read_len - off;
more = len < iov_iter_count(to);
- num_pages = calc_pages_for(off, len);
- page_off = off & ~PAGE_MASK;
+ num_pages = calc_pages_for(read_off, read_len);
+ page_off = offset_in_page(off);
pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
if (IS_ERR(pages)) {
ceph_osdc_put_request(req);
@@ -985,7 +997,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
break;
}
- osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_off,
+ osd_req_op_extent_osd_data_pages(req, 0, pages, read_len,
+ offset_in_page(read_off),
false, false);
op = &req->r_ops[0];
@@ -1004,7 +1017,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
ceph_update_read_metrics(&fsc->mdsc->metric,
req->r_start_latency,
req->r_end_latency,
- len, ret);
+ read_len, ret);
if (ret > 0)
objver = req->r_version;
@@ -1019,8 +1032,34 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
else if (ret == -ENOENT)
ret = 0;
+ if (ret > 0 && IS_ENCRYPTED(inode)) {
+ int fret;
+
+ fret = ceph_fscrypt_decrypt_extents(inode, pages, read_off,
+ op->extent.sparse_ext, op->extent.sparse_ext_cnt);
+ if (fret < 0) {
+ ret = fret;
+ ceph_osdc_put_request(req);
+ break;
+ }
+
+ /* account for any partial block at the beginning */
+ fret -= (off - read_off);
+
+ /*
+ * Short read after big offset adjustment?
+ * Nothing is usable, just call it a zero
+ * len read.
+ */
+ fret = max(fret, 0);
+
+ /* account for partial block at the end */
+ ret = min_t(ssize_t, fret, len);
+ }
+
ceph_osdc_put_request(req);
+ /* Short read but not EOF? Zero out the remainder. */
if (ret >= 0 && ret < len && (off + ret < i_size)) {
int zlen = min(len - ret, i_size - off - ret);
int zoff = page_off + ret;
@@ -1034,15 +1073,16 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
idx = 0;
left = ret > 0 ? ret : 0;
while (left > 0) {
- size_t len, copied;
- page_off = off & ~PAGE_MASK;
- len = min_t(size_t, left, PAGE_SIZE - page_off);
+ size_t plen, copied;
+
+ plen = min_t(size_t, left, PAGE_SIZE - page_off);
SetPageUptodate(pages[idx]);
copied = copy_page_to_iter(pages[idx++],
- page_off, len, to);
+ page_off, plen, to);
off += copied;
left -= copied;
- if (copied < len) {
+ page_off = 0;
+ if (copied < plen) {
ret = -EFAULT;
break;
}
@@ -1059,20 +1099,21 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos,
break;
}
- if (off > *ki_pos) {
- if (off >= i_size) {
- *retry_op = CHECK_EOF;
- ret = i_size - *ki_pos;
- *ki_pos = i_size;
- } else {
- ret = off - *ki_pos;
- *ki_pos = off;
+ if (ret > 0) {
+ if (off > *ki_pos) {
+ if (off >= i_size) {
+ *retry_op = CHECK_EOF;
+ ret = i_size - *ki_pos;
+ *ki_pos = i_size;
+ } else {
+ ret = off - *ki_pos;
+ *ki_pos = off;
+ }
}
- }
-
- if (last_objver && ret > 0)
- *last_objver = objver;
+ if (last_objver)
+ *last_objver = objver;
+ }
dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
return ret;
}
Switch to using sparse reads when the inode is encrypted. Note that the crypto block may be smaller than a page, but the reverse cannot be true. Signed-off-by: Jeff Layton <jlayton@kernel.org> --- fs/ceph/file.c | 89 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 65 insertions(+), 24 deletions(-)