diff mbox series

[for-4.19] erofs: fix extended inode could cross boundary

Message ID 20210426082933.4040996-1-hsiangkao@redhat.com
State New
Headers show
Series [for-4.19] erofs: fix extended inode could cross boundary | expand

Commit Message

Gao Xiang April 26, 2021, 8:29 a.m. UTC
commit 0dcd3c94e02438f4a571690e26f4ee997524102a upstream.

Each ondisk inode should be aligned with inode slot boundary
(32-byte alignment) because of nid calculation formula, so all
compact inodes (32 byte) cannot across page boundary. However,
extended inode is now 64-byte form, which can across page boundary
in principle if the location is specified on purpose, although
it's hard to be generated by mkfs due to the allocation policy
and rarely used by Android use case now mainly for > 4GiB files.

For now, only two fields `i_ctime_nsec` and `i_nlink' couldn't
be read from disk properly and cause out-of-bound memory read
with random value.

Let's fix now.

Fixes: 431339ba9042 ("staging: erofs: add inode operations")
Cc: <stable@vger.kernel.org> # 4.19+
Link: https://lore.kernel.org/r/20200729175801.GA23973@xiangao.remote.csb
Reviewed-by: Chao Yu <yuchao0@huawei.com>
[ Gao Xiang: resolve non-trivial conflicts for latest 4.19.y. ]
Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
---
 drivers/staging/erofs/inode.c | 135 ++++++++++++++++++++++------------
 1 file changed, 90 insertions(+), 45 deletions(-)

Comments

Greg KH April 26, 2021, 8:39 a.m. UTC | #1
On Mon, Apr 26, 2021 at 04:29:33PM +0800, Gao Xiang wrote:
> commit 0dcd3c94e02438f4a571690e26f4ee997524102a upstream.
> 
> Each ondisk inode should be aligned with inode slot boundary
> (32-byte alignment) because of nid calculation formula, so all
> compact inodes (32 byte) cannot across page boundary. However,
> extended inode is now 64-byte form, which can across page boundary
> in principle if the location is specified on purpose, although
> it's hard to be generated by mkfs due to the allocation policy
> and rarely used by Android use case now mainly for > 4GiB files.
> 
> For now, only two fields `i_ctime_nsec` and `i_nlink' couldn't
> be read from disk properly and cause out-of-bound memory read
> with random value.
> 
> Let's fix now.
> 
> Fixes: 431339ba9042 ("staging: erofs: add inode operations")
> Cc: <stable@vger.kernel.org> # 4.19+
> Link: https://lore.kernel.org/r/20200729175801.GA23973@xiangao.remote.csb
> Reviewed-by: Chao Yu <yuchao0@huawei.com>
> [ Gao Xiang: resolve non-trivial conflicts for latest 4.19.y. ]
> Signed-off-by: Gao Xiang <hsiangkao@redhat.com>
> ---
>  drivers/staging/erofs/inode.c | 135 ++++++++++++++++++++++------------
>  1 file changed, 90 insertions(+), 45 deletions(-)

Thanks for the backport, I'll queue it up after this latest round of
stable kernels is released later this week.

greg k-h
diff mbox series

Patch

diff --git a/drivers/staging/erofs/inode.c b/drivers/staging/erofs/inode.c
index 12a5be95457f..a43abd530cc1 100644
--- a/drivers/staging/erofs/inode.c
+++ b/drivers/staging/erofs/inode.c
@@ -14,26 +14,78 @@ 
 
 #include <trace/events/erofs.h>
 
-/* no locking */
-static int read_inode(struct inode *inode, void *data)
+/*
+ * if inode is successfully read, return its inode page (or sometimes
+ * the inode payload page if it's an extended inode) in order to fill
+ * inline data if possible.
+ */
+static struct page *read_inode(struct inode *inode, unsigned int *ofs)
 {
+	struct super_block *sb = inode->i_sb;
+	struct erofs_sb_info *sbi = EROFS_SB(sb);
 	struct erofs_vnode *vi = EROFS_V(inode);
-	struct erofs_inode_v1 *v1 = data;
-	const unsigned advise = le16_to_cpu(v1->i_advise);
+	const erofs_off_t inode_loc = iloc(sbi, vi->nid);
+	erofs_blk_t blkaddr;
+	struct page *page;
+	struct erofs_inode_v1 *v1;
+	struct erofs_inode_v2 *v2, *copied = NULL;
+	unsigned int ifmt;
+	int err;
 
-	vi->data_mapping_mode = __inode_data_mapping(advise);
+	blkaddr = erofs_blknr(inode_loc);
+	*ofs = erofs_blkoff(inode_loc);
 
+	debugln("%s, reading inode nid %llu at %u of blkaddr %u",
+		__func__, vi->nid, *ofs, blkaddr);
+
+	page = erofs_get_meta_page(sb, blkaddr, false);
+	if (IS_ERR(page)) {
+		errln("failed to get inode (nid: %llu) page, err %ld",
+		      vi->nid, PTR_ERR(page));
+		return page;
+	}
+
+	v1 = page_address(page) + *ofs;
+	ifmt = le16_to_cpu(v1->i_advise);
+
+	vi->data_mapping_mode = __inode_data_mapping(ifmt);
 	if (unlikely(vi->data_mapping_mode >= EROFS_INODE_LAYOUT_MAX)) {
 		errln("unknown data mapping mode %u of nid %llu",
 			vi->data_mapping_mode, vi->nid);
-		DBG_BUGON(1);
-		return -EIO;
+		err = -EOPNOTSUPP;
+		goto err_out;
 	}
 
-	if (__inode_version(advise) == EROFS_INODE_LAYOUT_V2) {
-		struct erofs_inode_v2 *v2 = data;
-
+	switch (__inode_version(ifmt)) {
+	case EROFS_INODE_LAYOUT_V2:
 		vi->inode_isize = sizeof(struct erofs_inode_v2);
+		/* check if the inode acrosses page boundary */
+		if (*ofs + vi->inode_isize <= PAGE_SIZE) {
+			*ofs += vi->inode_isize;
+			v2 = (struct erofs_inode_v2 *)v1;
+		} else {
+			const unsigned int gotten = PAGE_SIZE - *ofs;
+
+			copied = kmalloc(vi->inode_isize, GFP_NOFS);
+			if (!copied) {
+				err = -ENOMEM;
+				goto err_out;
+			}
+			memcpy(copied, v1, gotten);
+			unlock_page(page);
+			put_page(page);
+
+			page = erofs_get_meta_page(sb, blkaddr + 1, false);
+			if (IS_ERR(page)) {
+				errln("failed to get inode payload page (nid: %llu), err %ld",
+				      vi->nid, PTR_ERR(page));
+				kfree(copied);
+				return page;
+			}
+			*ofs = vi->inode_isize - gotten;
+			memcpy((u8 *)copied + gotten, page_address(page), *ofs);
+			v2 = copied;
+		}
 		vi->xattr_isize = ondisk_xattr_ibody_size(v2->i_xattr_icount);
 
 		inode->i_mode = le16_to_cpu(v2->i_mode);
@@ -46,7 +98,7 @@  static int read_inode(struct inode *inode, void *data)
 		} else if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 			inode->i_rdev = 0;
 		} else {
-			return -EIO;
+			goto bogusimode;
 		}
 
 		i_uid_write(inode, le32_to_cpu(v2->i_uid));
@@ -58,10 +110,11 @@  static int read_inode(struct inode *inode, void *data)
 		inode->i_ctime.tv_nsec = le32_to_cpu(v2->i_ctime_nsec);
 
 		inode->i_size = le64_to_cpu(v2->i_size);
-	} else if (__inode_version(advise) == EROFS_INODE_LAYOUT_V1) {
-		struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
-
+		kfree(copied);
+		break;
+	case EROFS_INODE_LAYOUT_V1:
 		vi->inode_isize = sizeof(struct erofs_inode_v1);
+		*ofs += vi->inode_isize;
 		vi->xattr_isize = ondisk_xattr_ibody_size(v1->i_xattr_icount);
 
 		inode->i_mode = le16_to_cpu(v1->i_mode);
@@ -74,7 +127,7 @@  static int read_inode(struct inode *inode, void *data)
 		} else if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
 			inode->i_rdev = 0;
 		} else {
-			return -EIO;
+			goto bogusimode;
 		}
 
 		i_uid_write(inode, le16_to_cpu(v1->i_uid));
@@ -86,11 +139,12 @@  static int read_inode(struct inode *inode, void *data)
 		inode->i_ctime.tv_nsec = sbi->build_time_nsec;
 
 		inode->i_size = le32_to_cpu(v1->i_size);
-	} else {
+		break;
+	default:
 		errln("unsupported on-disk inode version %u of nid %llu",
-			__inode_version(advise), vi->nid);
-		DBG_BUGON(1);
-		return -EIO;
+		      __inode_version(ifmt), vi->nid);
+		err = -EOPNOTSUPP;
+		goto err_out;
 	}
 
 	inode->i_mtime.tv_sec = inode->i_ctime.tv_sec;
@@ -100,7 +154,16 @@  static int read_inode(struct inode *inode, void *data)
 
 	/* measure inode.i_blocks as the generic filesystem */
 	inode->i_blocks = ((inode->i_size - 1) >> 9) + 1;
-	return 0;
+	return page;
+bogusimode:
+	errln("bogus i_mode (%o) @ nid %llu", inode->i_mode, vi->nid);
+	err = -EIO;
+err_out:
+	DBG_BUGON(1);
+	kfree(copied);
+	unlock_page(page);
+	put_page(page);
+	return ERR_PTR(err);
 }
 
 /*
@@ -132,7 +195,7 @@  static int fill_inline_data(struct inode *inode, void *data, unsigned m_pofs)
 		if (unlikely(lnk == NULL))
 			return -ENOMEM;
 
-		m_pofs += vi->inode_isize + vi->xattr_isize;
+		m_pofs += vi->xattr_isize;
 
 		/* inline symlink data shouldn't across page boundary as well */
 		if (unlikely(m_pofs + inode->i_size > PAGE_SIZE)) {
@@ -153,35 +216,17 @@  static int fill_inline_data(struct inode *inode, void *data, unsigned m_pofs)
 
 static int fill_inode(struct inode *inode, int isdir)
 {
-	struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
-	struct erofs_vnode *vi = EROFS_V(inode);
 	struct page *page;
-	void *data;
-	int err;
-	erofs_blk_t blkaddr;
-	unsigned ofs;
+	unsigned int ofs;
+	int err = 0;
 
 	trace_erofs_fill_inode(inode, isdir);
 
-	blkaddr = erofs_blknr(iloc(sbi, vi->nid));
-	ofs = erofs_blkoff(iloc(sbi, vi->nid));
-
-	debugln("%s, reading inode nid %llu at %u of blkaddr %u",
-		__func__, vi->nid, ofs, blkaddr);
-
-	page = erofs_get_meta_page(inode->i_sb, blkaddr, isdir);
-
+	/* read inode base data from disk */
+	page = read_inode(inode, &ofs);
 	if (IS_ERR(page)) {
-		errln("failed to get inode (nid: %llu) page, err %ld",
-			vi->nid, PTR_ERR(page));
 		return PTR_ERR(page);
-	}
-
-	DBG_BUGON(!PageUptodate(page));
-	data = page_address(page);
-
-	err = read_inode(inode, data + ofs);
-	if (!err) {
+	} else {
 		/* setup the new inode */
 		if (S_ISREG(inode->i_mode)) {
 #ifdef CONFIG_EROFS_FS_XATTR
@@ -229,7 +274,7 @@  static int fill_inode(struct inode *inode, int isdir)
 		inode->i_mapping->a_ops = &erofs_raw_access_aops;
 
 		/* fill last page if inline data is available */
-		fill_inline_data(inode, data, ofs);
+		fill_inline_data(inode, page_address(page), ofs);
 	}
 
 out_unlock: