diff mbox series

[v5,16/30] arm64: handle PKEY/POE faults

Message ID 20240822151113.1479789-17-joey.gouly@arm.com
State New
Headers show
Series Permission Overlay Extension | expand

Commit Message

Joey Gouly Aug. 22, 2024, 3:10 p.m. UTC
If a memory fault is caused by an overlay/pkey permission check, report it to
userspace as a SIGSEGV with si_code SEGV_PKUERR.

Signed-off-by: Joey Gouly <joey.gouly@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
---
 arch/arm64/include/asm/traps.h |  1 +
 arch/arm64/kernel/traps.c      |  6 ++++
 arch/arm64/mm/fault.c          | 55 +++++++++++++++++++++++++++++++++-
 3 files changed, 61 insertions(+), 1 deletion(-)

Comments

Mark Brown Aug. 29, 2024, 5:55 p.m. UTC | #1
On Thu, Aug 22, 2024 at 04:10:59PM +0100, Joey Gouly wrote:

> +static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma,
> +			unsigned int mm_flags)
> +{
> +	unsigned long iss2 = ESR_ELx_ISS2(esr);
> +
> +	if (!system_supports_poe())
> +		return false;
> +
> +	if (iss2 & ESR_ELx_Overlay)
> +		return true;

Does this need an is_data_abort() && is_instruction_abort() check?
Overlay doesn't appear to be defined for all exception types and it
wasn't clear enough to me that the callers have done this check.
Joey Gouly Sept. 3, 2024, 2:50 p.m. UTC | #2
On Thu, Aug 29, 2024 at 06:55:07PM +0100, Mark Brown wrote:
> On Thu, Aug 22, 2024 at 04:10:59PM +0100, Joey Gouly wrote:
> 
> > +static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma,
> > +			unsigned int mm_flags)
> > +{
> > +	unsigned long iss2 = ESR_ELx_ISS2(esr);
> > +
> > +	if (!system_supports_poe())
> > +		return false;
> > +
> > +	if (iss2 & ESR_ELx_Overlay)
> > +		return true;
> 
> Does this need an is_data_abort() && is_instruction_abort() check?
> Overlay doesn't appear to be defined for all exception types and it
> wasn't clear enough to me that the callers have done this check.

The only callers are in do_page_fault(), which should only be data or
instruction aborts. I talked with Catalin and he said it's fine to not check
again here.

I can add a permissions check though:

commit 033270f5a9462e998b4dee11fc91b43ac7929756
Author: Joey Gouly <joey.gouly@arm.com>
Date:   Tue Sep 3 15:45:59 2024 +0100

    fixup! arm64: handle PKEY/POE faults

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index a68055150950..f651553a8ab8 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -495,6 +495,9 @@ static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma,
        if (!system_supports_poe())
                return false;
 
+       if (!esr_fsc_is_permission_fault(esr))
+               return false;
+
        if (iss2 & ESR_ELx_Overlay)
                return true;
 


I added the permission-fault check because the ESR_EL1 documentation says:
	If a memory access generates a Data Abort for a Permission fault, then this field holds information
	about the fault.

Thanks,
Joey
diff mbox series

Patch

diff --git arch/arm64/include/asm/traps.h arch/arm64/include/asm/traps.h
index eefe766d6161..d780d1bd2eac 100644
--- arch/arm64/include/asm/traps.h
+++ arch/arm64/include/asm/traps.h
@@ -25,6 +25,7 @@  try_emulate_armv8_deprecated(struct pt_regs *regs, u32 insn)
 void force_signal_inject(int signal, int code, unsigned long address, unsigned long err);
 void arm64_notify_segfault(unsigned long addr);
 void arm64_force_sig_fault(int signo, int code, unsigned long far, const char *str);
+void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey);
 void arm64_force_sig_mceerr(int code, unsigned long far, short lsb, const char *str);
 void arm64_force_sig_ptrace_errno_trap(int errno, unsigned long far, const char *str);
 
diff --git arch/arm64/kernel/traps.c arch/arm64/kernel/traps.c
index 9e22683aa921..9a11bb0db284 100644
--- arch/arm64/kernel/traps.c
+++ arch/arm64/kernel/traps.c
@@ -273,6 +273,12 @@  void arm64_force_sig_fault(int signo, int code, unsigned long far,
 		force_sig_fault(signo, code, (void __user *)far);
 }
 
+void arm64_force_sig_fault_pkey(unsigned long far, const char *str, int pkey)
+{
+	arm64_show_signal(SIGSEGV, str);
+	force_sig_pkuerr((void __user *)far, pkey);
+}
+
 void arm64_force_sig_mceerr(int code, unsigned long far, short lsb,
 			    const char *str)
 {
diff --git arch/arm64/mm/fault.c arch/arm64/mm/fault.c
index 451ba7cbd5ad..a68055150950 100644
--- arch/arm64/mm/fault.c
+++ arch/arm64/mm/fault.c
@@ -23,6 +23,7 @@ 
 #include <linux/sched/debug.h>
 #include <linux/highmem.h>
 #include <linux/perf_event.h>
+#include <linux/pkeys.h>
 #include <linux/preempt.h>
 #include <linux/hugetlb.h>
 
@@ -486,6 +487,23 @@  static void do_bad_area(unsigned long far, unsigned long esr,
 	}
 }
 
+static bool fault_from_pkey(unsigned long esr, struct vm_area_struct *vma,
+			unsigned int mm_flags)
+{
+	unsigned long iss2 = ESR_ELx_ISS2(esr);
+
+	if (!system_supports_poe())
+		return false;
+
+	if (iss2 & ESR_ELx_Overlay)
+		return true;
+
+	return !arch_vma_access_permitted(vma,
+			mm_flags & FAULT_FLAG_WRITE,
+			mm_flags & FAULT_FLAG_INSTRUCTION,
+			false);
+}
+
 static bool is_el0_instruction_abort(unsigned long esr)
 {
 	return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_LOW;
@@ -511,6 +529,7 @@  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 	unsigned long addr = untagged_addr(far);
 	struct vm_area_struct *vma;
 	int si_code;
+	int pkey = -1;
 
 	if (kprobe_page_fault(regs, esr))
 		return 0;
@@ -575,6 +594,16 @@  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
 		goto bad_area;
 	}
+
+	if (fault_from_pkey(esr, vma, mm_flags)) {
+		pkey = vma_pkey(vma);
+		vma_end_read(vma);
+		fault = 0;
+		si_code = SEGV_PKUERR;
+		count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+		goto bad_area;
+	}
+
 	fault = handle_mm_fault(vma, addr, mm_flags | FAULT_FLAG_VMA_LOCK, regs);
 	if (!(fault & (VM_FAULT_RETRY | VM_FAULT_COMPLETED)))
 		vma_end_read(vma);
@@ -610,7 +639,16 @@  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 		goto bad_area;
 	}
 
+	if (fault_from_pkey(esr, vma, mm_flags)) {
+		pkey = vma_pkey(vma);
+		mmap_read_unlock(mm);
+		fault = 0;
+		si_code = SEGV_PKUERR;
+		goto bad_area;
+	}
+
 	fault = handle_mm_fault(vma, addr, mm_flags, regs);
+
 	/* Quick path to respond to signals */
 	if (fault_signal_pending(fault, regs)) {
 		if (!user_mode(regs))
@@ -669,8 +707,23 @@  static int __kprobes do_page_fault(unsigned long far, unsigned long esr,
 
 		arm64_force_sig_mceerr(BUS_MCEERR_AR, far, lsb, inf->name);
 	} else {
+		/*
+		 * The pkey value that we return to userspace can be different
+		 * from the pkey that caused the fault.
+		 *
+		 * 1. T1   : mprotect_key(foo, PAGE_SIZE, pkey=4);
+		 * 2. T1   : set POR_EL0 to deny access to pkey=4, touches page
+		 * 3. T1   : faults...
+		 * 4.    T2: mprotect_key(foo, PAGE_SIZE, pkey=5);
+		 * 5. T1   : enters fault handler, takes mmap_lock, etc...
+		 * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
+		 *	     faulted on a pte with its pkey=4.
+		 */
 		/* Something tried to access memory that out of memory map */
-		arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
+		if (si_code == SEGV_PKUERR)
+			arm64_force_sig_fault_pkey(far, inf->name, pkey);
+		else
+			arm64_force_sig_fault(SIGSEGV, si_code, far, inf->name);
 	}
 
 	return 0;