
[v5,6/7] tcg: implement JIT for iOS and Apple Silicon

Message ID 20201108232425.1705-7-j@getutm.app
State New
Series [v5,1/7] configure: option to disable host block devices

Commit Message

Joelle van Dyne Nov. 8, 2020, 11:24 p.m. UTC
When entitlements are available (macOS or jailbroken iOS), a hardware
feature called APRR on newer Apple Silicon can cheaply mark JIT pages as
either RX or RW. Reverse-engineered functions from libsystem_pthread.dylib
are implemented to handle this.

The following rules apply for JIT write protect:
  * JIT write-protect is enabled before tcg_qemu_tb_exec()
  * JIT write-protect is disabled after tcg_qemu_tb_exec() returns
  * JIT write-protect is disabled inside do_tb_phys_invalidate(), but if it
    is called from within tcg_qemu_tb_exec(), write-protect will be enabled
    again before returning.
  * JIT write-protect is disabled by cpu_loop_exit() for interrupt handling.
  * JIT write-protect is disabled everywhere else.

See https://developer.apple.com/documentation/apple_silicon/porting_just-in-time_compilers_to_apple_silicon

Signed-off-by: Joelle van Dyne <j@getutm.app>
---
 include/exec/exec-all.h     |  2 +
 include/tcg/tcg-apple-jit.h | 86 +++++++++++++++++++++++++++++++++++++
 include/tcg/tcg.h           |  3 ++
 accel/tcg/cpu-exec-common.c |  2 +
 accel/tcg/cpu-exec.c        |  2 +
 accel/tcg/translate-all.c   | 46 ++++++++++++++++++++
 tcg/tcg.c                   |  4 ++
 7 files changed, 145 insertions(+)
 create mode 100644 include/tcg/tcg-apple-jit.h

Comments

Alexander Graf Nov. 20, 2020, 9:08 a.m. UTC | #1
On 09.11.20 00:24, Joelle van Dyne wrote:
> When entitlements are available (macOS or jailbroken iOS), a hardware

> feature called APRR exists on newer Apple Silicon that can cheaply mark JIT

> pages as either RX or RW. Reverse engineered functions from

> libsystem_pthread.dylib are implemented to handle this.

>

> The following rules apply for JIT write protect:

>    * JIT write-protect is enabled before tcg_qemu_tb_exec()

>    * JIT write-protect is disabled after tcg_qemu_tb_exec() returns

>    * JIT write-protect is disabled inside do_tb_phys_invalidate() but if it

>      is called inside of tcg_qemu_tb_exec() then write-protect will be

>      enabled again before returning.

>    * JIT write-protect is disabled by cpu_loop_exit() for interrupt handling.

>    * JIT write-protect is disabled everywhere else.

>

> See https://developer.apple.com/documentation/apple_silicon/porting_just-in-time_compilers_to_apple_silicon

>

> Signed-off-by: Joelle van Dyne <j@getutm.app>

> ---

>   include/exec/exec-all.h     |  2 +

>   include/tcg/tcg-apple-jit.h | 86 +++++++++++++++++++++++++++++++++++++

>   include/tcg/tcg.h           |  3 ++

>   accel/tcg/cpu-exec-common.c |  2 +

>   accel/tcg/cpu-exec.c        |  2 +

>   accel/tcg/translate-all.c   | 46 ++++++++++++++++++++

>   tcg/tcg.c                   |  4 ++

>   7 files changed, 145 insertions(+)

>   create mode 100644 include/tcg/tcg-apple-jit.h

>

> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h

> index aa65103702..3829f3d470 100644

> --- a/include/exec/exec-all.h

> +++ b/include/exec/exec-all.h

> @@ -549,6 +549,8 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,

>                                      target_ulong cs_base, uint32_t flags,

>                                      uint32_t cf_mask);

>   void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);

> +void tb_exec_lock(void);

> +void tb_exec_unlock(void);

>   

>   /* GETPC is the true target of the return instruction that we'll execute.  */

>   #if defined(CONFIG_TCG_INTERPRETER)

> diff --git a/include/tcg/tcg-apple-jit.h b/include/tcg/tcg-apple-jit.h

> new file mode 100644

> index 0000000000..9efdb2000d

> --- /dev/null

> +++ b/include/tcg/tcg-apple-jit.h

> @@ -0,0 +1,86 @@

> +/*

> + * Apple Silicon functions for JIT handling

> + *

> + * Copyright (c) 2020 osy

> + *

> + * This library is free software; you can redistribute it and/or

> + * modify it under the terms of the GNU Lesser General Public

> + * License as published by the Free Software Foundation; either

> + * version 2.1 of the License, or (at your option) any later version.

> + *

> + * This library is distributed in the hope that it will be useful,

> + * but WITHOUT ANY WARRANTY; without even the implied warranty of

> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> + * Lesser General Public License for more details.

> + *

> + * You should have received a copy of the GNU Lesser General Public

> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.

> + */

> +

> +#ifndef TCG_APPLE_JIT_H

> +#define TCG_APPLE_JIT_H

> +

> +/*

> + * APRR handling

> + * Credits to: https://siguza.github.io/APRR/

> + * Reversed from /usr/lib/system/libsystem_pthread.dylib

> + */

> +

> +#if defined(__aarch64__) && defined(CONFIG_DARWIN)

> +

> +#define _COMM_PAGE_START_ADDRESS        (0x0000000FFFFFC000ULL) /* In TTBR0 */

> +#define _COMM_PAGE_APRR_SUPPORT         (_COMM_PAGE_START_ADDRESS + 0x10C)

> +#define _COMM_PAGE_APPR_WRITE_ENABLE    (_COMM_PAGE_START_ADDRESS + 0x110)

> +#define _COMM_PAGE_APRR_WRITE_DISABLE   (_COMM_PAGE_START_ADDRESS + 0x118)

> +

> +static __attribute__((__always_inline__)) bool jit_write_protect_supported(void)

> +{

> +    /* Access shared kernel page at fixed memory location. */

> +    uint8_t aprr_support = *(volatile uint8_t *)_COMM_PAGE_APRR_SUPPORT;

> +    return aprr_support > 0;

> +}

> +

> +/* write protect enable = write disable */

> +static __attribute__((__always_inline__)) void jit_write_protect(int enabled)

> +{

> +    /* Access shared kernel page at fixed memory location. */

> +    uint8_t aprr_support = *(volatile uint8_t *)_COMM_PAGE_APRR_SUPPORT;

> +    if (aprr_support == 0 || aprr_support > 3) {

> +        return;

> +    } else if (aprr_support == 1) {

> +        __asm__ __volatile__ (

> +            "mov x0, %0\n"

> +            "ldr x0, [x0]\n"

> +            "msr S3_4_c15_c2_7, x0\n"

> +            "isb sy\n"

> +            :: "r" (enabled ? _COMM_PAGE_APRR_WRITE_DISABLE

> +                            : _COMM_PAGE_APPR_WRITE_ENABLE)

> +            : "memory", "x0"

> +        );

> +    } else {

> +        __asm__ __volatile__ (

> +            "mov x0, %0\n"

> +            "ldr x0, [x0]\n"

> +            "msr S3_6_c15_c1_5, x0\n"

> +            "isb sy\n"

> +            :: "r" (enabled ? _COMM_PAGE_APRR_WRITE_DISABLE

> +                            : _COMM_PAGE_APPR_WRITE_ENABLE)

> +            : "memory", "x0"

> +        );

> +    }

> +}



Is there a particular reason you're not just calling 
pthread_jit_write_protect_np()? That would remove the dependency on 
anything reverse engineered.
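
For reference, a minimal sketch of that alternative, assuming <pthread.h>
on macOS 11 or later declares pthread_jit_write_protect_np() and
pthread_jit_write_protect_supported_np(); this is an illustration, not
part of the submitted series:

    #include <pthread.h>
    #include <stdbool.h>

    static inline bool jit_write_protect_supported(void)
    {
        return pthread_jit_write_protect_supported_np();
    }

    /* enabled != 0: code pages become RX (writes trap); 0: RW again */
    static inline void jit_write_protect(int enabled)
    {
        pthread_jit_write_protect_np(enabled);
    }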


> +

> +#else /* defined(__aarch64__) && defined(CONFIG_DARWIN) */

> +

> +static __attribute__((__always_inline__)) bool jit_write_protect_supported(void)

> +{

> +    return false;

> +}

> +

> +static __attribute__((__always_inline__)) void jit_write_protect(int enabled)

> +{

> +}

> +

> +#endif

> +

> +#endif /* define TCG_APPLE_JIT_H */

> diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h

> index 477919aeb6..b16b687d0b 100644

> --- a/include/tcg/tcg.h

> +++ b/include/tcg/tcg.h

> @@ -625,6 +625,9 @@ struct TCGContext {

>       size_t code_gen_buffer_size;

>       void *code_gen_ptr;

>       void *data_gen_ptr;

> +#if defined(CONFIG_DARWIN) && !defined(CONFIG_TCG_INTERPRETER)

> +    bool code_gen_locked; /* on Darwin each thread tracks W^X flags */



I don't quite understand why you need to keep track of whether you're in 
the locked state or not. If you just always keep it in the locked state and 
unlock around the few parts that modify the code gen region, you should be 
fine, no?


> +#endif

>   

>       /* Threshold to flush the translated code buffer.  */

>       void *code_gen_highwater;

> diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c

> index 12c1e3e974..f1eb767b02 100644

> --- a/accel/tcg/cpu-exec-common.c

> +++ b/accel/tcg/cpu-exec-common.c

> @@ -64,6 +64,8 @@ void cpu_reloading_memory_map(void)

>   

>   void cpu_loop_exit(CPUState *cpu)

>   {

> +    /* Unlock JIT write protect if applicable. */

> +    tb_exec_unlock();



Why do you need to unlock here? I think in general this patch is trying 
to keep the state RW always and only flip to RX when actually executing 
code, right?

I think it would be much easier and cleaner to do it in reverse: keep it in 
RX always and flip to RW when you need to modify.

Also, shouldn't the code gen buffer be allocated with MAP_JIT according 
to the porting guide?
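
For reference, a minimal sketch of such an allocation, assuming Darwin's
<sys/mman.h> provides MAP_JIT and the binary carries the JIT entitlement;
the helper name is illustrative only:

    #include <stddef.h>
    #include <sys/mman.h>

    static void *alloc_jit_buffer(size_t size)
    {
        /* RWX is only accepted together with MAP_JIT; actual write vs.
           execute access is then gated per thread by the W^X toggle. */
        void *buf = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
                         MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT, -1, 0);
        return buf == MAP_FAILED ? NULL : buf;
    }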

Alex
Alexander Graf Nov. 20, 2020, 2:15 p.m. UTC | #2
On 20.11.20 10:08, Alexander Graf wrote:
>

> On 09.11.20 00:24, Joelle van Dyne wrote:

>> When entitlements are available (macOS or jailbroken iOS), a hardware

>> feature called APRR exists on newer Apple Silicon that can cheaply 

>> mark JIT

>> pages as either RX or RW. Reverse engineered functions from

>> libsystem_pthread.dylib are implemented to handle this.

>>

>> The following rules apply for JIT write protect:

>>    * JIT write-protect is enabled before tcg_qemu_tb_exec()

>>    * JIT write-protect is disabled after tcg_qemu_tb_exec() returns

>>    * JIT write-protect is disabled inside do_tb_phys_invalidate() but 

>> if it

>>      is called inside of tcg_qemu_tb_exec() then write-protect will be

>>      enabled again before returning.

>>    * JIT write-protect is disabled by cpu_loop_exit() for interrupt 

>> handling.

>>    * JIT write-protect is disabled everywhere else.

>>

>> See 

>> https://developer.apple.com/documentation/apple_silicon/porting_just-in-time_compilers_to_apple_silicon

>>

>> Signed-off-by: Joelle van Dyne <j@getutm.app>

>> ---

>>   include/exec/exec-all.h     |  2 +

>>   include/tcg/tcg-apple-jit.h | 86 +++++++++++++++++++++++++++++++++++++

>>   include/tcg/tcg.h           |  3 ++

>>   accel/tcg/cpu-exec-common.c |  2 +

>>   accel/tcg/cpu-exec.c        |  2 +

>>   accel/tcg/translate-all.c   | 46 ++++++++++++++++++++

>>   tcg/tcg.c                   |  4 ++

>>   7 files changed, 145 insertions(+)

>>   create mode 100644 include/tcg/tcg-apple-jit.h

>>

>> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h

>> index aa65103702..3829f3d470 100644

>> --- a/include/exec/exec-all.h

>> +++ b/include/exec/exec-all.h

>> @@ -549,6 +549,8 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, 

>> target_ulong pc,

>>                                      target_ulong cs_base, uint32_t 

>> flags,

>>                                      uint32_t cf_mask);

>>   void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);

>> +void tb_exec_lock(void);

>> +void tb_exec_unlock(void);

>>     /* GETPC is the true target of the return instruction that we'll 

>> execute.  */

>>   #if defined(CONFIG_TCG_INTERPRETER)

>> diff --git a/include/tcg/tcg-apple-jit.h b/include/tcg/tcg-apple-jit.h

>> new file mode 100644

>> index 0000000000..9efdb2000d

>> --- /dev/null

>> +++ b/include/tcg/tcg-apple-jit.h

>> @@ -0,0 +1,86 @@

>> +/*

>> + * Apple Silicon functions for JIT handling

>> + *

>> + * Copyright (c) 2020 osy

>> + *

>> + * This library is free software; you can redistribute it and/or

>> + * modify it under the terms of the GNU Lesser General Public

>> + * License as published by the Free Software Foundation; either

>> + * version 2.1 of the License, or (at your option) any later version.

>> + *

>> + * This library is distributed in the hope that it will be useful,

>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of

>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

>> + * Lesser General Public License for more details.

>> + *

>> + * You should have received a copy of the GNU Lesser General Public

>> + * License along with this library; if not, see 

>> <http://www.gnu.org/licenses/>.

>> + */

>> +

>> +#ifndef TCG_APPLE_JIT_H

>> +#define TCG_APPLE_JIT_H

>> +

>> +/*

>> + * APRR handling

>> + * Credits to: https://siguza.github.io/APRR/

>> + * Reversed from /usr/lib/system/libsystem_pthread.dylib

>> + */

>> +

>> +#if defined(__aarch64__) && defined(CONFIG_DARWIN)

>> +

>> +#define _COMM_PAGE_START_ADDRESS        (0x0000000FFFFFC000ULL) /* 

>> In TTBR0 */

>> +#define _COMM_PAGE_APRR_SUPPORT (_COMM_PAGE_START_ADDRESS + 0x10C)

>> +#define _COMM_PAGE_APPR_WRITE_ENABLE (_COMM_PAGE_START_ADDRESS + 0x110)

>> +#define _COMM_PAGE_APRR_WRITE_DISABLE (_COMM_PAGE_START_ADDRESS + 

>> 0x118)

>> +

>> +static __attribute__((__always_inline__)) bool 

>> jit_write_protect_supported(void)

>> +{

>> +    /* Access shared kernel page at fixed memory location. */

>> +    uint8_t aprr_support = *(volatile uint8_t 

>> *)_COMM_PAGE_APRR_SUPPORT;

>> +    return aprr_support > 0;

>> +}

>> +

>> +/* write protect enable = write disable */

>> +static __attribute__((__always_inline__)) void jit_write_protect(int 

>> enabled)

>> +{

>> +    /* Access shared kernel page at fixed memory location. */

>> +    uint8_t aprr_support = *(volatile uint8_t 

>> *)_COMM_PAGE_APRR_SUPPORT;

>> +    if (aprr_support == 0 || aprr_support > 3) {

>> +        return;

>> +    } else if (aprr_support == 1) {

>> +        __asm__ __volatile__ (

>> +            "mov x0, %0\n"

>> +            "ldr x0, [x0]\n"

>> +            "msr S3_4_c15_c2_7, x0\n"

>> +            "isb sy\n"

>> +            :: "r" (enabled ? _COMM_PAGE_APRR_WRITE_DISABLE

>> +                            : _COMM_PAGE_APPR_WRITE_ENABLE)

>> +            : "memory", "x0"

>> +        );

>> +    } else {

>> +        __asm__ __volatile__ (

>> +            "mov x0, %0\n"

>> +            "ldr x0, [x0]\n"

>> +            "msr S3_6_c15_c1_5, x0\n"

>> +            "isb sy\n"

>> +            :: "r" (enabled ? _COMM_PAGE_APRR_WRITE_DISABLE

>> +                            : _COMM_PAGE_APPR_WRITE_ENABLE)

>> +            : "memory", "x0"

>> +        );

>> +    }

>> +}

>

>

> Is there a particular reason you're not just calling 

> pthread_jit_write_protect_np()? That would remove the dependency on 

> anything reverse engineered.

>

>

>> +

>> +#else /* defined(__aarch64__) && defined(CONFIG_DARWIN) */

>> +

>> +static __attribute__((__always_inline__)) bool 

>> jit_write_protect_supported(void)

>> +{

>> +    return false;

>> +}

>> +

>> +static __attribute__((__always_inline__)) void jit_write_protect(int 

>> enabled)

>> +{

>> +}

>> +

>> +#endif

>> +

>> +#endif /* define TCG_APPLE_JIT_H */

>> diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h

>> index 477919aeb6..b16b687d0b 100644

>> --- a/include/tcg/tcg.h

>> +++ b/include/tcg/tcg.h

>> @@ -625,6 +625,9 @@ struct TCGContext {

>>       size_t code_gen_buffer_size;

>>       void *code_gen_ptr;

>>       void *data_gen_ptr;

>> +#if defined(CONFIG_DARWIN) && !defined(CONFIG_TCG_INTERPRETER)

>> +    bool code_gen_locked; /* on Darwin each thread tracks W^X flags */

>

>

> I don't quite understand why you need to keep track of whether you're 

> in locked state or not. If you just always keep in locked state and 

> unlock around the few parts that modify the code gen region, you 

> should be fine, no?



I take this bit back. After fiddling with setting the flags the other 
way around, I think what you do here is better. Especially when it gets 
to exception handling, always treating the code region as writeable is 
better.


>

>

>> +#endif

>>         /* Threshold to flush the translated code buffer.  */

>>       void *code_gen_highwater;

>> diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c

>> index 12c1e3e974..f1eb767b02 100644

>> --- a/accel/tcg/cpu-exec-common.c

>> +++ b/accel/tcg/cpu-exec-common.c

>> @@ -64,6 +64,8 @@ void cpu_reloading_memory_map(void)

>>     void cpu_loop_exit(CPUState *cpu)

>>   {

>> +    /* Unlock JIT write protect if applicable. */

>> +    tb_exec_unlock();

>

>

> Why do you need to unlock here? I think in general this patch is 

> trying to keep the state RW always and only flip to RX when actually 

> executing code, right?

>

> I think it would be much easier and cleaner to do it reverse: Keep it 

> in RX always and flip to RW when you need to modify.

>

> Also, shouldn't the code gen buffer be allocated with MAP_JIT 

> according to the porting guide?



MAP_JIT is definitely missing to make it work on macOS.

Also, I would prefer if you find a better name for the lock/unlock 
function. How about "tcg_set_codegen_mutable(bool)"? You can easily map 
that to the pthread call then.
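
For reference, a minimal sketch of that naming; tcg_set_codegen_mutable()
is only the name proposed above, not an existing QEMU function, and the
pthread call assumes macOS 11 or later:

    #include <pthread.h>
    #include <stdbool.h>

    /* true: make the code_gen buffer writable (RW); false: back to RX */
    static void tcg_set_codegen_mutable(bool writable)
    {
        pthread_jit_write_protect_np(!writable);
    }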


Alex
Richard Henderson Nov. 20, 2020, 2:36 p.m. UTC | #3
On 11/20/20 6:15 AM, Alexander Graf wrote:
> MAP_JIT is definitely missing to make it work on macos.


As per the cover,

Based-on: 20201106032921.600200-1-richard.henderson@linaro.org
([PATCH v3 00/41] Mirror map JIT memory for TCG)

which contains

https://lists.nongnu.org/archive/html/qemu-devel/2020-11/msg01766.html

which sets MAP_JIT.


r~
Joelle van Dyne Nov. 20, 2020, 3:58 p.m. UTC | #4
On Fri, Nov 20, 2020 at 3:08 AM Alexander Graf <agraf@csgraf.de> wrote:
>

>

> On 09.11.20 00:24, Joelle van Dyne wrote:

> > When entitlements are available (macOS or jailbroken iOS), a hardware

> > feature called APRR exists on newer Apple Silicon that can cheaply mark JIT

> > pages as either RX or RW. Reverse engineered functions from

> > libsystem_pthread.dylib are implemented to handle this.

> >

> > The following rules apply for JIT write protect:

> >    * JIT write-protect is enabled before tcg_qemu_tb_exec()

> >    * JIT write-protect is disabled after tcg_qemu_tb_exec() returns

> >    * JIT write-protect is disabled inside do_tb_phys_invalidate() but if it

> >      is called inside of tcg_qemu_tb_exec() then write-protect will be

> >      enabled again before returning.

> >    * JIT write-protect is disabled by cpu_loop_exit() for interrupt handling.

> >    * JIT write-protect is disabled everywhere else.

> >

> > See https://developer.apple.com/documentation/apple_silicon/porting_just-in-time_compilers_to_apple_silicon

> >

> > Signed-off-by: Joelle van Dyne <j@getutm.app>

> > ---

> >   include/exec/exec-all.h     |  2 +

> >   include/tcg/tcg-apple-jit.h | 86 +++++++++++++++++++++++++++++++++++++

> >   include/tcg/tcg.h           |  3 ++

> >   accel/tcg/cpu-exec-common.c |  2 +

> >   accel/tcg/cpu-exec.c        |  2 +

> >   accel/tcg/translate-all.c   | 46 ++++++++++++++++++++

> >   tcg/tcg.c                   |  4 ++

> >   7 files changed, 145 insertions(+)

> >   create mode 100644 include/tcg/tcg-apple-jit.h

> >

> > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h

> > index aa65103702..3829f3d470 100644

> > --- a/include/exec/exec-all.h

> > +++ b/include/exec/exec-all.h

> > @@ -549,6 +549,8 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,

> >                                      target_ulong cs_base, uint32_t flags,

> >                                      uint32_t cf_mask);

> >   void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);

> > +void tb_exec_lock(void);

> > +void tb_exec_unlock(void);

> >

> >   /* GETPC is the true target of the return instruction that we'll execute.  */

> >   #if defined(CONFIG_TCG_INTERPRETER)

> > diff --git a/include/tcg/tcg-apple-jit.h b/include/tcg/tcg-apple-jit.h

> > new file mode 100644

> > index 0000000000..9efdb2000d

> > --- /dev/null

> > +++ b/include/tcg/tcg-apple-jit.h

> > @@ -0,0 +1,86 @@

> > +/*

> > + * Apple Silicon functions for JIT handling

> > + *

> > + * Copyright (c) 2020 osy

> > + *

> > + * This library is free software; you can redistribute it and/or

> > + * modify it under the terms of the GNU Lesser General Public

> > + * License as published by the Free Software Foundation; either

> > + * version 2.1 of the License, or (at your option) any later version.

> > + *

> > + * This library is distributed in the hope that it will be useful,

> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of

> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> > + * Lesser General Public License for more details.

> > + *

> > + * You should have received a copy of the GNU Lesser General Public

> > + * License along with this library; if not, see <http://www.gnu.org/licenses/>.

> > + */

> > +

> > +#ifndef TCG_APPLE_JIT_H

> > +#define TCG_APPLE_JIT_H

> > +

> > +/*

> > + * APRR handling

> > + * Credits to: https://siguza.github.io/APRR/

> > + * Reversed from /usr/lib/system/libsystem_pthread.dylib

> > + */

> > +

> > +#if defined(__aarch64__) && defined(CONFIG_DARWIN)

> > +

> > +#define _COMM_PAGE_START_ADDRESS        (0x0000000FFFFFC000ULL) /* In TTBR0 */

> > +#define _COMM_PAGE_APRR_SUPPORT         (_COMM_PAGE_START_ADDRESS + 0x10C)

> > +#define _COMM_PAGE_APPR_WRITE_ENABLE    (_COMM_PAGE_START_ADDRESS + 0x110)

> > +#define _COMM_PAGE_APRR_WRITE_DISABLE   (_COMM_PAGE_START_ADDRESS + 0x118)

> > +

> > +static __attribute__((__always_inline__)) bool jit_write_protect_supported(void)

> > +{

> > +    /* Access shared kernel page at fixed memory location. */

> > +    uint8_t aprr_support = *(volatile uint8_t *)_COMM_PAGE_APRR_SUPPORT;

> > +    return aprr_support > 0;

> > +}

> > +

> > +/* write protect enable = write disable */

> > +static __attribute__((__always_inline__)) void jit_write_protect(int enabled)

> > +{

> > +    /* Access shared kernel page at fixed memory location. */

> > +    uint8_t aprr_support = *(volatile uint8_t *)_COMM_PAGE_APRR_SUPPORT;

> > +    if (aprr_support == 0 || aprr_support > 3) {

> > +        return;

> > +    } else if (aprr_support == 1) {

> > +        __asm__ __volatile__ (

> > +            "mov x0, %0\n"

> > +            "ldr x0, [x0]\n"

> > +            "msr S3_4_c15_c2_7, x0\n"

> > +            "isb sy\n"

> > +            :: "r" (enabled ? _COMM_PAGE_APRR_WRITE_DISABLE

> > +                            : _COMM_PAGE_APPR_WRITE_ENABLE)

> > +            : "memory", "x0"

> > +        );

> > +    } else {

> > +        __asm__ __volatile__ (

> > +            "mov x0, %0\n"

> > +            "ldr x0, [x0]\n"

> > +            "msr S3_6_c15_c1_5, x0\n"

> > +            "isb sy\n"

> > +            :: "r" (enabled ? _COMM_PAGE_APRR_WRITE_DISABLE

> > +                            : _COMM_PAGE_APPR_WRITE_ENABLE)

> > +            : "memory", "x0"

> > +        );

> > +    }

> > +}

>

>

> Is there a particular reason you're not just calling

> pthread_jit_write_protect_np()? That would remove the dependency on

> anything reverse engineered.

Those APIs are not available on iOS 13 or below, which has the same
APRR requirements. If for legal reasons we cannot include this code,
then it is fine to remove this file and replace the calls with the
APIs, but we would lose support on lower iOS versions.
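
For reference, a minimal sketch of keeping both paths, assuming Clang's
__builtin_available(), <pthread.h>, and the jit_write_protect() helper from
this patch; the wrapper name and OS version numbers are illustrative only:

    static void qemu_jit_write_protect(int enabled)
    {
        if (__builtin_available(macOS 11.0, iOS 14.0, *)) {
            /* documented API on newer OS releases */
            pthread_jit_write_protect_np(enabled);
        } else {
            /* reverse-engineered APRR path for older iOS */
            jit_write_protect(enabled);
        }
    }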

>

>

> > +

> > +#else /* defined(__aarch64__) && defined(CONFIG_DARWIN) */

> > +

> > +static __attribute__((__always_inline__)) bool jit_write_protect_supported(void)

> > +{

> > +    return false;

> > +}

> > +

> > +static __attribute__((__always_inline__)) void jit_write_protect(int enabled)

> > +{

> > +}

> > +

> > +#endif

> > +

> > +#endif /* define TCG_APPLE_JIT_H */

> > diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h

> > index 477919aeb6..b16b687d0b 100644

> > --- a/include/tcg/tcg.h

> > +++ b/include/tcg/tcg.h

> > @@ -625,6 +625,9 @@ struct TCGContext {

> >       size_t code_gen_buffer_size;

> >       void *code_gen_ptr;

> >       void *data_gen_ptr;

> > +#if defined(CONFIG_DARWIN) && !defined(CONFIG_TCG_INTERPRETER)

> > +    bool code_gen_locked; /* on Darwin each thread tracks W^X flags */

>

>

> I don't quite understand why you need to keep track of whether you're in

> locked state or not. If you just always keep in locked state and unlock

> around the few parts that modify the code gen region, you should be

> fine, no?

I thought so at first, but do_tb_phys_invalidate() can be called in
either state, and even when looking at all the callers it's not
possible to easily derive the lock state without storing it
somewhere. If someone knows of a way, then this flag can be removed.
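
For reference, this is the save/restore pattern used in the patch's
do_tb_phys_invalidate() hunk further below:

    bool code_gen_locked = tb_exec_is_locked(); /* remember caller's state */
    tb_exec_unlock();                           /* force RW for the update  */
    /* ... unlink and invalidate the TB ... */
    tb_exec_change(code_gen_locked);            /* restore on all exit paths */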

>

>

> > +#endif

> >

> >       /* Threshold to flush the translated code buffer.  */

> >       void *code_gen_highwater;

> > diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c

> > index 12c1e3e974..f1eb767b02 100644

> > --- a/accel/tcg/cpu-exec-common.c

> > +++ b/accel/tcg/cpu-exec-common.c

> > @@ -64,6 +64,8 @@ void cpu_reloading_memory_map(void)

> >

> >   void cpu_loop_exit(CPUState *cpu)

> >   {

> > +    /* Unlock JIT write protect if applicable. */

> > +    tb_exec_unlock();

>

>

> Why do you need to unlock here? I think in general this patch is trying

> to keep the state RW always and only flip to RX when actually executing

> code, right?

Yes, this is the point where the code exits due to an interrupt or some
other async means. Otherwise, the unlock would be matched after
tcg_qemu_tb_exec().

-j

>

> I think it would be much easier and cleaner to do it reverse: Keep it in

> RX always and flip to RW when you need to modify.

>

> Also, shouldn't the code gen buffer be allocated with MAP_JIT according

> to the porting guide?

>

> Alex

>
Alexander Graf Nov. 25, 2020, 1:15 a.m. UTC | #5
On 20.11.20 16:58, Joelle van Dyne wrote:
> On Fri, Nov 20, 2020 at 3:08 AM Alexander Graf <agraf@csgraf.de> wrote:

>>

>> On 09.11.20 00:24, Joelle van Dyne wrote:

>>> When entitlements are available (macOS or jailbroken iOS), a hardware

>>> feature called APRR exists on newer Apple Silicon that can cheaply mark JIT

>>> pages as either RX or RW. Reverse engineered functions from

>>> libsystem_pthread.dylib are implemented to handle this.

>>>

>>> The following rules apply for JIT write protect:

>>>     * JIT write-protect is enabled before tcg_qemu_tb_exec()

>>>     * JIT write-protect is disabled after tcg_qemu_tb_exec() returns

>>>     * JIT write-protect is disabled inside do_tb_phys_invalidate() but if it

>>>       is called inside of tcg_qemu_tb_exec() then write-protect will be

>>>       enabled again before returning.

>>>     * JIT write-protect is disabled by cpu_loop_exit() for interrupt handling.

>>>     * JIT write-protect is disabled everywhere else.

>>>

>>> See https://developer.apple.com/documentation/apple_silicon/porting_just-in-time_compilers_to_apple_silicon

>>>

>>> Signed-off-by: Joelle van Dyne <j@getutm.app>

>>> ---

>>>    include/exec/exec-all.h     |  2 +

>>>    include/tcg/tcg-apple-jit.h | 86 +++++++++++++++++++++++++++++++++++++

>>>    include/tcg/tcg.h           |  3 ++

>>>    accel/tcg/cpu-exec-common.c |  2 +

>>>    accel/tcg/cpu-exec.c        |  2 +

>>>    accel/tcg/translate-all.c   | 46 ++++++++++++++++++++

>>>    tcg/tcg.c                   |  4 ++

>>>    7 files changed, 145 insertions(+)

>>>    create mode 100644 include/tcg/tcg-apple-jit.h

>>>

>>> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h

>>> index aa65103702..3829f3d470 100644

>>> --- a/include/exec/exec-all.h

>>> +++ b/include/exec/exec-all.h

>>> @@ -549,6 +549,8 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,

>>>                                       target_ulong cs_base, uint32_t flags,

>>>                                       uint32_t cf_mask);

>>>    void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);

>>> +void tb_exec_lock(void);

>>> +void tb_exec_unlock(void);

>>>

>>>    /* GETPC is the true target of the return instruction that we'll execute.  */

>>>    #if defined(CONFIG_TCG_INTERPRETER)

>>> diff --git a/include/tcg/tcg-apple-jit.h b/include/tcg/tcg-apple-jit.h

>>> new file mode 100644

>>> index 0000000000..9efdb2000d

>>> --- /dev/null

>>> +++ b/include/tcg/tcg-apple-jit.h

>>> @@ -0,0 +1,86 @@

>>> +/*

>>> + * Apple Silicon functions for JIT handling

>>> + *

>>> + * Copyright (c) 2020 osy

>>> + *

>>> + * This library is free software; you can redistribute it and/or

>>> + * modify it under the terms of the GNU Lesser General Public

>>> + * License as published by the Free Software Foundation; either

>>> + * version 2.1 of the License, or (at your option) any later version.

>>> + *

>>> + * This library is distributed in the hope that it will be useful,

>>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of

>>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

>>> + * Lesser General Public License for more details.

>>> + *

>>> + * You should have received a copy of the GNU Lesser General Public

>>> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.

>>> + */

>>> +

>>> +#ifndef TCG_APPLE_JIT_H

>>> +#define TCG_APPLE_JIT_H

>>> +

>>> +/*

>>> + * APRR handling

>>> + * Credits to: https://siguza.github.io/APRR/

>>> + * Reversed from /usr/lib/system/libsystem_pthread.dylib

>>> + */

>>> +

>>> +#if defined(__aarch64__) && defined(CONFIG_DARWIN)

>>> +

>>> +#define _COMM_PAGE_START_ADDRESS        (0x0000000FFFFFC000ULL) /* In TTBR0 */

>>> +#define _COMM_PAGE_APRR_SUPPORT         (_COMM_PAGE_START_ADDRESS + 0x10C)

>>> +#define _COMM_PAGE_APPR_WRITE_ENABLE    (_COMM_PAGE_START_ADDRESS + 0x110)

>>> +#define _COMM_PAGE_APRR_WRITE_DISABLE   (_COMM_PAGE_START_ADDRESS + 0x118)

>>> +

>>> +static __attribute__((__always_inline__)) bool jit_write_protect_supported(void)

>>> +{

>>> +    /* Access shared kernel page at fixed memory location. */

>>> +    uint8_t aprr_support = *(volatile uint8_t *)_COMM_PAGE_APRR_SUPPORT;

>>> +    return aprr_support > 0;

>>> +}

>>> +

>>> +/* write protect enable = write disable */

>>> +static __attribute__((__always_inline__)) void jit_write_protect(int enabled)

>>> +{

>>> +    /* Access shared kernel page at fixed memory location. */

>>> +    uint8_t aprr_support = *(volatile uint8_t *)_COMM_PAGE_APRR_SUPPORT;

>>> +    if (aprr_support == 0 || aprr_support > 3) {

>>> +        return;

>>> +    } else if (aprr_support == 1) {

>>> +        __asm__ __volatile__ (

>>> +            "mov x0, %0\n"

>>> +            "ldr x0, [x0]\n"

>>> +            "msr S3_4_c15_c2_7, x0\n"

>>> +            "isb sy\n"

>>> +            :: "r" (enabled ? _COMM_PAGE_APRR_WRITE_DISABLE

>>> +                            : _COMM_PAGE_APPR_WRITE_ENABLE)

>>> +            : "memory", "x0"

>>> +        );

>>> +    } else {

>>> +        __asm__ __volatile__ (

>>> +            "mov x0, %0\n"

>>> +            "ldr x0, [x0]\n"

>>> +            "msr S3_6_c15_c1_5, x0\n"

>>> +            "isb sy\n"

>>> +            :: "r" (enabled ? _COMM_PAGE_APRR_WRITE_DISABLE

>>> +                            : _COMM_PAGE_APPR_WRITE_ENABLE)

>>> +            : "memory", "x0"

>>> +        );

>>> +    }

>>> +}

>>

>> Is there a particular reason you're not just calling

>> pthread_jit_write_protect_np()? That would remove the dependency on

>> anything reverse engineered.

> Those APIs are not available on iOS 13 or below, which has the same

> APRR requirements. If for legal reasons we cannot include this code,

> then it is fine to remove this file and replace the calls with the

> APIs, but we would lose support on lower iOS versions.



I don't think we realistically care about running QEMU on iOS 13, do we? 
Let's just focus on making the code maintainable for anything going 
forward from now :).


Alex
Joelle van Dyne Nov. 25, 2020, 2:08 a.m. UTC | #6
A lot of users of UTM are on iOS 13 (a large number of devices only
have a jailbreak for iOS 13 and below), but if the QEMU community thinks
it's better that way, we are willing to compromise.

-j

On Tue, Nov 24, 2020 at 7:15 PM Alexander Graf <agraf@csgraf.de> wrote:
>

>

> On 20.11.20 16:58, Joelle van Dyne wrote:

> > On Fri, Nov 20, 2020 at 3:08 AM Alexander Graf <agraf@csgraf.de> wrote:

> >>

> >> On 09.11.20 00:24, Joelle van Dyne wrote:

> >>> When entitlements are available (macOS or jailbroken iOS), a hardware

> >>> feature called APRR exists on newer Apple Silicon that can cheaply mark JIT

> >>> pages as either RX or RW. Reverse engineered functions from

> >>> libsystem_pthread.dylib are implemented to handle this.

> >>>

> >>> The following rules apply for JIT write protect:

> >>>     * JIT write-protect is enabled before tcg_qemu_tb_exec()

> >>>     * JIT write-protect is disabled after tcg_qemu_tb_exec() returns

> >>>     * JIT write-protect is disabled inside do_tb_phys_invalidate() but if it

> >>>       is called inside of tcg_qemu_tb_exec() then write-protect will be

> >>>       enabled again before returning.

> >>>     * JIT write-protect is disabled by cpu_loop_exit() for interrupt handling.

> >>>     * JIT write-protect is disabled everywhere else.

> >>>

> >>> See https://developer.apple.com/documentation/apple_silicon/porting_just-in-time_compilers_to_apple_silicon

> >>>

> >>> Signed-off-by: Joelle van Dyne <j@getutm.app>

> >>> ---

> >>>    include/exec/exec-all.h     |  2 +

> >>>    include/tcg/tcg-apple-jit.h | 86 +++++++++++++++++++++++++++++++++++++

> >>>    include/tcg/tcg.h           |  3 ++

> >>>    accel/tcg/cpu-exec-common.c |  2 +

> >>>    accel/tcg/cpu-exec.c        |  2 +

> >>>    accel/tcg/translate-all.c   | 46 ++++++++++++++++++++

> >>>    tcg/tcg.c                   |  4 ++

> >>>    7 files changed, 145 insertions(+)

> >>>    create mode 100644 include/tcg/tcg-apple-jit.h

> >>>

> >>> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h

> >>> index aa65103702..3829f3d470 100644

> >>> --- a/include/exec/exec-all.h

> >>> +++ b/include/exec/exec-all.h

> >>> @@ -549,6 +549,8 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,

> >>>                                       target_ulong cs_base, uint32_t flags,

> >>>                                       uint32_t cf_mask);

> >>>    void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);

> >>> +void tb_exec_lock(void);

> >>> +void tb_exec_unlock(void);

> >>>

> >>>    /* GETPC is the true target of the return instruction that we'll execute.  */

> >>>    #if defined(CONFIG_TCG_INTERPRETER)

> >>> diff --git a/include/tcg/tcg-apple-jit.h b/include/tcg/tcg-apple-jit.h

> >>> new file mode 100644

> >>> index 0000000000..9efdb2000d

> >>> --- /dev/null

> >>> +++ b/include/tcg/tcg-apple-jit.h

> >>> @@ -0,0 +1,86 @@

> >>> +/*

> >>> + * Apple Silicon functions for JIT handling

> >>> + *

> >>> + * Copyright (c) 2020 osy

> >>> + *

> >>> + * This library is free software; you can redistribute it and/or

> >>> + * modify it under the terms of the GNU Lesser General Public

> >>> + * License as published by the Free Software Foundation; either

> >>> + * version 2.1 of the License, or (at your option) any later version.

> >>> + *

> >>> + * This library is distributed in the hope that it will be useful,

> >>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of

> >>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> >>> + * Lesser General Public License for more details.

> >>> + *

> >>> + * You should have received a copy of the GNU Lesser General Public

> >>> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.

> >>> + */

> >>> +

> >>> +#ifndef TCG_APPLE_JIT_H

> >>> +#define TCG_APPLE_JIT_H

> >>> +

> >>> +/*

> >>> + * APRR handling

> >>> + * Credits to: https://siguza.github.io/APRR/

> >>> + * Reversed from /usr/lib/system/libsystem_pthread.dylib

> >>> + */

> >>> +

> >>> +#if defined(__aarch64__) && defined(CONFIG_DARWIN)

> >>> +

> >>> +#define _COMM_PAGE_START_ADDRESS        (0x0000000FFFFFC000ULL) /* In TTBR0 */

> >>> +#define _COMM_PAGE_APRR_SUPPORT         (_COMM_PAGE_START_ADDRESS + 0x10C)

> >>> +#define _COMM_PAGE_APPR_WRITE_ENABLE    (_COMM_PAGE_START_ADDRESS + 0x110)

> >>> +#define _COMM_PAGE_APRR_WRITE_DISABLE   (_COMM_PAGE_START_ADDRESS + 0x118)

> >>> +

> >>> +static __attribute__((__always_inline__)) bool jit_write_protect_supported(void)

> >>> +{

> >>> +    /* Access shared kernel page at fixed memory location. */

> >>> +    uint8_t aprr_support = *(volatile uint8_t *)_COMM_PAGE_APRR_SUPPORT;

> >>> +    return aprr_support > 0;

> >>> +}

> >>> +

> >>> +/* write protect enable = write disable */

> >>> +static __attribute__((__always_inline__)) void jit_write_protect(int enabled)

> >>> +{

> >>> +    /* Access shared kernel page at fixed memory location. */

> >>> +    uint8_t aprr_support = *(volatile uint8_t *)_COMM_PAGE_APRR_SUPPORT;

> >>> +    if (aprr_support == 0 || aprr_support > 3) {

> >>> +        return;

> >>> +    } else if (aprr_support == 1) {

> >>> +        __asm__ __volatile__ (

> >>> +            "mov x0, %0\n"

> >>> +            "ldr x0, [x0]\n"

> >>> +            "msr S3_4_c15_c2_7, x0\n"

> >>> +            "isb sy\n"

> >>> +            :: "r" (enabled ? _COMM_PAGE_APRR_WRITE_DISABLE

> >>> +                            : _COMM_PAGE_APPR_WRITE_ENABLE)

> >>> +            : "memory", "x0"

> >>> +        );

> >>> +    } else {

> >>> +        __asm__ __volatile__ (

> >>> +            "mov x0, %0\n"

> >>> +            "ldr x0, [x0]\n"

> >>> +            "msr S3_6_c15_c1_5, x0\n"

> >>> +            "isb sy\n"

> >>> +            :: "r" (enabled ? _COMM_PAGE_APRR_WRITE_DISABLE

> >>> +                            : _COMM_PAGE_APPR_WRITE_ENABLE)

> >>> +            : "memory", "x0"

> >>> +        );

> >>> +    }

> >>> +}

> >>

> >> Is there a particular reason you're not just calling

> >> pthread_jit_write_protect_np()? That would remove the dependency on

> >> anything reverse engineered.

> > Those APIs are not available on iOS 13 or below, which has the same

> > APRR requirements. If for legal reasons we cannot include this code,

> > then it is fine to remove this file and replace the calls with the

> > APIs, but we would lose support on lower iOS versions.

>

>

> I don't think we realistically care about running QEMU on iOS13, do we?

> Let's just focus on making the code maintainable for anything going

> forward from now :).

>

>

> Alex

>

>
Alexander Graf Dec. 11, 2020, 10:54 a.m. UTC | #7
On 25.11.20 03:08, Joelle van Dyne wrote:
> A lot of users of UTM are on iOS 13 (a large number of devices only

> have jailbreak for iOS 13 and below), but if the QEMU community thinks

> it's better that way, we are willing to compromise.



I think it would make merging much more straightforward if we could 
keep RWX toggling to the publicly released API. So yes, please adapt it. 
In UTM, you can still carry a tiny downstream patch that implements the 
API through your reverse-engineered code for iOS 13, no?


Alex
Stefan Hajnoczi Dec. 11, 2020, 12:35 p.m. UTC | #8
On Fri, Dec 11, 2020 at 10:54 AM Alexander Graf <agraf@csgraf.de> wrote:
> On 25.11.20 03:08, Joelle van Dyne wrote:

> > A lot of users of UTM are on iOS 13 (a large number of devices only

> > have jailbreak for iOS 13 and below), but if the QEMU community thinks

> > it's better that way, we are willing to compromise.

>

>

> I think it would make merging much more straight forward if we could

> keep RWX toggling to the publicly released API. So yes, please adapt it.

> In UTM, you can still carry a tiny downstream patch that implements the

> API through your reverse engineered code for iOS 13, no?


Alex, you're awesome! Thanks for finding a way to avoid the
reverse-engineered code. With that change we don't need to go through
a legal review and it makes merging this much simpler.

Stefan
Joelle van Dyne Dec. 11, 2020, 6:47 p.m. UTC | #9
Sounds good, I will make that change in the next version.

-j

On Fri, Dec 11, 2020 at 4:36 AM Stefan Hajnoczi <stefanha@gmail.com> wrote:
>

> On Fri, Dec 11, 2020 at 10:54 AM Alexander Graf <agraf@csgraf.de> wrote:

> > On 25.11.20 03:08, Joelle van Dyne wrote:

> > > A lot of users of UTM are on iOS 13 (a large number of devices only

> > > have jailbreak for iOS 13 and below), but if the QEMU community thinks

> > > it's better that way, we are willing to compromise.

> >

> >

> > I think it would make merging much more straight forward if we could

> > keep RWX toggling to the publicly released API. So yes, please adapt it.

> > In UTM, you can still carry a tiny downstream patch that implements the

> > API through your reverse engineered code for iOS 13, no?

>

> Alex, you're awesome! Thanks for finding a way to avoid the

> reverse-engineered code. With that change we don't need to go through

> a legal review and it makes merging this much simpler.

>

> Stefan

Patch

diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index aa65103702..3829f3d470 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -549,6 +549,8 @@  TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                    target_ulong cs_base, uint32_t flags,
                                    uint32_t cf_mask);
 void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr);
+void tb_exec_lock(void);
+void tb_exec_unlock(void);
 
 /* GETPC is the true target of the return instruction that we'll execute.  */
 #if defined(CONFIG_TCG_INTERPRETER)
diff --git a/include/tcg/tcg-apple-jit.h b/include/tcg/tcg-apple-jit.h
new file mode 100644
index 0000000000..9efdb2000d
--- /dev/null
+++ b/include/tcg/tcg-apple-jit.h
@@ -0,0 +1,86 @@ 
+/*
+ * Apple Silicon functions for JIT handling
+ *
+ * Copyright (c) 2020 osy
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef TCG_APPLE_JIT_H
+#define TCG_APPLE_JIT_H
+
+/*
+ * APRR handling
+ * Credits to: https://siguza.github.io/APRR/
+ * Reversed from /usr/lib/system/libsystem_pthread.dylib
+ */
+
+#if defined(__aarch64__) && defined(CONFIG_DARWIN)
+
+#define _COMM_PAGE_START_ADDRESS        (0x0000000FFFFFC000ULL) /* In TTBR0 */
+#define _COMM_PAGE_APRR_SUPPORT         (_COMM_PAGE_START_ADDRESS + 0x10C)
+#define _COMM_PAGE_APPR_WRITE_ENABLE    (_COMM_PAGE_START_ADDRESS + 0x110)
+#define _COMM_PAGE_APRR_WRITE_DISABLE   (_COMM_PAGE_START_ADDRESS + 0x118)
+
+static __attribute__((__always_inline__)) bool jit_write_protect_supported(void)
+{
+    /* Access shared kernel page at fixed memory location. */
+    uint8_t aprr_support = *(volatile uint8_t *)_COMM_PAGE_APRR_SUPPORT;
+    return aprr_support > 0;
+}
+
+/* write protect enable = write disable */
+static __attribute__((__always_inline__)) void jit_write_protect(int enabled)
+{
+    /* Access shared kernel page at fixed memory location. */
+    uint8_t aprr_support = *(volatile uint8_t *)_COMM_PAGE_APRR_SUPPORT;
+    if (aprr_support == 0 || aprr_support > 3) {
+        return;
+    } else if (aprr_support == 1) {
+        __asm__ __volatile__ (
+            "mov x0, %0\n"
+            "ldr x0, [x0]\n"
+            "msr S3_4_c15_c2_7, x0\n"
+            "isb sy\n"
+            :: "r" (enabled ? _COMM_PAGE_APRR_WRITE_DISABLE
+                            : _COMM_PAGE_APPR_WRITE_ENABLE)
+            : "memory", "x0"
+        );
+    } else {
+        __asm__ __volatile__ (
+            "mov x0, %0\n"
+            "ldr x0, [x0]\n"
+            "msr S3_6_c15_c1_5, x0\n"
+            "isb sy\n"
+            :: "r" (enabled ? _COMM_PAGE_APRR_WRITE_DISABLE
+                            : _COMM_PAGE_APPR_WRITE_ENABLE)
+            : "memory", "x0"
+        );
+    }
+}
+
+#else /* defined(__aarch64__) && defined(CONFIG_DARWIN) */
+
+static __attribute__((__always_inline__)) bool jit_write_protect_supported(void)
+{
+    return false;
+}
+
+static __attribute__((__always_inline__)) void jit_write_protect(int enabled)
+{
+}
+
+#endif
+
+#endif /* define TCG_APPLE_JIT_H */
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 477919aeb6..b16b687d0b 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -625,6 +625,9 @@  struct TCGContext {
     size_t code_gen_buffer_size;
     void *code_gen_ptr;
     void *data_gen_ptr;
+#if defined(CONFIG_DARWIN) && !defined(CONFIG_TCG_INTERPRETER)
+    bool code_gen_locked; /* on Darwin each thread tracks W^X flags */
+#endif
 
     /* Threshold to flush the translated code buffer.  */
     void *code_gen_highwater;
diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c
index 12c1e3e974..f1eb767b02 100644
--- a/accel/tcg/cpu-exec-common.c
+++ b/accel/tcg/cpu-exec-common.c
@@ -64,6 +64,8 @@  void cpu_reloading_memory_map(void)
 
 void cpu_loop_exit(CPUState *cpu)
 {
+    /* Unlock JIT write protect if applicable. */
+    tb_exec_unlock();
     /* Undo the setting in cpu_tb_exec.  */
     cpu->can_do_io = 1;
     siglongjmp(cpu->jmp_env, 1);
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index 8df0a1782e..960e0c1f36 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -176,7 +176,9 @@  static inline TranslationBlock *cpu_tb_exec(CPUState *cpu,
     }
 #endif /* DEBUG_DISAS */
 
+    tb_exec_lock();
     ret = tcg_qemu_tb_exec(env, tb_ptr);
+    tb_exec_unlock();
     cpu->can_do_io = 1;
     /*
      * TODO: Delay swapping back to the read-write region of the TB
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 06102871e7..5773c561cb 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -27,6 +27,9 @@ 
 #include "disas/disas.h"
 #include "exec/exec-all.h"
 #include "tcg/tcg.h"
+#if defined(CONFIG_DARWIN)
+#include "tcg/tcg-apple-jit.h"
+#endif
 #if defined(CONFIG_USER_ONLY)
 #include "qemu.h"
 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
@@ -61,6 +64,9 @@ 
 #include "sysemu/tcg.h"
 #include "qapi/error.h"
 
+static bool tb_exec_is_locked(void);
+static void tb_exec_change(bool locked);
+
 /* #define DEBUG_TB_INVALIDATE */
 /* #define DEBUG_TB_FLUSH */
 /* make various TB consistency checks */
@@ -1339,6 +1345,7 @@  void tcg_exec_init(unsigned long tb_size, int splitwx)
                                splitwx, &error_fatal);
     assert(ok);
 
+    tb_exec_unlock();
 #if defined(CONFIG_SOFTMMU)
     /* There's no guest base to take into account, so go ahead and
        initialize the prologue now.  */
@@ -1615,8 +1622,11 @@  static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
     PageDesc *p;
     uint32_t h;
     tb_page_addr_t phys_pc;
+    bool code_gen_locked;
 
     assert_memory_lock();
+    code_gen_locked = tb_exec_is_locked();
+    tb_exec_unlock();
 
     /* make sure no further incoming jumps will be chained to this TB */
     qemu_spin_lock(&tb->jmp_lock);
@@ -1629,6 +1639,7 @@  static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
                      tb->trace_vcpu_dstate);
     if (!(tb->cflags & CF_NOCACHE) &&
         !qht_remove(&tb_ctx.htable, tb, h)) {
+        tb_exec_change(code_gen_locked);
         return;
     }
 
@@ -1661,6 +1672,8 @@  static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
 
     qatomic_set(&tcg_ctx->tb_phys_invalidate_count,
                tcg_ctx->tb_phys_invalidate_count + 1);
+
+    tb_exec_change(code_gen_locked);
 }
 
 static void tb_phys_invalidate__locked(TranslationBlock *tb)
@@ -2899,3 +2912,36 @@  void tcg_flush_softmmu_tlb(CPUState *cs)
     tlb_flush(cs);
 #endif
 }
+
+#if defined(CONFIG_DARWIN) && !defined(CONFIG_TCG_INTERPRETER)
+static bool tb_exec_is_locked(void)
+{
+    return tcg_ctx->code_gen_locked;
+}
+
+static void tb_exec_change(bool locked)
+{
+    if (jit_write_protect_supported()) {
+        jit_write_protect(locked);
+    }
+    tcg_ctx->code_gen_locked = locked;
+}
+#else /* not needed on non-Darwin platforms */
+static bool tb_exec_is_locked(void)
+{
+    return false;
+}
+
+static void tb_exec_change(bool locked) {}
+#endif
+
+void tb_exec_lock(void)
+{
+    /* assumes sys_icache_invalidate already called */
+    tb_exec_change(true);
+}
+
+void tb_exec_unlock(void)
+{
+    tb_exec_change(false);
+}
diff --git a/tcg/tcg.c b/tcg/tcg.c
index d3052031cb..5ed79d2724 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -809,6 +809,8 @@  static void alloc_tcg_plugin_context(TCGContext *s)
 void tcg_register_thread(void)
 {
     tcg_ctx = &tcg_init_ctx;
+
+    tb_exec_unlock();
 }
 #else
 void tcg_register_thread(void)
@@ -843,6 +845,8 @@  void tcg_register_thread(void)
     err = tcg_region_initial_alloc__locked(tcg_ctx);
     g_assert(!err);
     qemu_mutex_unlock(&region.lock);
+
+    tb_exec_unlock();
 }
 #endif /* !CONFIG_USER_ONLY */