diff mbox

target-arm: Minimal implementation of performance counters

Message ID 1303401708-5419-1-git-send-email-peter.maydell@linaro.org
State Accepted
Commit 74594c9d813e4d14e9c16cc71824d8905bedc19d
Headers show

Commit Message

Peter Maydell April 21, 2011, 4:01 p.m. UTC
Newer Linux kernels assume the existence of the performance counter
cp15 registers. Provide a minimal implementation of these registers.
We support no events. This should be compliant with the ARM ARM,
except that we don't implement the cycle counter.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
This is the last fix required to be able to boot a stock Linaro
versatile express image on upstream QEMU...

 target-arm/cpu.h       |    8 ++-
 target-arm/helper.c    |  159 ++++++++++++++++++++++++++++++++++++++++++++----
 target-arm/machine.c   |   12 ++++
 target-arm/translate.c |   20 ++++++-
 4 files changed, 183 insertions(+), 16 deletions(-)

Comments

Aurelien Jarno April 25, 2011, 9:09 p.m. UTC | #1
On Thu, Apr 21, 2011 at 05:01:48PM +0100, Peter Maydell wrote:
> Newer Linux kernels assume the existence of the performance counter
> cp15 registers. Provide a minimal implementation of these registers.
> We support no events. This should be compliant with the ARM ARM,
> except that we don't implement the cycle counter.
> 
> Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
> ---
> This is the last fix required to be able to boot a stock Linaro
> versatile express image on upstream QEMU...
> 
>  target-arm/cpu.h       |    8 ++-
>  target-arm/helper.c    |  159 ++++++++++++++++++++++++++++++++++++++++++++----
>  target-arm/machine.c   |   12 ++++
>  target-arm/translate.c |   20 ++++++-
>  4 files changed, 183 insertions(+), 16 deletions(-)
> 
> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
> index d5af644..b8e7419 100644
> --- a/target-arm/cpu.h
> +++ b/target-arm/cpu.h
> @@ -129,6 +129,12 @@ typedef struct CPUARMState {
>          uint32_t c7_par;  /* Translation result. */
>          uint32_t c9_insn; /* Cache lockdown registers.  */
>          uint32_t c9_data;
> +        uint32_t c9_pmcr; /* performance monitor control register */
> +        uint32_t c9_pmcnten; /* perf monitor counter enables */
> +        uint32_t c9_pmovsr; /* perf monitor overflow status */
> +        uint32_t c9_pmxevtyper; /* perf monitor event type */
> +        uint32_t c9_pmuserenr; /* perf monitor user enable */
> +        uint32_t c9_pminten; /* perf monitor interrupt enables */
>          uint32_t c13_fcse; /* FCSE PID.  */
>          uint32_t c13_context; /* Context ID.  */
>          uint32_t c13_tls1; /* User RW Thread register.  */
> @@ -434,7 +440,7 @@ void cpu_arm_set_cp_io(CPUARMState *env, int cpnum,
>  #define cpu_signal_handler cpu_arm_signal_handler
>  #define cpu_list arm_cpu_list
>  
> -#define CPU_SAVE_VERSION 3
> +#define CPU_SAVE_VERSION 4
>  
>  /* MMU modes definitions */
>  #define MMU_MODE0_SUFFIX _kernel
> diff --git a/target-arm/helper.c b/target-arm/helper.c
> index 62ae72e..b051b8c 100644
> --- a/target-arm/helper.c
> +++ b/target-arm/helper.c
> @@ -270,6 +270,10 @@ void cpu_reset(CPUARMState *env)
>      }
>      env->vfp.xregs[ARM_VFP_FPEXC] = 0;
>      env->cp15.c2_base_mask = 0xffffc000u;
> +    /* v7 performance monitor control register: same implementor
> +     * field as main ID register, and we implement no event counters.
> +     */
> +    env->cp15.c9_pmcr = (id & 0xff000000);
>  #endif
>      set_flush_to_zero(1, &env->vfp.standard_fp_status);
>      set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
> @@ -1587,6 +1591,81 @@ void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
>          case 1: /* TCM memory region registers.  */
>              /* Not implemented.  */
>              goto bad_reg;
> +        case 12: /* Performance monitor control */
> +            /* Performance monitors are implementation defined in v7,
> +             * but with an ARM recommended set of registers, which we
> +             * follow (although we don't actually implement any counters)
> +             */
> +            if (!arm_feature(env, ARM_FEATURE_V7)) {
> +                goto bad_reg;
> +            }
> +            switch (op2) {
> +            case 0: /* performance monitor control register */
> +                /* only the DP, X, D and E bits are writable */
> +                env->cp15.c9_pmcr &= ~0x39;
> +                env->cp15.c9_pmcr |= (val & 0x39);
> +                break;
> +            case 1: /* Count enable set register */
> +                val &= (1 << 31);
> +                env->cp15.c9_pmcnten |= val;
> +                break;
> +            case 2: /* Count enable clear */
> +                val &= (1 << 31);
> +                env->cp15.c9_pmcnten &= ~val;
> +                break;
> +            case 3: /* Overflow flag status */
> +                env->cp15.c9_pmovsr &= ~val;
> +                break;
> +            case 4: /* Software increment */
> +                /* RAZ/WI since we don't implement the software-count event */
> +                break;
> +            case 5: /* Event counter selection register */
> +                /* Since we don't implement any events, writing to this register
> +                 * is actually UNPREDICTABLE. So we choose to RAZ/WI.
> +                 */
> +                break;
> +            default:
> +                goto bad_reg;
> +            }
> +            break;
> +        case 13: /* Performance counters */
> +            if (!arm_feature(env, ARM_FEATURE_V7)) {
> +                goto bad_reg;
> +            }
> +            switch (op2) {
> +            case 0: /* Cycle count register: not implemented, so RAZ/WI */
> +                break;
> +            case 1: /* Event type select */
> +                env->cp15.c9_pmxevtyper = val & 0xff;
> +                break;
> +            case 2: /* Event count register */
> +                /* Unimplemented (we have no events), RAZ/WI */
> +                break;
> +            default:
> +                goto bad_reg;
> +            }
> +            break;
> +        case 14: /* Performance monitor control */
> +            if (!arm_feature(env, ARM_FEATURE_V7)) {
> +                goto bad_reg;
> +            }
> +            switch (op2) {
> +            case 0: /* user enable */
> +                env->cp15.c9_pmuserenr = val & 1;
> +                /* changes access rights for cp registers, so flush tbs */
> +                tb_flush(env);

If you flush all tbs, you also have to ensure that on the translate.c
side, this is the last instruction of the tb. Otherwise, the rest of the
TB will be executed with the wrong access rights.

> +                break;
> +            case 1: /* interrupt enable set */
> +                /* We have no event counters so only the C bit can be changed */
> +                val &= (1 << 31);
> +                env->cp15.c9_pminten |= val;
> +                break;
> +            case 2: /* interrupt enable clear */
> +                val &= (1 << 31);
> +                env->cp15.c9_pminten &= ~val;
> +                break;
> +            }
> +            break;
>          default:
>              goto bad_reg;
>          }
> @@ -1878,27 +1957,81 @@ uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
>          return 0;
>      case 8: /* MMU TLB control.  */
>          goto bad_reg;
> -    case 9: /* Cache lockdown.  */
> -        switch (op1) {
> -        case 0: /* L1 cache.  */
> -	    if (arm_feature(env, ARM_FEATURE_OMAPCP))
> -		return 0;
> +    case 9:
> +        switch (crm) {
> +        case 0: /* Cache lockdown */
> +            switch (op1) {
> +            case 0: /* L1 cache.  */
> +                if (arm_feature(env, ARM_FEATURE_OMAPCP)) {
> +                    return 0;
> +                }
> +                switch (op2) {
> +                case 0:
> +                    return env->cp15.c9_data;
> +                case 1:
> +                    return env->cp15.c9_insn;
> +                default:
> +                    goto bad_reg;
> +                }
> +            case 1: /* L2 cache */
> +                if (crm != 0) {
> +                    goto bad_reg;
> +                }
> +                /* L2 Lockdown and Auxiliary control.  */
> +                return 0;
> +            default:
> +                goto bad_reg;
> +            }
> +            break;
> +        case 12: /* Performance monitor control */
> +            if (!arm_feature(env, ARM_FEATURE_V7)) {
> +                goto bad_reg;
> +            }
>              switch (op2) {
> -            case 0:
> -                return env->cp15.c9_data;
> -            case 1:
> -                return env->cp15.c9_insn;
> +            case 0: /* performance monitor control register */
> +                return env->cp15.c9_pmcr;
> +            case 1: /* count enable set */
> +            case 2: /* count enable clear */
> +                return env->cp15.c9_pmcnten;
> +            case 3: /* overflow flag status */
> +                return env->cp15.c9_pmovsr;
> +            case 4: /* software increment */
> +            case 5: /* event counter selection register */
> +                return 0; /* Unimplemented, RAZ/WI */
>              default:
>                  goto bad_reg;
>              }
> -        case 1: /* L2 cache */
> -            if (crm != 0)
> +        case 13: /* Performance counters */
> +            if (!arm_feature(env, ARM_FEATURE_V7)) {
> +                goto bad_reg;
> +            }
> +            switch (op2) {
> +            case 1: /* Event type select */
> +                return env->cp15.c9_pmxevtyper;
> +            case 0: /* Cycle count register */
> +            case 2: /* Event count register */
> +                /* Unimplemented, so RAZ/WI */
> +                return 0;
> +            default:
>                  goto bad_reg;
> -            /* L2 Lockdown and Auxiliary control.  */
> -            return 0;
> +            }
> +        case 14: /* Performance monitor control */
> +            if (!arm_feature(env, ARM_FEATURE_V7)) {
> +                goto bad_reg;
> +            }
> +            switch (op2) {
> +            case 0: /* user enable */
> +                return env->cp15.c9_pmuserenr;
> +            case 1: /* interrupt enable set */
> +            case 2: /* interrupt enable clear */
> +                return env->cp15.c9_pminten;
> +            default:
> +                goto bad_reg;
> +            }
>          default:
>              goto bad_reg;
>          }
> +        break;
>      case 10: /* MMU TLB lockdown.  */
>          /* ??? TLB lockdown not implemented.  */
>          return 0;
> diff --git a/target-arm/machine.c b/target-arm/machine.c
> index a18b7dc..7d4fc54 100644
> --- a/target-arm/machine.c
> +++ b/target-arm/machine.c
> @@ -44,6 +44,12 @@ void cpu_save(QEMUFile *f, void *opaque)
>      qemu_put_be32(f, env->cp15.c7_par);
>      qemu_put_be32(f, env->cp15.c9_insn);
>      qemu_put_be32(f, env->cp15.c9_data);
> +    qemu_put_be32(f, env->cp15.c9_pmcr);
> +    qemu_put_be32(f, env->cp15.c9_pmcnten);
> +    qemu_put_be32(f, env->cp15.c9_pmovsr);
> +    qemu_put_be32(f, env->cp15.c9_pmxevtyper);
> +    qemu_put_be32(f, env->cp15.c9_pmuserenr);
> +    qemu_put_be32(f, env->cp15.c9_pminten);
>      qemu_put_be32(f, env->cp15.c13_fcse);
>      qemu_put_be32(f, env->cp15.c13_context);
>      qemu_put_be32(f, env->cp15.c13_tls1);
> @@ -152,6 +158,12 @@ int cpu_load(QEMUFile *f, void *opaque, int version_id)
>      env->cp15.c7_par = qemu_get_be32(f);
>      env->cp15.c9_insn = qemu_get_be32(f);
>      env->cp15.c9_data = qemu_get_be32(f);
> +    env->cp15.c9_pmcr = qemu_get_be32(f);
> +    env->cp15.c9_pmcnten = qemu_get_be32(f);
> +    env->cp15.c9_pmovsr = qemu_get_be32(f);
> +    env->cp15.c9_pmxevtyper = qemu_get_be32(f);
> +    env->cp15.c9_pmuserenr = qemu_get_be32(f);
> +    env->cp15.c9_pminten = qemu_get_be32(f);
>      env->cp15.c13_fcse = qemu_get_be32(f);
>      env->cp15.c13_context = qemu_get_be32(f);
>      env->cp15.c13_tls1 = qemu_get_be32(f);
> diff --git a/target-arm/translate.c b/target-arm/translate.c
> index e1bda57..ea9cc30 100644
> --- a/target-arm/translate.c
> +++ b/target-arm/translate.c
> @@ -2438,12 +2438,28 @@ static int disas_cp_insn(CPUState *env, DisasContext *s, uint32_t insn)
>      return 0;
>  }
>  
> -static int cp15_user_ok(uint32_t insn)
> +static int cp15_user_ok(CPUState *env, uint32_t insn)
>  {
>      int cpn = (insn >> 16) & 0xf;
>      int cpm = insn & 0xf;
>      int op = ((insn >> 5) & 7) | ((insn >> 18) & 0x38);
>  
> +    if (arm_feature(env, ARM_FEATURE_V7) && cpn == 9) {
> +        /* Performance monitor registers fall into three categories:
> +         *  (a) always UNDEF in usermode
> +         *  (b) UNDEF only if PMUSERENR.EN is 0
> +         *  (c) always read OK and UNDEF on write (PMUSERENR only)
> +         */
> +        if ((cpm == 12 && (op < 6)) ||
> +            (cpm == 13 && (op < 3))) {
> +            return env->cp15.c9_pmuserenr;
> +        } else if (cpm == 14 && op == 0 && (insn & ARM_CP_RW_BIT)) {
> +            /* PMUSERENR, read only */
> +            return 1;
> +        }
> +        return 0;
> +    }
> +

Instead of having this complex test for all cp15 access, but only for
catching a few access to performance registers, wouldn't it make more
sense to have this test and an exception triggering directly in 
helper.c?

>      if (cpn == 13 && cpm == 0) {
>          /* TLS register.  */
>          if (op == 2 || (op == 3 && (insn & ARM_CP_RW_BIT)))
> @@ -2530,7 +2546,7 @@ static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)
>          /* cdp */
>          return 1;
>      }
> -    if (IS_USER(s) && !cp15_user_ok(insn)) {
> +    if (IS_USER(s) && !cp15_user_ok(env, insn)) {
>          return 1;
>      }
>  
> -- 
> 1.7.1
> 
> 
>
Peter Maydell April 25, 2011, 9:59 p.m. UTC | #2
On 25 April 2011 22:09, Aurelien Jarno <aurelien@aurel32.net> wrote:
> On Thu, Apr 21, 2011 at 05:01:48PM +0100, Peter Maydell wrote:

>> +                tb_flush(env);
>
> If you flush all tbs, you also have to ensure that on the translate.c
> side, this is the last instruction of the tb. Otherwise, the rest of the
> TB will be executed with the wrong access rights.

This is OK, because we can't get here unless we're in privileged
mode (PMUSERENR is never writable in user mode), and changing
PMUSERENR doesn't affect the access rights for privileged mode.
And a switch into user mode will be a change of TB anyway.

(Compare the handling of the TEECR, which also doesn't need to change
TB after a tb_flush(), for the same reasons.)

> Instead of having this complex test for all cp15 access, but only for
> catching a few access to performance registers, wouldn't it make more
> sense to have this test and an exception triggering directly in
> helper.c?

That was what my first design did, but in discussions on IRC
with Paul Brook he basically said that you can't generate an
exception in the helper routine, you have to either generate
runtime code to do the test or throw away the TBs. Unfortunately
I forget the exact rationale, so I've cc'd Paul to remind me :-)

On the subject of complexity: I have vague plans for overhauling
the cp15 support code anyway, so you can effectively register
handler functions for the cp15 registers you care about rather
than having to have one enormous function full of nested case
statements. You could then have the access checking code not
so wildly far away from the register read/write implementation.
(Plus we need support for banked cp15 registers at some point.)

-- PMM
Aurelien Jarno April 25, 2011, 10:31 p.m. UTC | #3
On Mon, Apr 25, 2011 at 10:59:52PM +0100, Peter Maydell wrote:
> On 25 April 2011 22:09, Aurelien Jarno <aurelien@aurel32.net> wrote:
> > On Thu, Apr 21, 2011 at 05:01:48PM +0100, Peter Maydell wrote:
> 
> >> +                tb_flush(env);
> >
> > If you flush all tbs, you also have to ensure that on the translate.c
> > side, this is the last instruction of the tb. Otherwise, the rest of the
> > TB will be executed with the wrong access rights.
> 
> This is OK, because we can't get here unless we're in privileged
> mode (PMUSERENR is never writable in user mode), and changing
> PMUSERENR doesn't affect the access rights for privileged mode.
> And a switch into user mode will be a change of TB anyway.
> 
> (Compare the handling of the TEECR, which also doesn't need to change
> TB after a tb_flush(), for the same reasons.)

Ok, fine then.

> > Instead of having this complex test for all cp15 access, but only for
> > catching a few access to performance registers, wouldn't it make more
> > sense to have this test and an exception triggering directly in
> > helper.c?
> 
> That was what my first design did, but in discussions on IRC
> with Paul Brook he basically said that you can't generate an
> exception in the helper routine, you have to either generate
> runtime code to do the test or throw away the TBs. Unfortunately
> I forget the exact rationale, so I've cc'd Paul to remind me :-)

This is something strange, plenty of targets are raising exceptions from
helpers without any problem.
Peter Maydell April 25, 2011, 10:35 p.m. UTC | #4
On 25 April 2011 23:31, Aurelien Jarno <aurelien@aurel32.net> wrote:
> On Mon, Apr 25, 2011 at 10:59:52PM +0100, Peter Maydell wrote:
>> On 25 April 2011 22:09, Aurelien Jarno <aurelien@aurel32.net> wrote:
>> > Instead of having this complex test for all cp15 access, but only for
>> > catching a few access to performance registers, wouldn't it make more
>> > sense to have this test and an exception triggering directly in
>> > helper.c?
>>
>> That was what my first design did, but in discussions on IRC
>> with Paul Brook he basically said that you can't generate an
>> exception in the helper routine, you have to either generate
>> runtime code to do the test or throw away the TBs. Unfortunately
>> I forget the exact rationale, so I've cc'd Paul to remind me :-)
>
> This is something strange, plenty of targets are raising exceptions from
> helpers without any problem.

You'd at minimum need to move the cp15 helper functions to a different
file, they're currently in helper.c which doesn't get compiled
with access to the global 'env' register. But I got the impression
there was something more significant than that.

-- PMM
Aurelien Jarno April 26, 2011, 10:23 a.m. UTC | #5
On Mon, Apr 25, 2011 at 11:35:54PM +0100, Peter Maydell wrote:
> On 25 April 2011 23:31, Aurelien Jarno <aurelien@aurel32.net> wrote:
> > On Mon, Apr 25, 2011 at 10:59:52PM +0100, Peter Maydell wrote:
> >> On 25 April 2011 22:09, Aurelien Jarno <aurelien@aurel32.net> wrote:
> >> > Instead of having this complex test for all cp15 access, but only for
> >> > catching a few access to performance registers, wouldn't it make more
> >> > sense to have this test and an exception triggering directly in
> >> > helper.c?
> >>
> >> That was what my first design did, but in discussions on IRC
> >> with Paul Brook he basically said that you can't generate an
> >> exception in the helper routine, you have to either generate
> >> runtime code to do the test or throw away the TBs. Unfortunately
> >> I forget the exact rationale, so I've cc'd Paul to remind me :-)
> >
> > This is something strange, plenty of targets are raising exceptions from
> > helpers without any problem.
> 
> You'd at minimum need to move the cp15 helper functions to a different
> file, they're currently in helper.c which doesn't get compiled
> with access to the global 'env' register. But I got the impression
> there was something more significant than that.
> 

I agree, but it's something that has to be done sooner or later anyway.
Peter Maydell May 6, 2011, 2:32 p.m. UTC | #6
On 26 April 2011 11:23, Aurelien Jarno <aurelien@aurel32.net> wrote:
> On Mon, Apr 25, 2011 at 11:35:54PM +0100, Peter Maydell wrote:
>> On 25 April 2011 23:31, Aurelien Jarno <aurelien@aurel32.net> wrote:
>> > On Mon, Apr 25, 2011 at 10:59:52PM +0100, Peter Maydell wrote:
>> >> On 25 April 2011 22:09, Aurelien Jarno <aurelien@aurel32.net> wrote:
>> >> > Instead of having this complex test for all cp15 access, but only for
>> >> > catching a few access to performance registers, wouldn't it make more
>> >> > sense to have this test and an exception triggering directly in
>> >> > helper.c?
>> >>
>> >> That was what my first design did, but in discussions on IRC
>> >> with Paul Brook he basically said that you can't generate an
>> >> exception in the helper routine, you have to either generate
>> >> runtime code to do the test or throw away the TBs. Unfortunately
>> >> I forget the exact rationale, so I've cc'd Paul to remind me :-)
>> >
>> > This is something strange, plenty of targets are raising exceptions from
>> > helpers without any problem.
>>
>> You'd at minimum need to move the cp15 helper functions to a different
>> file, they're currently in helper.c which doesn't get compiled
>> with access to the global 'env' register.

> I agree, but it's something that has to be done sooner or later anyway.

I just spoke with Paul on IRC about this. In summary:
 * for a helper to cause an exception then it has (a) to make sure CPU
state (pc, condflags) is sync'd before the call to the helper and (b)
the helper has to be in a file with access to global env, because it
needs to call cpu_loop_exit()
 * (a) is a bit fragile because it's easy to forget and if TCG gets
cleverer things might break in a hard-to-track down way
 * (b) is bad because it increases the set of helper functions accessing
global env
 * and therefore helpers which throw exceptions are a bad idea

For rationale for/arguing about (b) see the comment on this patch:
 http://patchwork.ozlabs.org/patch/94384/

-- PMM
Aurelien Jarno May 14, 2011, 9:32 p.m. UTC | #7
On Fri, May 06, 2011 at 03:32:27PM +0100, Peter Maydell wrote:
> On 26 April 2011 11:23, Aurelien Jarno <aurelien@aurel32.net> wrote:
> > On Mon, Apr 25, 2011 at 11:35:54PM +0100, Peter Maydell wrote:
> >> On 25 April 2011 23:31, Aurelien Jarno <aurelien@aurel32.net> wrote:
> >> > On Mon, Apr 25, 2011 at 10:59:52PM +0100, Peter Maydell wrote:
> >> >> On 25 April 2011 22:09, Aurelien Jarno <aurelien@aurel32.net> wrote:
> >> >> > Instead of having this complex test for all cp15 access, but only for
> >> >> > catching a few access to performance registers, wouldn't it make more
> >> >> > sense to have this test and an exception triggering directly in
> >> >> > helper.c?
> >> >>
> >> >> That was what my first design did, but in discussions on IRC
> >> >> with Paul Brook he basically said that you can't generate an
> >> >> exception in the helper routine, you have to either generate
> >> >> runtime code to do the test or throw away the TBs. Unfortunately
> >> >> I forget the exact rationale, so I've cc'd Paul to remind me :-)
> >> >
> >> > This is something strange, plenty of targets are raising exceptions from
> >> > helpers without any problem.
> >>
> >> You'd at minimum need to move the cp15 helper functions to a different
> >> file, they're currently in helper.c which doesn't get compiled
> >> with access to the global 'env' register.
> 
> > I agree, but it's something that has to be done sooner or later anyway.
> 
> I just spoke with Paul on IRC about this. In summary:
>  * for a helper to cause an exception then it has (a) to make sure CPU
> state (pc, condflags) is sync'd before the call to the helper and (b)
> the helper has to be in a file with access to global env, because it
> needs to call cpu_loop_exit()

I don't think (a) is true. It is possible to use the same way as for
load/store operations, that is call cpu_restore_state() before calling
cpu_loop_exit().

>  * (a) is a bit fragile because it's easy to forget and if TCG gets
> cleverer things might break in a hard-to-track down way
>  * (b) is bad because it increases the set of helper functions accessing
> global env
>  * and therefore helpers which throw exceptions are a bad idea
> 
> For rationale for/arguing about (b) see the comment on this patch:
>  http://patchwork.ozlabs.org/patch/94384/
> 

If you don't really like having env as a fixed host registers (recent
patch series from Blue Swirl seems to want to get rid of this), it is 
possible to pass env as an argument of the helper to do that.

What ever solution is used, we need env for the load/store functions,
and theses functions already provide a framework to restore the CPU
state. I think it's a good idea to use this framework instead of having
a second framework using TCG code for that.
Blue Swirl May 14, 2011, 10:01 p.m. UTC | #8
On Sun, May 15, 2011 at 12:32 AM, Aurelien Jarno <aurelien@aurel32.net> wrote:
> On Fri, May 06, 2011 at 03:32:27PM +0100, Peter Maydell wrote:
>> On 26 April 2011 11:23, Aurelien Jarno <aurelien@aurel32.net> wrote:
>> > On Mon, Apr 25, 2011 at 11:35:54PM +0100, Peter Maydell wrote:
>> >> On 25 April 2011 23:31, Aurelien Jarno <aurelien@aurel32.net> wrote:
>> >> > On Mon, Apr 25, 2011 at 10:59:52PM +0100, Peter Maydell wrote:
>> >> >> On 25 April 2011 22:09, Aurelien Jarno <aurelien@aurel32.net> wrote:
>> >> >> > Instead of having this complex test for all cp15 access, but only for
>> >> >> > catching a few access to performance registers, wouldn't it make more
>> >> >> > sense to have this test and an exception triggering directly in
>> >> >> > helper.c?
>> >> >>
>> >> >> That was what my first design did, but in discussions on IRC
>> >> >> with Paul Brook he basically said that you can't generate an
>> >> >> exception in the helper routine, you have to either generate
>> >> >> runtime code to do the test or throw away the TBs. Unfortunately
>> >> >> I forget the exact rationale, so I've cc'd Paul to remind me :-)
>> >> >
>> >> > This is something strange, plenty of targets are raising exceptions from
>> >> > helpers without any problem.
>> >>
>> >> You'd at minimum need to move the cp15 helper functions to a different
>> >> file, they're currently in helper.c which doesn't get compiled
>> >> with access to the global 'env' register.
>>
>> > I agree, but it's something that has to be done sooner or later anyway.
>>
>> I just spoke with Paul on IRC about this. In summary:
>>  * for a helper to cause an exception then it has (a) to make sure CPU
>> state (pc, condflags) is sync'd before the call to the helper and (b)
>> the helper has to be in a file with access to global env, because it
>> needs to call cpu_loop_exit()
>
> I don't think (a) is true. It is possible to use the same way as for
> load/store operations, that is call cpu_restore_state() before calling
> cpu_loop_exit().
>
>>  * (a) is a bit fragile because it's easy to forget and if TCG gets
>> cleverer things might break in a hard-to-track down way
>>  * (b) is bad because it increases the set of helper functions accessing
>> global env
>>  * and therefore helpers which throw exceptions are a bad idea
>>
>> For rationale for/arguing about (b) see the comment on this patch:
>>  http://patchwork.ozlabs.org/patch/94384/
>>
>
> If you don't really like having env as a fixed host registers (recent
> patch series from Blue Swirl seems to want to get rid of this), it is
> possible to pass env as an argument of the helper to do that.
>
> What ever solution is used, we need env for the load/store functions,

Currently this is true, but actually qemu_ld/st only need a pointer to
the TLB. If the address is a constant, some of the TLB calculations
could be performed at translation time. This would need a very
different approach to qemu_ld/st than current one.
Aurelien Jarno May 14, 2011, 10:10 p.m. UTC | #9
On Sun, May 15, 2011 at 01:01:40AM +0300, Blue Swirl wrote:
> On Sun, May 15, 2011 at 12:32 AM, Aurelien Jarno <aurelien@aurel32.net> wrote:
> > On Fri, May 06, 2011 at 03:32:27PM +0100, Peter Maydell wrote:
> >> On 26 April 2011 11:23, Aurelien Jarno <aurelien@aurel32.net> wrote:
> >> > On Mon, Apr 25, 2011 at 11:35:54PM +0100, Peter Maydell wrote:
> >> >> On 25 April 2011 23:31, Aurelien Jarno <aurelien@aurel32.net> wrote:
> >> >> > On Mon, Apr 25, 2011 at 10:59:52PM +0100, Peter Maydell wrote:
> >> >> >> On 25 April 2011 22:09, Aurelien Jarno <aurelien@aurel32.net> wrote:
> >> >> >> > Instead of having this complex test for all cp15 access, but only for
> >> >> >> > catching a few access to performance registers, wouldn't it make more
> >> >> >> > sense to have this test and an exception triggering directly in
> >> >> >> > helper.c?
> >> >> >>
> >> >> >> That was what my first design did, but in discussions on IRC
> >> >> >> with Paul Brook he basically said that you can't generate an
> >> >> >> exception in the helper routine, you have to either generate
> >> >> >> runtime code to do the test or throw away the TBs. Unfortunately
> >> >> >> I forget the exact rationale, so I've cc'd Paul to remind me :-)
> >> >> >
> >> >> > This is something strange, plenty of targets are raising exceptions from
> >> >> > helpers without any problem.
> >> >>
> >> >> You'd at minimum need to move the cp15 helper functions to a different
> >> >> file, they're currently in helper.c which doesn't get compiled
> >> >> with access to the global 'env' register.
> >>
> >> > I agree, but it's something that has to be done sooner or later anyway.
> >>
> >> I just spoke with Paul on IRC about this. In summary:
> >>  * for a helper to cause an exception then it has (a) to make sure CPU
> >> state (pc, condflags) is sync'd before the call to the helper and (b)
> >> the helper has to be in a file with access to global env, because it
> >> needs to call cpu_loop_exit()
> >
> > I don't think (a) is true. It is possible to use the same way as for
> > load/store operations, that is call cpu_restore_state() before calling
> > cpu_loop_exit().
> >
> >>  * (a) is a bit fragile because it's easy to forget and if TCG gets
> >> cleverer things might break in a hard-to-track down way
> >>  * (b) is bad because it increases the set of helper functions accessing
> >> global env
> >>  * and therefore helpers which throw exceptions are a bad idea
> >>
> >> For rationale for/arguing about (b) see the comment on this patch:
> >>  http://patchwork.ozlabs.org/patch/94384/
> >>
> >
> > If you don't really like having env as a fixed host registers (recent
> > patch series from Blue Swirl seems to want to get rid of this), it is
> > possible to pass env as an argument of the helper to do that.
> >
> > What ever solution is used, we need env for the load/store functions,
> 
> Currently this is true, but actually qemu_ld/st only need a pointer to
> the TLB. If the address is a constant, some of the TLB calculations
> could be performed at translation time. This would need a very
> different approach to qemu_ld/st than current one.
> 

That's true for the fastpath, but the slowpath really need to have
access to the env register (for example to access mem_io_pc,
mem_io_vaddr, iotlb).

Also looking at the softmmu code, it seems to be possible to call
cpu_loop_exit() without env, using cpu_single_env instead.
Peter Maydell May 16, 2011, 9:59 a.m. UTC | #10
On 14 May 2011 22:32, Aurelien Jarno <aurelien@aurel32.net> wrote:
> On Fri, May 06, 2011 at 03:32:27PM +0100, Peter Maydell wrote:
>> I just spoke with Paul on IRC about this. In summary:
>>  * for a helper to cause an exception then it has (a) to make sure CPU
>> state (pc, condflags) is sync'd before the call to the helper and (b)
>> the helper has to be in a file with access to global env, because it
>> needs to call cpu_loop_exit()
>
> I don't think (a) is true. It is possible to use the same way as for
> load/store operations, that is call cpu_restore_state() before calling
> cpu_loop_exit().

Yes, I think you're right here, I'm not sure why I didn't think that
would work. (b) still applies, though.

> If you don't really like having env as a fixed host registers (recent
> patch series from Blue Swirl seems to want to get rid of this), it is
> possible to pass env as an argument of the helper to do that.

I think really my position on this patch is that it adds
functionality that means you can't actually boot recent Linux
kernels with hw breakpoint support enabled, and I'd rather not
have it get tangled up with either the ongoing "remove AREG0"
discussions or a more general overhaul of how cp15 registers
are handled. It just handles this handful of new registers in
the same way we currently handle all the other cp14/cp15 regs.

> What ever solution is used, we need env for the load/store functions,
> and theses functions already provide a framework to restore the CPU
> state. I think it's a good idea to use this framework instead of having
> a second framework using TCG code for that.

Do you mean you'd like to see us using cpu_restore_state() instead
of explicit state-syncing TCG code for the cases where the exception
is "expected" like SVC instructions? (ie what most targets have
a gen_exception() function for.)

-- PMM
Paul Brook May 16, 2011, 4:10 p.m. UTC | #11
> > I just spoke with Paul on IRC about this. In summary:
> >  * for a helper to cause an exception then it has (a) to make sure CPU
> >
> > state (pc, condflags) is sync'd before the call to the helper and (b)
> > the helper has to be in a file with access to global env, because it
> > needs to call cpu_loop_exit()
> 
> I don't think (a) is true. It is possible to use the same way as for
> load/store operations, that is call cpu_restore_state() before calling
> cpu_loop_exit().

To call cpu_restore_state you need to know searched_pc.  To find that you need 
to unwind the host stack all the way back to translated code.

Paul
Peter Maydell May 16, 2011, 4:37 p.m. UTC | #12
On 16 May 2011 17:10, Paul Brook <paul@codesourcery.com> wrote:
>> > I just spoke with Paul on IRC about this. In summary:
>> >  * for a helper to cause an exception then it has (a) to make sure CPU
>> >
>> > state (pc, condflags) is sync'd before the call to the helper and (b)
>> > the helper has to be in a file with access to global env, because it
>> > needs to call cpu_loop_exit()
>>
>> I don't think (a) is true. It is possible to use the same way as for
>> load/store operations, that is call cpu_restore_state() before calling
>> cpu_loop_exit().
>
> To call cpu_restore_state you need to know searched_pc.  To find that you need
> to unwind the host stack all the way back to translated code.

You can do this by calling GETPC() from the top level helper function
though, right? [OK, we'd need to move the definition out of dyngen-exec.h.]

-- PMM
Aurelien Jarno May 16, 2011, 5:29 p.m. UTC | #13
On Mon, May 16, 2011 at 10:59:47AM +0100, Peter Maydell wrote:
> On 14 May 2011 22:32, Aurelien Jarno <aurelien@aurel32.net> wrote:
> > On Fri, May 06, 2011 at 03:32:27PM +0100, Peter Maydell wrote:
> >> I just spoke with Paul on IRC about this. In summary:
> >>  * for a helper to cause an exception then it has (a) to make sure CPU
> >> state (pc, condflags) is sync'd before the call to the helper and (b)
> >> the helper has to be in a file with access to global env, because it
> >> needs to call cpu_loop_exit()
> >
> > I don't think (a) is true. It is possible to use the same way as for
> > load/store operations, that is call cpu_restore_state() before calling
> > cpu_loop_exit().
> 
> Yes, I think you're right here, I'm not sure why I didn't think that
> would work. (b) still applies, though.
> 
> > If you don't really like having env as a fixed host registers (recent
> > patch series from Blue Swirl seems to want to get rid of this), it is
> > possible to pass env as an argument of the helper to do that.
> 
> I think really my position on this patch is that it adds
> functionality that means you can't actually boot recent Linux
> kernels with hw breakpoint support enabled, and I'd rather not
> have it get tangled up with either the ongoing "remove AREG0"
> discussions or a more general overhaul of how cp15 registers
> are handled. It just handles this handful of new registers in
> the same way we currently handle all the other cp14/cp15 regs.

Well the current discussion is about to know if env access should be
implicit (through a fixed register), or explicit (passed as an argument
to the helper). Blue Swirl is working towards the second solution to see
if it could work or not, so currently I think both options are still
acceptable (both options are currently in use in the current code).

> > What ever solution is used, we need env for the load/store functions,
> > and theses functions already provide a framework to restore the CPU
> > state. I think it's a good idea to use this framework instead of having
> > a second framework using TCG code for that.
> 
> Do you mean you'd like to see us using cpu_restore_state() instead
> of explicit state-syncing TCG code for the cases where the exception
> is "expected" like SVC instructions? (ie what most targets have
> a gen_exception() function for.)
> 

Well maybe this patch is not the best example to use
cpu_restore_state(), but I think we should go toward this direction.
Exceptions as their name suggests are not the rules, so we should avoid
generating code to handle them (like state-syncing TCG code), and use
CPU state restoration, even if it is not fast (that's not a problem, as
exceptions are not the rules).

That said given this patch is more or less an extension of an existing
code, we may want to apply it anyway.
Aurelien Jarno May 16, 2011, 5:29 p.m. UTC | #14
On Mon, May 16, 2011 at 05:37:03PM +0100, Peter Maydell wrote:
> On 16 May 2011 17:10, Paul Brook <paul@codesourcery.com> wrote:
> >> > I just spoke with Paul on IRC about this. In summary:
> >> >  * for a helper to cause an exception then it has (a) to make sure CPU
> >> >
> >> > state (pc, condflags) is sync'd before the call to the helper and (b)
> >> > the helper has to be in a file with access to global env, because it
> >> > needs to call cpu_loop_exit()
> >>
> >> I don't think (a) is true. It is possible to use the same way as for
> >> load/store operations, that is call cpu_restore_state() before calling
> >> cpu_loop_exit().
> >
> > To call cpu_restore_state you need to know searched_pc.  To find that you need
> > to unwind the host stack all the way back to translated code.
> 
> You can do this by calling GETPC() from the top level helper function
> though, right? [OK, we'd need to move the definition out of dyngen-exec.h.]

No we don't need to move it out of dyngen-exec.h. dyngen-exec.h is
included in target-*/exec.h, as the softmmu helpers, which are included
in target-*/op_helper.c, call cpu_restore_state().

For an actual usage of cpu_restore_state() outside of the softmmu
helpers, you can have a look at target-sh4/op-helper.c, which uses this
technique for raising most exceptions, and especially the FPU ones.
Peter Maydell May 16, 2011, 5:47 p.m. UTC | #15
On 16 May 2011 18:29, Aurelien Jarno <aurelien@aurel32.net> wrote:
> On Mon, May 16, 2011 at 05:37:03PM +0100, Peter Maydell wrote:
>> You can do this by calling GETPC() from the top level helper function
>> though, right? [OK, we'd need to move the definition out of dyngen-exec.h.]
>
> No we don't need to move it out of dyngen-exec.h. dyngen-exec.h is
> included in target-*/exec.h, as the softmmu helpers, which are included
> in target-*/op_helper.c, call cpu_restore_state().

I meant, assuming we want to reduce the set of helpers which use
the implicit-global-env (ie: back out the patches which made
helpers other than op_helper.c include exec.h). At the moment
you can't get GETPC() without also getting the global-env which
means you have to be in a source file compiled with the right CFLAGS.
Sorry for the lack of clarity.

> For an actual usage of cpu_restore_state() outside of the softmmu
> helpers, you can have a look at target-sh4/op-helper.c, which uses this
> technique for raising most exceptions, and especially the FPU ones.

Sure. It's in a file which has access to global-env, though.

-- PMM
Peter Maydell May 16, 2011, 5:51 p.m. UTC | #16
On 16 May 2011 18:29, Aurelien Jarno <aurelien@aurel32.net> wrote:
> That said given this patch is more or less an extension of an existing
> code, we may want to apply it anyway.

That is the conclusion I'm hoping to persuade you to, yes :-)

-- PMM
Aurelien Jarno May 16, 2011, 6:06 p.m. UTC | #17
On Mon, May 16, 2011 at 06:47:42PM +0100, Peter Maydell wrote:
> On 16 May 2011 18:29, Aurelien Jarno <aurelien@aurel32.net> wrote:
> > On Mon, May 16, 2011 at 05:37:03PM +0100, Peter Maydell wrote:
> >> You can do this by calling GETPC() from the top level helper function
> >> though, right? [OK, we'd need to move the definition out of dyngen-exec.h.]
> >
> > No we don't need to move it out of dyngen-exec.h. dyngen-exec.h is
> > included in target-*/exec.h, as the softmmu helpers, which are included
> > in target-*/op_helper.c, call cpu_restore_state().
> 
> I meant, assuming we want to reduce the set of helpers which use
> the implicit-global-env (ie: back out the patches which made
> helpers other than op_helper.c include exec.h). At the moment
> you can't get GETPC() without also getting the global-env which
> means you have to be in a source file compiled with the right CFLAGS.
> Sorry for the lack of clarity.

Well, I haven't tried, but it seems you can simply include dyngen-exec.h
directly in the file you want.
Peter Maydell June 15, 2011, 5:39 p.m. UTC | #18
On 16 May 2011 18:51, Peter Maydell <peter.maydell@linaro.org> wrote:
> On 16 May 2011 18:29, Aurelien Jarno <aurelien@aurel32.net> wrote:
>> That said given this patch is more or less an extension of an existing
>> code, we may want to apply it anyway.
>
> That is the conclusion I'm hoping to persuade you to, yes :-)

This patch seems to have got stuck in limbo -- any chance of
a definite ack/nak-and-do-it-like-this/commit from anybody?

[This bug is one of the items on my "ARM issues that must be fixed
in some manner for 0.15 release".]

thanks
-- PMM
diff mbox

Patch

diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index d5af644..b8e7419 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -129,6 +129,12 @@  typedef struct CPUARMState {
         uint32_t c7_par;  /* Translation result. */
         uint32_t c9_insn; /* Cache lockdown registers.  */
         uint32_t c9_data;
+        uint32_t c9_pmcr; /* performance monitor control register */
+        uint32_t c9_pmcnten; /* perf monitor counter enables */
+        uint32_t c9_pmovsr; /* perf monitor overflow status */
+        uint32_t c9_pmxevtyper; /* perf monitor event type */
+        uint32_t c9_pmuserenr; /* perf monitor user enable */
+        uint32_t c9_pminten; /* perf monitor interrupt enables */
         uint32_t c13_fcse; /* FCSE PID.  */
         uint32_t c13_context; /* Context ID.  */
         uint32_t c13_tls1; /* User RW Thread register.  */
@@ -434,7 +440,7 @@  void cpu_arm_set_cp_io(CPUARMState *env, int cpnum,
 #define cpu_signal_handler cpu_arm_signal_handler
 #define cpu_list arm_cpu_list
 
-#define CPU_SAVE_VERSION 3
+#define CPU_SAVE_VERSION 4
 
 /* MMU modes definitions */
 #define MMU_MODE0_SUFFIX _kernel
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 62ae72e..b051b8c 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -270,6 +270,10 @@  void cpu_reset(CPUARMState *env)
     }
     env->vfp.xregs[ARM_VFP_FPEXC] = 0;
     env->cp15.c2_base_mask = 0xffffc000u;
+    /* v7 performance monitor control register: same implementor
+     * field as main ID register, and we implement no event counters.
+     */
+    env->cp15.c9_pmcr = (id & 0xff000000);
 #endif
     set_flush_to_zero(1, &env->vfp.standard_fp_status);
     set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
@@ -1587,6 +1591,81 @@  void HELPER(set_cp15)(CPUState *env, uint32_t insn, uint32_t val)
         case 1: /* TCM memory region registers.  */
             /* Not implemented.  */
             goto bad_reg;
+        case 12: /* Performance monitor control */
+            /* Performance monitors are implementation defined in v7,
+             * but with an ARM recommended set of registers, which we
+             * follow (although we don't actually implement any counters)
+             */
+            if (!arm_feature(env, ARM_FEATURE_V7)) {
+                goto bad_reg;
+            }
+            switch (op2) {
+            case 0: /* performance monitor control register */
+                /* only the DP, X, D and E bits are writable */
+                env->cp15.c9_pmcr &= ~0x39;
+                env->cp15.c9_pmcr |= (val & 0x39);
+                break;
+            case 1: /* Count enable set register */
+                val &= (1 << 31);
+                env->cp15.c9_pmcnten |= val;
+                break;
+            case 2: /* Count enable clear */
+                val &= (1 << 31);
+                env->cp15.c9_pmcnten &= ~val;
+                break;
+            case 3: /* Overflow flag status */
+                env->cp15.c9_pmovsr &= ~val;
+                break;
+            case 4: /* Software increment */
+                /* RAZ/WI since we don't implement the software-count event */
+                break;
+            case 5: /* Event counter selection register */
+                /* Since we don't implement any events, writing to this register
+                 * is actually UNPREDICTABLE. So we choose to RAZ/WI.
+                 */
+                break;
+            default:
+                goto bad_reg;
+            }
+            break;
+        case 13: /* Performance counters */
+            if (!arm_feature(env, ARM_FEATURE_V7)) {
+                goto bad_reg;
+            }
+            switch (op2) {
+            case 0: /* Cycle count register: not implemented, so RAZ/WI */
+                break;
+            case 1: /* Event type select */
+                env->cp15.c9_pmxevtyper = val & 0xff;
+                break;
+            case 2: /* Event count register */
+                /* Unimplemented (we have no events), RAZ/WI */
+                break;
+            default:
+                goto bad_reg;
+            }
+            break;
+        case 14: /* Performance monitor control */
+            if (!arm_feature(env, ARM_FEATURE_V7)) {
+                goto bad_reg;
+            }
+            switch (op2) {
+            case 0: /* user enable */
+                env->cp15.c9_pmuserenr = val & 1;
+                /* changes access rights for cp registers, so flush tbs */
+                tb_flush(env);
+                break;
+            case 1: /* interrupt enable set */
+                /* We have no event counters so only the C bit can be changed */
+                val &= (1 << 31);
+                env->cp15.c9_pminten |= val;
+                break;
+            case 2: /* interrupt enable clear */
+                val &= (1 << 31);
+                env->cp15.c9_pminten &= ~val;
+                break;
+            }
+            break;
         default:
             goto bad_reg;
         }
@@ -1878,27 +1957,81 @@  uint32_t HELPER(get_cp15)(CPUState *env, uint32_t insn)
         return 0;
     case 8: /* MMU TLB control.  */
         goto bad_reg;
-    case 9: /* Cache lockdown.  */
-        switch (op1) {
-        case 0: /* L1 cache.  */
-	    if (arm_feature(env, ARM_FEATURE_OMAPCP))
-		return 0;
+    case 9:
+        switch (crm) {
+        case 0: /* Cache lockdown */
+            switch (op1) {
+            case 0: /* L1 cache.  */
+                if (arm_feature(env, ARM_FEATURE_OMAPCP)) {
+                    return 0;
+                }
+                switch (op2) {
+                case 0:
+                    return env->cp15.c9_data;
+                case 1:
+                    return env->cp15.c9_insn;
+                default:
+                    goto bad_reg;
+                }
+            case 1: /* L2 cache */
+                if (crm != 0) {
+                    goto bad_reg;
+                }
+                /* L2 Lockdown and Auxiliary control.  */
+                return 0;
+            default:
+                goto bad_reg;
+            }
+            break;
+        case 12: /* Performance monitor control */
+            if (!arm_feature(env, ARM_FEATURE_V7)) {
+                goto bad_reg;
+            }
             switch (op2) {
-            case 0:
-                return env->cp15.c9_data;
-            case 1:
-                return env->cp15.c9_insn;
+            case 0: /* performance monitor control register */
+                return env->cp15.c9_pmcr;
+            case 1: /* count enable set */
+            case 2: /* count enable clear */
+                return env->cp15.c9_pmcnten;
+            case 3: /* overflow flag status */
+                return env->cp15.c9_pmovsr;
+            case 4: /* software increment */
+            case 5: /* event counter selection register */
+                return 0; /* Unimplemented, RAZ/WI */
             default:
                 goto bad_reg;
             }
-        case 1: /* L2 cache */
-            if (crm != 0)
+        case 13: /* Performance counters */
+            if (!arm_feature(env, ARM_FEATURE_V7)) {
+                goto bad_reg;
+            }
+            switch (op2) {
+            case 1: /* Event type select */
+                return env->cp15.c9_pmxevtyper;
+            case 0: /* Cycle count register */
+            case 2: /* Event count register */
+                /* Unimplemented, so RAZ/WI */
+                return 0;
+            default:
                 goto bad_reg;
-            /* L2 Lockdown and Auxiliary control.  */
-            return 0;
+            }
+        case 14: /* Performance monitor control */
+            if (!arm_feature(env, ARM_FEATURE_V7)) {
+                goto bad_reg;
+            }
+            switch (op2) {
+            case 0: /* user enable */
+                return env->cp15.c9_pmuserenr;
+            case 1: /* interrupt enable set */
+            case 2: /* interrupt enable clear */
+                return env->cp15.c9_pminten;
+            default:
+                goto bad_reg;
+            }
         default:
             goto bad_reg;
         }
+        break;
     case 10: /* MMU TLB lockdown.  */
         /* ??? TLB lockdown not implemented.  */
         return 0;
diff --git a/target-arm/machine.c b/target-arm/machine.c
index a18b7dc..7d4fc54 100644
--- a/target-arm/machine.c
+++ b/target-arm/machine.c
@@ -44,6 +44,12 @@  void cpu_save(QEMUFile *f, void *opaque)
     qemu_put_be32(f, env->cp15.c7_par);
     qemu_put_be32(f, env->cp15.c9_insn);
     qemu_put_be32(f, env->cp15.c9_data);
+    qemu_put_be32(f, env->cp15.c9_pmcr);
+    qemu_put_be32(f, env->cp15.c9_pmcnten);
+    qemu_put_be32(f, env->cp15.c9_pmovsr);
+    qemu_put_be32(f, env->cp15.c9_pmxevtyper);
+    qemu_put_be32(f, env->cp15.c9_pmuserenr);
+    qemu_put_be32(f, env->cp15.c9_pminten);
     qemu_put_be32(f, env->cp15.c13_fcse);
     qemu_put_be32(f, env->cp15.c13_context);
     qemu_put_be32(f, env->cp15.c13_tls1);
@@ -152,6 +158,12 @@  int cpu_load(QEMUFile *f, void *opaque, int version_id)
     env->cp15.c7_par = qemu_get_be32(f);
     env->cp15.c9_insn = qemu_get_be32(f);
     env->cp15.c9_data = qemu_get_be32(f);
+    env->cp15.c9_pmcr = qemu_get_be32(f);
+    env->cp15.c9_pmcnten = qemu_get_be32(f);
+    env->cp15.c9_pmovsr = qemu_get_be32(f);
+    env->cp15.c9_pmxevtyper = qemu_get_be32(f);
+    env->cp15.c9_pmuserenr = qemu_get_be32(f);
+    env->cp15.c9_pminten = qemu_get_be32(f);
     env->cp15.c13_fcse = qemu_get_be32(f);
     env->cp15.c13_context = qemu_get_be32(f);
     env->cp15.c13_tls1 = qemu_get_be32(f);
diff --git a/target-arm/translate.c b/target-arm/translate.c
index e1bda57..ea9cc30 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -2438,12 +2438,28 @@  static int disas_cp_insn(CPUState *env, DisasContext *s, uint32_t insn)
     return 0;
 }
 
-static int cp15_user_ok(uint32_t insn)
+static int cp15_user_ok(CPUState *env, uint32_t insn)
 {
     int cpn = (insn >> 16) & 0xf;
     int cpm = insn & 0xf;
     int op = ((insn >> 5) & 7) | ((insn >> 18) & 0x38);
 
+    if (arm_feature(env, ARM_FEATURE_V7) && cpn == 9) {
+        /* Performance monitor registers fall into three categories:
+         *  (a) always UNDEF in usermode
+         *  (b) UNDEF only if PMUSERENR.EN is 0
+         *  (c) always read OK and UNDEF on write (PMUSERENR only)
+         */
+        if ((cpm == 12 && (op < 6)) ||
+            (cpm == 13 && (op < 3))) {
+            return env->cp15.c9_pmuserenr;
+        } else if (cpm == 14 && op == 0 && (insn & ARM_CP_RW_BIT)) {
+            /* PMUSERENR, read only */
+            return 1;
+        }
+        return 0;
+    }
+
     if (cpn == 13 && cpm == 0) {
         /* TLS register.  */
         if (op == 2 || (op == 3 && (insn & ARM_CP_RW_BIT)))
@@ -2530,7 +2546,7 @@  static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)
         /* cdp */
         return 1;
     }
-    if (IS_USER(s) && !cp15_user_ok(insn)) {
+    if (IS_USER(s) && !cp15_user_ok(env, insn)) {
         return 1;
     }