diff mbox

[v3,2/2] xen/arm32: implement VFP context switch

Message ID 1370268044-23140-3-git-send-email-julien.grall@linaro.org
State Superseded, archived
Headers show

Commit Message

Julien Grall June 3, 2013, 2 p.m. UTC
Add support for VFP context switch on arm32 and a dummy support for arm64

Signed-off-by: Julien Grall <julien.grall@linaro.org>

Changes in v3:
    - Add vfp_init to check if the processor supports VFP 3
    - Add clobber memory
    - Remove tmps
    - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h

Changes in v2:
    - Fix all the small errors (type, lost headers...)
    - Add some comments
---
 xen/arch/arm/arm32/Makefile     |    1 +
 xen/arch/arm/arm32/vfp.c        |   99 +++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/arm64/Makefile     |    1 +
 xen/arch/arm/arm64/vfp.c        |   13 +++++
 xen/arch/arm/domain.c           |    7 ++-
 xen/include/asm-arm/arm32/vfp.h |   41 ++++++++++++++++
 xen/include/asm-arm/arm64/vfp.h |   16 +++++++
 xen/include/asm-arm/cpregs.h    |    9 ++++
 xen/include/asm-arm/domain.h    |    4 ++
 xen/include/asm-arm/vfp.h       |   25 ++++++++++
 10 files changed, 214 insertions(+), 2 deletions(-)
 create mode 100644 xen/arch/arm/arm32/vfp.c
 create mode 100644 xen/arch/arm/arm64/vfp.c
 create mode 100644 xen/include/asm-arm/arm32/vfp.h
 create mode 100644 xen/include/asm-arm/arm64/vfp.h
 create mode 100644 xen/include/asm-arm/vfp.h

Comments

Ian Campbell June 3, 2013, 2:15 p.m. UTC | #1
On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote:
> Add support for VFP context switch on arm32 and a dummy support for arm64
> 
> Signed-off-by: Julien Grall <julien.grall@linaro.org>
> 
> Changes in v3:
>     - Add vfp_init to check if the processor supports VFP 3
>     - Add clobber memory
>     - Remove tmps
>     - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h
> 
> Changes in v2:
>     - Fix all the small errors (type, lost headers...)
>     - Add some comments
> ---
>  xen/arch/arm/arm32/Makefile     |    1 +
>  xen/arch/arm/arm32/vfp.c        |   99 +++++++++++++++++++++++++++++++++++++++
>  xen/arch/arm/arm64/Makefile     |    1 +
>  xen/arch/arm/arm64/vfp.c        |   13 +++++
>  xen/arch/arm/domain.c           |    7 ++-
>  xen/include/asm-arm/arm32/vfp.h |   41 ++++++++++++++++
>  xen/include/asm-arm/arm64/vfp.h |   16 +++++++
>  xen/include/asm-arm/cpregs.h    |    9 ++++
>  xen/include/asm-arm/domain.h    |    4 ++
>  xen/include/asm-arm/vfp.h       |   25 ++++++++++
>  10 files changed, 214 insertions(+), 2 deletions(-)
>  create mode 100644 xen/arch/arm/arm32/vfp.c
>  create mode 100644 xen/arch/arm/arm64/vfp.c
>  create mode 100644 xen/include/asm-arm/arm32/vfp.h
>  create mode 100644 xen/include/asm-arm/arm64/vfp.h
>  create mode 100644 xen/include/asm-arm/vfp.h
> 
> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile
> index aaf277a..b903803 100644
> --- a/xen/arch/arm/arm32/Makefile
> +++ b/xen/arch/arm/arm32/Makefile
> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o
>  
>  obj-y += traps.o
>  obj-y += domain.o
> +obj-y += vfp.o
>  
>  obj-$(EARLY_PRINTK) += debug.o
> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c
> new file mode 100644
> index 0000000..2ece43d
> --- /dev/null
> +++ b/xen/arch/arm/arm32/vfp.c
> @@ -0,0 +1,99 @@
> +#include <xen/sched.h>
> +#include <xen/init.h>
> +#include <asm/processor.h>
> +#include <asm/vfp.h>
> +
> +void vfp_save_state(struct vcpu *v)
> +{
> +    v->arch.vfp.fpexc = READ_CP32(FPEXC);
> +
> +    WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC);
> +
> +    v->arch.vfp.fpscr = READ_CP32(FPSCR);
> +
> +    if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */
> +    {
> +        v->arch.vfp.fpinst = READ_CP32(FPINST);
> +
> +        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
> +            v->arch.vfp.fpinst2 = READ_CP32(FPINST2);
> +        /* Disable FPEXC_EX */
> +        WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC);
> +    }
> +
> +    /* Save {d0-d15} */
> +    asm volatile("stc p11, cr0, [%0], #32*4"
> +                 : : "r" (v->arch.vfp.fpregs1)
> +                 : "memory");

http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that
"=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do
the job without clobbering the whole of memory.

I can't see any examples of this on ARM though, e.g. in Linux (other
than =Qo in the atomics) though and I've expected this patter to be
widespread so perhaps that isn't right. Alternatively it's quite a
tricky construct to grep for, so maybe I missed all the uses. We do use
=m a fair bit in x86 Xen FWIW.

> static __init int vfp_init(void)
> +{
> +    unsigned int vfpsid;
> +    unsigned int vfparch;
> +
> +    vfpsid = READ_CP32(FPSID);
> +
> +    printk("VFP implementer 0x%02x architecture %d part 0x%02x variant 0x%x "
> +           "rev 0x%x\n",
> +           (vfpsid & FPSID_IMPLEMENTER_MASK) >> FPSID_IMPLEMENTER_BIT,
> +           (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT,
> +           (vfpsid & FPSID_PART_MASK) >> FPSID_PART_BIT,
> +           (vfpsid & FPSID_VARIANT_MASK) >> FPSID_VARIANT_BIT,
> +           (vfpsid & FPSID_REV_MASK) >> FPSID_REV_BIT);
> +
> +    vfparch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT;
> +    if ( vfparch < 2 )
> +        panic("Xen only support VFP 3\n");

Should be "supports". Don't bother resending just for that though.

Ian.
Julien Grall June 3, 2013, 2:32 p.m. UTC | #2
On 06/03/2013 03:15 PM, Ian Campbell wrote:

> On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote:
>> Add support for VFP context switch on arm32 and a dummy support for arm64
>>
>> Signed-off-by: Julien Grall <julien.grall@linaro.org>
>>
>> Changes in v3:
>>     - Add vfp_init to check if the processor supports VFP 3
>>     - Add clobber memory
>>     - Remove tmps
>>     - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h
>>
>> Changes in v2:
>>     - Fix all the small errors (type, lost headers...)
>>     - Add some comments
>> ---
>>  xen/arch/arm/arm32/Makefile     |    1 +
>>  xen/arch/arm/arm32/vfp.c        |   99 +++++++++++++++++++++++++++++++++++++++
>>  xen/arch/arm/arm64/Makefile     |    1 +
>>  xen/arch/arm/arm64/vfp.c        |   13 +++++
>>  xen/arch/arm/domain.c           |    7 ++-
>>  xen/include/asm-arm/arm32/vfp.h |   41 ++++++++++++++++
>>  xen/include/asm-arm/arm64/vfp.h |   16 +++++++
>>  xen/include/asm-arm/cpregs.h    |    9 ++++
>>  xen/include/asm-arm/domain.h    |    4 ++
>>  xen/include/asm-arm/vfp.h       |   25 ++++++++++
>>  10 files changed, 214 insertions(+), 2 deletions(-)
>>  create mode 100644 xen/arch/arm/arm32/vfp.c
>>  create mode 100644 xen/arch/arm/arm64/vfp.c
>>  create mode 100644 xen/include/asm-arm/arm32/vfp.h
>>  create mode 100644 xen/include/asm-arm/arm64/vfp.h
>>  create mode 100644 xen/include/asm-arm/vfp.h
>>
>> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile
>> index aaf277a..b903803 100644
>> --- a/xen/arch/arm/arm32/Makefile
>> +++ b/xen/arch/arm/arm32/Makefile
>> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o
>>  
>>  obj-y += traps.o
>>  obj-y += domain.o
>> +obj-y += vfp.o
>>  
>>  obj-$(EARLY_PRINTK) += debug.o
>> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c
>> new file mode 100644
>> index 0000000..2ece43d
>> --- /dev/null
>> +++ b/xen/arch/arm/arm32/vfp.c
>> @@ -0,0 +1,99 @@
>> +#include <xen/sched.h>
>> +#include <xen/init.h>
>> +#include <asm/processor.h>
>> +#include <asm/vfp.h>
>> +
>> +void vfp_save_state(struct vcpu *v)
>> +{
>> +    v->arch.vfp.fpexc = READ_CP32(FPEXC);
>> +
>> +    WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC);
>> +
>> +    v->arch.vfp.fpscr = READ_CP32(FPSCR);
>> +
>> +    if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */
>> +    {
>> +        v->arch.vfp.fpinst = READ_CP32(FPINST);
>> +
>> +        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
>> +            v->arch.vfp.fpinst2 = READ_CP32(FPINST2);
>> +        /* Disable FPEXC_EX */
>> +        WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC);
>> +    }
>> +
>> +    /* Save {d0-d15} */
>> +    asm volatile("stc p11, cr0, [%0], #32*4"
>> +                 : : "r" (v->arch.vfp.fpregs1)
>> +                 : "memory");
> 
> http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that
> "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do
> the job without clobbering the whole of memory.

I'm not sure to fully understand the concept behind "=m". Does it mean
that gcc will clobber all the memory range addressed by fpregs?
Ian Campbell June 3, 2013, 2:38 p.m. UTC | #3
On Mon, 2013-06-03 at 15:32 +0100, Julien Grall wrote:
> On 06/03/2013 03:15 PM, Ian Campbell wrote:
> 
> > On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote:
> >> Add support for VFP context switch on arm32 and a dummy support for arm64
> >>
> >> Signed-off-by: Julien Grall <julien.grall@linaro.org>
> >>
> >> Changes in v3:
> >>     - Add vfp_init to check if the processor supports VFP 3
> >>     - Add clobber memory
> >>     - Remove tmps
> >>     - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h
> >>
> >> Changes in v2:
> >>     - Fix all the small errors (type, lost headers...)
> >>     - Add some comments
> >> ---
> >>  xen/arch/arm/arm32/Makefile     |    1 +
> >>  xen/arch/arm/arm32/vfp.c        |   99 +++++++++++++++++++++++++++++++++++++++
> >>  xen/arch/arm/arm64/Makefile     |    1 +
> >>  xen/arch/arm/arm64/vfp.c        |   13 +++++
> >>  xen/arch/arm/domain.c           |    7 ++-
> >>  xen/include/asm-arm/arm32/vfp.h |   41 ++++++++++++++++
> >>  xen/include/asm-arm/arm64/vfp.h |   16 +++++++
> >>  xen/include/asm-arm/cpregs.h    |    9 ++++
> >>  xen/include/asm-arm/domain.h    |    4 ++
> >>  xen/include/asm-arm/vfp.h       |   25 ++++++++++
> >>  10 files changed, 214 insertions(+), 2 deletions(-)
> >>  create mode 100644 xen/arch/arm/arm32/vfp.c
> >>  create mode 100644 xen/arch/arm/arm64/vfp.c
> >>  create mode 100644 xen/include/asm-arm/arm32/vfp.h
> >>  create mode 100644 xen/include/asm-arm/arm64/vfp.h
> >>  create mode 100644 xen/include/asm-arm/vfp.h
> >>
> >> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile
> >> index aaf277a..b903803 100644
> >> --- a/xen/arch/arm/arm32/Makefile
> >> +++ b/xen/arch/arm/arm32/Makefile
> >> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o
> >>  
> >>  obj-y += traps.o
> >>  obj-y += domain.o
> >> +obj-y += vfp.o
> >>  
> >>  obj-$(EARLY_PRINTK) += debug.o
> >> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c
> >> new file mode 100644
> >> index 0000000..2ece43d
> >> --- /dev/null
> >> +++ b/xen/arch/arm/arm32/vfp.c
> >> @@ -0,0 +1,99 @@
> >> +#include <xen/sched.h>
> >> +#include <xen/init.h>
> >> +#include <asm/processor.h>
> >> +#include <asm/vfp.h>
> >> +
> >> +void vfp_save_state(struct vcpu *v)
> >> +{
> >> +    v->arch.vfp.fpexc = READ_CP32(FPEXC);
> >> +
> >> +    WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC);
> >> +
> >> +    v->arch.vfp.fpscr = READ_CP32(FPSCR);
> >> +
> >> +    if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */
> >> +    {
> >> +        v->arch.vfp.fpinst = READ_CP32(FPINST);
> >> +
> >> +        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
> >> +            v->arch.vfp.fpinst2 = READ_CP32(FPINST2);
> >> +        /* Disable FPEXC_EX */
> >> +        WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC);
> >> +    }
> >> +
> >> +    /* Save {d0-d15} */
> >> +    asm volatile("stc p11, cr0, [%0], #32*4"
> >> +                 : : "r" (v->arch.vfp.fpregs1)
> >> +                 : "memory");
> > 
> > http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that
> > "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do
> > the job without clobbering the whole of memory.
> 
> I'm not sure to fully understand the concept behind "=m". Does it mean
> that gcc will clobber all the memory range addressed by fpregs?

I'm not totally confident in this stuff myself....

Apparently the "=" modified means[0] "this operand is write-only for
this instruction: the previous value is discarded and replaced by output
data." In the case of a memory constraint you'd want to hope that
"discarded and replaced" would be equivalent to clobbering the address,
at least in cases where the compiler knows the size of the thing, as it
does here.

Ian.

[0] http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/Modifiers.html#Modifiers
Julien Grall June 3, 2013, 2:57 p.m. UTC | #4
On 06/03/2013 03:38 PM, Ian Campbell wrote:

> On Mon, 2013-06-03 at 15:32 +0100, Julien Grall wrote:
>> On 06/03/2013 03:15 PM, Ian Campbell wrote:
>>
>>> On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote:
>>>> Add support for VFP context switch on arm32 and a dummy support for arm64
>>>>
>>>> Signed-off-by: Julien Grall <julien.grall@linaro.org>
>>>>
>>>> Changes in v3:
>>>>     - Add vfp_init to check if the processor supports VFP 3
>>>>     - Add clobber memory
>>>>     - Remove tmps
>>>>     - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h
>>>>
>>>> Changes in v2:
>>>>     - Fix all the small errors (type, lost headers...)
>>>>     - Add some comments
>>>> ---
>>>>  xen/arch/arm/arm32/Makefile     |    1 +
>>>>  xen/arch/arm/arm32/vfp.c        |   99 +++++++++++++++++++++++++++++++++++++++
>>>>  xen/arch/arm/arm64/Makefile     |    1 +
>>>>  xen/arch/arm/arm64/vfp.c        |   13 +++++
>>>>  xen/arch/arm/domain.c           |    7 ++-
>>>>  xen/include/asm-arm/arm32/vfp.h |   41 ++++++++++++++++
>>>>  xen/include/asm-arm/arm64/vfp.h |   16 +++++++
>>>>  xen/include/asm-arm/cpregs.h    |    9 ++++
>>>>  xen/include/asm-arm/domain.h    |    4 ++
>>>>  xen/include/asm-arm/vfp.h       |   25 ++++++++++
>>>>  10 files changed, 214 insertions(+), 2 deletions(-)
>>>>  create mode 100644 xen/arch/arm/arm32/vfp.c
>>>>  create mode 100644 xen/arch/arm/arm64/vfp.c
>>>>  create mode 100644 xen/include/asm-arm/arm32/vfp.h
>>>>  create mode 100644 xen/include/asm-arm/arm64/vfp.h
>>>>  create mode 100644 xen/include/asm-arm/vfp.h
>>>>
>>>> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile
>>>> index aaf277a..b903803 100644
>>>> --- a/xen/arch/arm/arm32/Makefile
>>>> +++ b/xen/arch/arm/arm32/Makefile
>>>> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o
>>>>  
>>>>  obj-y += traps.o
>>>>  obj-y += domain.o
>>>> +obj-y += vfp.o
>>>>  
>>>>  obj-$(EARLY_PRINTK) += debug.o
>>>> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c
>>>> new file mode 100644
>>>> index 0000000..2ece43d
>>>> --- /dev/null
>>>> +++ b/xen/arch/arm/arm32/vfp.c
>>>> @@ -0,0 +1,99 @@
>>>> +#include <xen/sched.h>
>>>> +#include <xen/init.h>
>>>> +#include <asm/processor.h>
>>>> +#include <asm/vfp.h>
>>>> +
>>>> +void vfp_save_state(struct vcpu *v)
>>>> +{
>>>> +    v->arch.vfp.fpexc = READ_CP32(FPEXC);
>>>> +
>>>> +    WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC);
>>>> +
>>>> +    v->arch.vfp.fpscr = READ_CP32(FPSCR);
>>>> +
>>>> +    if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */
>>>> +    {
>>>> +        v->arch.vfp.fpinst = READ_CP32(FPINST);
>>>> +
>>>> +        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
>>>> +            v->arch.vfp.fpinst2 = READ_CP32(FPINST2);
>>>> +        /* Disable FPEXC_EX */
>>>> +        WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC);
>>>> +    }
>>>> +
>>>> +    /* Save {d0-d15} */
>>>> +    asm volatile("stc p11, cr0, [%0], #32*4"
>>>> +                 : : "r" (v->arch.vfp.fpregs1)
>>>> +                 : "memory");
>>>
>>> http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that
>>> "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do
>>> the job without clobbering the whole of memory.
>>
>> I'm not sure to fully understand the concept behind "=m". Does it mean
>> that gcc will clobber all the memory range addressed by fpregs?
> 
> I'm not totally confident in this stuff myself....
> 
> Apparently the "=" modified means[0] "this operand is write-only for
> this instruction: the previous value is discarded and replaced by output
> data." In the case of a memory constraint you'd want to hope that
> "discarded and replaced" would be equivalent to clobbering the address,
> at least in cases where the compiler knows the size of the thing, as it
> does here.
> 
> Ian.
> 
> [0] http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/Modifiers.html#Modifiers
> 


Shall I use this contrainst for the stc instructions and send a new
patch series?
Ian Campbell June 3, 2013, 3:14 p.m. UTC | #5
On Mon, 2013-06-03 at 15:57 +0100, Julien Grall wrote:
> On 06/03/2013 03:38 PM, Ian Campbell wrote:
> 
> > On Mon, 2013-06-03 at 15:32 +0100, Julien Grall wrote:
> >> On 06/03/2013 03:15 PM, Ian Campbell wrote:
> >>
> >>> On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote:
> >>>> Add support for VFP context switch on arm32 and a dummy support for arm64
> >>>>
> >>>> Signed-off-by: Julien Grall <julien.grall@linaro.org>
> >>>>
> >>>> Changes in v3:
> >>>>     - Add vfp_init to check if the processor supports VFP 3
> >>>>     - Add clobber memory
> >>>>     - Remove tmps
> >>>>     - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h
> >>>>
> >>>> Changes in v2:
> >>>>     - Fix all the small errors (type, lost headers...)
> >>>>     - Add some comments
> >>>> ---
> >>>>  xen/arch/arm/arm32/Makefile     |    1 +
> >>>>  xen/arch/arm/arm32/vfp.c        |   99 +++++++++++++++++++++++++++++++++++++++
> >>>>  xen/arch/arm/arm64/Makefile     |    1 +
> >>>>  xen/arch/arm/arm64/vfp.c        |   13 +++++
> >>>>  xen/arch/arm/domain.c           |    7 ++-
> >>>>  xen/include/asm-arm/arm32/vfp.h |   41 ++++++++++++++++
> >>>>  xen/include/asm-arm/arm64/vfp.h |   16 +++++++
> >>>>  xen/include/asm-arm/cpregs.h    |    9 ++++
> >>>>  xen/include/asm-arm/domain.h    |    4 ++
> >>>>  xen/include/asm-arm/vfp.h       |   25 ++++++++++
> >>>>  10 files changed, 214 insertions(+), 2 deletions(-)
> >>>>  create mode 100644 xen/arch/arm/arm32/vfp.c
> >>>>  create mode 100644 xen/arch/arm/arm64/vfp.c
> >>>>  create mode 100644 xen/include/asm-arm/arm32/vfp.h
> >>>>  create mode 100644 xen/include/asm-arm/arm64/vfp.h
> >>>>  create mode 100644 xen/include/asm-arm/vfp.h
> >>>>
> >>>> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile
> >>>> index aaf277a..b903803 100644
> >>>> --- a/xen/arch/arm/arm32/Makefile
> >>>> +++ b/xen/arch/arm/arm32/Makefile
> >>>> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o
> >>>>  
> >>>>  obj-y += traps.o
> >>>>  obj-y += domain.o
> >>>> +obj-y += vfp.o
> >>>>  
> >>>>  obj-$(EARLY_PRINTK) += debug.o
> >>>> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c
> >>>> new file mode 100644
> >>>> index 0000000..2ece43d
> >>>> --- /dev/null
> >>>> +++ b/xen/arch/arm/arm32/vfp.c
> >>>> @@ -0,0 +1,99 @@
> >>>> +#include <xen/sched.h>
> >>>> +#include <xen/init.h>
> >>>> +#include <asm/processor.h>
> >>>> +#include <asm/vfp.h>
> >>>> +
> >>>> +void vfp_save_state(struct vcpu *v)
> >>>> +{
> >>>> +    v->arch.vfp.fpexc = READ_CP32(FPEXC);
> >>>> +
> >>>> +    WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC);
> >>>> +
> >>>> +    v->arch.vfp.fpscr = READ_CP32(FPSCR);
> >>>> +
> >>>> +    if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */
> >>>> +    {
> >>>> +        v->arch.vfp.fpinst = READ_CP32(FPINST);
> >>>> +
> >>>> +        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
> >>>> +            v->arch.vfp.fpinst2 = READ_CP32(FPINST2);
> >>>> +        /* Disable FPEXC_EX */
> >>>> +        WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC);
> >>>> +    }
> >>>> +
> >>>> +    /* Save {d0-d15} */
> >>>> +    asm volatile("stc p11, cr0, [%0], #32*4"
> >>>> +                 : : "r" (v->arch.vfp.fpregs1)
> >>>> +                 : "memory");
> >>>
> >>> http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that
> >>> "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do
> >>> the job without clobbering the whole of memory.
> >>
> >> I'm not sure to fully understand the concept behind "=m". Does it mean
> >> that gcc will clobber all the memory range addressed by fpregs?
> > 
> > I'm not totally confident in this stuff myself....
> > 
> > Apparently the "=" modified means[0] "this operand is write-only for
> > this instruction: the previous value is discarded and replaced by output
> > data." In the case of a memory constraint you'd want to hope that
> > "discarded and replaced" would be equivalent to clobbering the address,
> > at least in cases where the compiler knows the size of the thing, as it
> > does here.
> > 
> > Ian.
> > 
> > [0] http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/Modifiers.html#Modifiers
> > 
> 
> 
> Shall I use this contrainst for the stc instructions and send a new
> patch series?

If you buy my reasoning and if it works then I guess so.

Ian.
Julien Grall June 3, 2013, 3:27 p.m. UTC | #6
On 06/03/2013 04:14 PM, Ian Campbell wrote:

> On Mon, 2013-06-03 at 15:57 +0100, Julien Grall wrote:
>> On 06/03/2013 03:38 PM, Ian Campbell wrote:
>>
>>> On Mon, 2013-06-03 at 15:32 +0100, Julien Grall wrote:
>>>> On 06/03/2013 03:15 PM, Ian Campbell wrote:
>>>>
>>>>> On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote:
>>>>>> Add support for VFP context switch on arm32 and a dummy support for arm64
>>>>>>
>>>>>> Signed-off-by: Julien Grall <julien.grall@linaro.org>
>>>>>>
>>>>>> Changes in v3:
>>>>>>     - Add vfp_init to check if the processor supports VFP 3
>>>>>>     - Add clobber memory
>>>>>>     - Remove tmps
>>>>>>     - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h
>>>>>>
>>>>>> Changes in v2:
>>>>>>     - Fix all the small errors (type, lost headers...)
>>>>>>     - Add some comments
>>>>>> ---
>>>>>>  xen/arch/arm/arm32/Makefile     |    1 +
>>>>>>  xen/arch/arm/arm32/vfp.c        |   99 +++++++++++++++++++++++++++++++++++++++
>>>>>>  xen/arch/arm/arm64/Makefile     |    1 +
>>>>>>  xen/arch/arm/arm64/vfp.c        |   13 +++++
>>>>>>  xen/arch/arm/domain.c           |    7 ++-
>>>>>>  xen/include/asm-arm/arm32/vfp.h |   41 ++++++++++++++++
>>>>>>  xen/include/asm-arm/arm64/vfp.h |   16 +++++++
>>>>>>  xen/include/asm-arm/cpregs.h    |    9 ++++
>>>>>>  xen/include/asm-arm/domain.h    |    4 ++
>>>>>>  xen/include/asm-arm/vfp.h       |   25 ++++++++++
>>>>>>  10 files changed, 214 insertions(+), 2 deletions(-)
>>>>>>  create mode 100644 xen/arch/arm/arm32/vfp.c
>>>>>>  create mode 100644 xen/arch/arm/arm64/vfp.c
>>>>>>  create mode 100644 xen/include/asm-arm/arm32/vfp.h
>>>>>>  create mode 100644 xen/include/asm-arm/arm64/vfp.h
>>>>>>  create mode 100644 xen/include/asm-arm/vfp.h
>>>>>>
>>>>>> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile
>>>>>> index aaf277a..b903803 100644
>>>>>> --- a/xen/arch/arm/arm32/Makefile
>>>>>> +++ b/xen/arch/arm/arm32/Makefile
>>>>>> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o
>>>>>>  
>>>>>>  obj-y += traps.o
>>>>>>  obj-y += domain.o
>>>>>> +obj-y += vfp.o
>>>>>>  
>>>>>>  obj-$(EARLY_PRINTK) += debug.o
>>>>>> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c
>>>>>> new file mode 100644
>>>>>> index 0000000..2ece43d
>>>>>> --- /dev/null
>>>>>> +++ b/xen/arch/arm/arm32/vfp.c
>>>>>> @@ -0,0 +1,99 @@
>>>>>> +#include <xen/sched.h>
>>>>>> +#include <xen/init.h>
>>>>>> +#include <asm/processor.h>
>>>>>> +#include <asm/vfp.h>
>>>>>> +
>>>>>> +void vfp_save_state(struct vcpu *v)
>>>>>> +{
>>>>>> +    v->arch.vfp.fpexc = READ_CP32(FPEXC);
>>>>>> +
>>>>>> +    WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC);
>>>>>> +
>>>>>> +    v->arch.vfp.fpscr = READ_CP32(FPSCR);
>>>>>> +
>>>>>> +    if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */
>>>>>> +    {
>>>>>> +        v->arch.vfp.fpinst = READ_CP32(FPINST);
>>>>>> +
>>>>>> +        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
>>>>>> +            v->arch.vfp.fpinst2 = READ_CP32(FPINST2);
>>>>>> +        /* Disable FPEXC_EX */
>>>>>> +        WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC);
>>>>>> +    }
>>>>>> +
>>>>>> +    /* Save {d0-d15} */
>>>>>> +    asm volatile("stc p11, cr0, [%0], #32*4"
>>>>>> +                 : : "r" (v->arch.vfp.fpregs1)
>>>>>> +                 : "memory");
>>>>>
>>>>> http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that
>>>>> "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do
>>>>> the job without clobbering the whole of memory.
>>>>
>>>> I'm not sure to fully understand the concept behind "=m". Does it mean
>>>> that gcc will clobber all the memory range addressed by fpregs?
>>>
>>> I'm not totally confident in this stuff myself....
>>>
>>> Apparently the "=" modified means[0] "this operand is write-only for
>>> this instruction: the previous value is discarded and replaced by output
>>> data." In the case of a memory constraint you'd want to hope that
>>> "discarded and replaced" would be equivalent to clobbering the address,
>>> at least in cases where the compiler knows the size of the thing, as it
>>> does here.
>>>
>>> Ian.
>>>
>>> [0] http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/Modifiers.html#Modifiers
>>>
>>
>>
>> Shall I use this contrainst for the stc instructions and send a new
>> patch series?
> 
> If you buy my reasoning and if it works then I guess so.

I will give a try.
Julien Grall June 5, 2013, 1:06 p.m. UTC | #7
On 06/03/2013 04:14 PM, Ian Campbell wrote:

> On Mon, 2013-06-03 at 15:57 +0100, Julien Grall wrote:
>> On 06/03/2013 03:38 PM, Ian Campbell wrote:
>>
>>> On Mon, 2013-06-03 at 15:32 +0100, Julien Grall wrote:
>>>> On 06/03/2013 03:15 PM, Ian Campbell wrote:
>>>>
>>>>> On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote:
>>>>>> Add support for VFP context switch on arm32 and a dummy support for arm64
>>>>>>
>>>>>> Signed-off-by: Julien Grall <julien.grall@linaro.org>
>>>>>>
>>>>>> Changes in v3:
>>>>>>     - Add vfp_init to check if the processor supports VFP 3
>>>>>>     - Add clobber memory
>>>>>>     - Remove tmps
>>>>>>     - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h
>>>>>>
>>>>>> Changes in v2:
>>>>>>     - Fix all the small errors (type, lost headers...)
>>>>>>     - Add some comments
>>>>>> ---
>>>>>>  xen/arch/arm/arm32/Makefile     |    1 +
>>>>>>  xen/arch/arm/arm32/vfp.c        |   99 +++++++++++++++++++++++++++++++++++++++
>>>>>>  xen/arch/arm/arm64/Makefile     |    1 +
>>>>>>  xen/arch/arm/arm64/vfp.c        |   13 +++++
>>>>>>  xen/arch/arm/domain.c           |    7 ++-
>>>>>>  xen/include/asm-arm/arm32/vfp.h |   41 ++++++++++++++++
>>>>>>  xen/include/asm-arm/arm64/vfp.h |   16 +++++++
>>>>>>  xen/include/asm-arm/cpregs.h    |    9 ++++
>>>>>>  xen/include/asm-arm/domain.h    |    4 ++
>>>>>>  xen/include/asm-arm/vfp.h       |   25 ++++++++++
>>>>>>  10 files changed, 214 insertions(+), 2 deletions(-)
>>>>>>  create mode 100644 xen/arch/arm/arm32/vfp.c
>>>>>>  create mode 100644 xen/arch/arm/arm64/vfp.c
>>>>>>  create mode 100644 xen/include/asm-arm/arm32/vfp.h
>>>>>>  create mode 100644 xen/include/asm-arm/arm64/vfp.h
>>>>>>  create mode 100644 xen/include/asm-arm/vfp.h
>>>>>>
>>>>>> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile
>>>>>> index aaf277a..b903803 100644
>>>>>> --- a/xen/arch/arm/arm32/Makefile
>>>>>> +++ b/xen/arch/arm/arm32/Makefile
>>>>>> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o
>>>>>>  
>>>>>>  obj-y += traps.o
>>>>>>  obj-y += domain.o
>>>>>> +obj-y += vfp.o
>>>>>>  
>>>>>>  obj-$(EARLY_PRINTK) += debug.o
>>>>>> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c
>>>>>> new file mode 100644
>>>>>> index 0000000..2ece43d
>>>>>> --- /dev/null
>>>>>> +++ b/xen/arch/arm/arm32/vfp.c
>>>>>> @@ -0,0 +1,99 @@
>>>>>> +#include <xen/sched.h>
>>>>>> +#include <xen/init.h>
>>>>>> +#include <asm/processor.h>
>>>>>> +#include <asm/vfp.h>
>>>>>> +
>>>>>> +void vfp_save_state(struct vcpu *v)
>>>>>> +{
>>>>>> +    v->arch.vfp.fpexc = READ_CP32(FPEXC);
>>>>>> +
>>>>>> +    WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC);
>>>>>> +
>>>>>> +    v->arch.vfp.fpscr = READ_CP32(FPSCR);
>>>>>> +
>>>>>> +    if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */
>>>>>> +    {
>>>>>> +        v->arch.vfp.fpinst = READ_CP32(FPINST);
>>>>>> +
>>>>>> +        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
>>>>>> +            v->arch.vfp.fpinst2 = READ_CP32(FPINST2);
>>>>>> +        /* Disable FPEXC_EX */
>>>>>> +        WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC);
>>>>>> +    }
>>>>>> +
>>>>>> +    /* Save {d0-d15} */
>>>>>> +    asm volatile("stc p11, cr0, [%0], #32*4"
>>>>>> +                 : : "r" (v->arch.vfp.fpregs1)
>>>>>> +                 : "memory");
>>>>>
>>>>> http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that
>>>>> "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do
>>>>> the job without clobbering the whole of memory.
>>>>
>>>> I'm not sure to fully understand the concept behind "=m". Does it mean
>>>> that gcc will clobber all the memory range addressed by fpregs?
>>>
>>> I'm not totally confident in this stuff myself....
>>>
>>> Apparently the "=" modified means[0] "this operand is write-only for
>>> this instruction: the previous value is discarded and replaced by output
>>> data." In the case of a memory constraint you'd want to hope that
>>> "discarded and replaced" would be equivalent to clobbering the address,
>>> at least in cases where the compiler knows the size of the thing, as it
>>> does here.
>>>
>>> Ian.
>>>
>>> [0] http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/Modifiers.html#Modifiers
>>>
>>
>>
>> Shall I use this contrainst for the stc instructions and send a new
>> patch series?
> 
> If you buy my reasoning and if it works then I guess so.


Hi,

So I gave a try and it doesn't work.
The modifiers =m replaces %0 by [rn,imm] which is wrong. So it's not
possible to specify the amount of data which the instruction need to
read (the #32*4) because pre-index and post-index aren't allowed in a
same instruction.

If I remove #32*4, the instruction is different. With objdump on each
generate object:
- vstmia r3!, {d0,d15}
- vstr d0, [rn, imm]

--
Julien
Ian Campbell June 12, 2013, 3:14 p.m. UTC | #8
On Wed, 2013-06-05 at 14:06 +0100, Julien Grall wrote:
> On 06/03/2013 04:14 PM, Ian Campbell wrote:
> 
> > On Mon, 2013-06-03 at 15:57 +0100, Julien Grall wrote:
> >> On 06/03/2013 03:38 PM, Ian Campbell wrote:
> >>
> >>> On Mon, 2013-06-03 at 15:32 +0100, Julien Grall wrote:
> >>>> On 06/03/2013 03:15 PM, Ian Campbell wrote:
> >>>>
> >>>>> On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote:
> >>>>>> Add support for VFP context switch on arm32 and a dummy support for arm64
> >>>>>>
> >>>>>> Signed-off-by: Julien Grall <julien.grall@linaro.org>
> >>>>>>
> >>>>>> Changes in v3:
> >>>>>>     - Add vfp_init to check if the processor supports VFP 3
> >>>>>>     - Add clobber memory
> >>>>>>     - Remove tmps
> >>>>>>     - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h
> >>>>>>
> >>>>>> Changes in v2:
> >>>>>>     - Fix all the small errors (type, lost headers...)
> >>>>>>     - Add some comments
> >>>>>> ---
> >>>>>>  xen/arch/arm/arm32/Makefile     |    1 +
> >>>>>>  xen/arch/arm/arm32/vfp.c        |   99 +++++++++++++++++++++++++++++++++++++++
> >>>>>>  xen/arch/arm/arm64/Makefile     |    1 +
> >>>>>>  xen/arch/arm/arm64/vfp.c        |   13 +++++
> >>>>>>  xen/arch/arm/domain.c           |    7 ++-
> >>>>>>  xen/include/asm-arm/arm32/vfp.h |   41 ++++++++++++++++
> >>>>>>  xen/include/asm-arm/arm64/vfp.h |   16 +++++++
> >>>>>>  xen/include/asm-arm/cpregs.h    |    9 ++++
> >>>>>>  xen/include/asm-arm/domain.h    |    4 ++
> >>>>>>  xen/include/asm-arm/vfp.h       |   25 ++++++++++
> >>>>>>  10 files changed, 214 insertions(+), 2 deletions(-)
> >>>>>>  create mode 100644 xen/arch/arm/arm32/vfp.c
> >>>>>>  create mode 100644 xen/arch/arm/arm64/vfp.c
> >>>>>>  create mode 100644 xen/include/asm-arm/arm32/vfp.h
> >>>>>>  create mode 100644 xen/include/asm-arm/arm64/vfp.h
> >>>>>>  create mode 100644 xen/include/asm-arm/vfp.h
> >>>>>>
> >>>>>> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile
> >>>>>> index aaf277a..b903803 100644
> >>>>>> --- a/xen/arch/arm/arm32/Makefile
> >>>>>> +++ b/xen/arch/arm/arm32/Makefile
> >>>>>> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o
> >>>>>>  
> >>>>>>  obj-y += traps.o
> >>>>>>  obj-y += domain.o
> >>>>>> +obj-y += vfp.o
> >>>>>>  
> >>>>>>  obj-$(EARLY_PRINTK) += debug.o
> >>>>>> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c
> >>>>>> new file mode 100644
> >>>>>> index 0000000..2ece43d
> >>>>>> --- /dev/null
> >>>>>> +++ b/xen/arch/arm/arm32/vfp.c
> >>>>>> @@ -0,0 +1,99 @@
> >>>>>> +#include <xen/sched.h>
> >>>>>> +#include <xen/init.h>
> >>>>>> +#include <asm/processor.h>
> >>>>>> +#include <asm/vfp.h>
> >>>>>> +
> >>>>>> +void vfp_save_state(struct vcpu *v)
> >>>>>> +{
> >>>>>> +    v->arch.vfp.fpexc = READ_CP32(FPEXC);
> >>>>>> +
> >>>>>> +    WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC);
> >>>>>> +
> >>>>>> +    v->arch.vfp.fpscr = READ_CP32(FPSCR);
> >>>>>> +
> >>>>>> +    if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */
> >>>>>> +    {
> >>>>>> +        v->arch.vfp.fpinst = READ_CP32(FPINST);
> >>>>>> +
> >>>>>> +        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
> >>>>>> +            v->arch.vfp.fpinst2 = READ_CP32(FPINST2);
> >>>>>> +        /* Disable FPEXC_EX */
> >>>>>> +        WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC);
> >>>>>> +    }
> >>>>>> +
> >>>>>> +    /* Save {d0-d15} */
> >>>>>> +    asm volatile("stc p11, cr0, [%0], #32*4"
> >>>>>> +                 : : "r" (v->arch.vfp.fpregs1)
> >>>>>> +                 : "memory");
> >>>>>
> >>>>> http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that
> >>>>> "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do
> >>>>> the job without clobbering the whole of memory.
> >>>>
> >>>> I'm not sure to fully understand the concept behind "=m". Does it mean
> >>>> that gcc will clobber all the memory range addressed by fpregs?
> >>>
> >>> I'm not totally confident in this stuff myself....
> >>>
> >>> Apparently the "=" modified means[0] "this operand is write-only for
> >>> this instruction: the previous value is discarded and replaced by output
> >>> data." In the case of a memory constraint you'd want to hope that
> >>> "discarded and replaced" would be equivalent to clobbering the address,
> >>> at least in cases where the compiler knows the size of the thing, as it
> >>> does here.
> >>>
> >>> Ian.
> >>>
> >>> [0] http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/Modifiers.html#Modifiers
> >>>
> >>
> >>
> >> Shall I use this contrainst for the stc instructions and send a new
> >> patch series?
> > 
> > If you buy my reasoning and if it works then I guess so.
> 
> 
> Hi,
> 
> So I gave a try and it doesn't work.

That's a shame, oh well we'll have to use the big clobber then -- not
the end of the world.

> The modifiers =m replaces %0 by [rn,imm] which is wrong. So it's not
> possible to specify the amount of data which the instruction need to
> read (the #32*4) because pre-index and post-index aren't allowed in a
> same instruction.
> 
> If I remove #32*4, the instruction is different. With objdump on each
> generate object:
> - vstmia r3!, {d0,d15}
> - vstr d0, [rn, imm]
> 
> --
> Julien
>
Ian Campbell June 13, 2013, 4:39 p.m. UTC | #9
On Wed, 2013-06-12 at 16:14 +0100, Ian Campbell wrote:
> On Wed, 2013-06-05 at 14:06 +0100, Julien Grall wrote:
> > On 06/03/2013 04:14 PM, Ian Campbell wrote:
> > 
> > > On Mon, 2013-06-03 at 15:57 +0100, Julien Grall wrote:
> > >> On 06/03/2013 03:38 PM, Ian Campbell wrote:
> > >>
> > >>> On Mon, 2013-06-03 at 15:32 +0100, Julien Grall wrote:
> > >>>> On 06/03/2013 03:15 PM, Ian Campbell wrote:
> > >>>>
> > >>>>> On Mon, 2013-06-03 at 15:00 +0100, Julien Grall wrote:
> > >>>>>> Add support for VFP context switch on arm32 and a dummy support for arm64
> > >>>>>>
> > >>>>>> Signed-off-by: Julien Grall <julien.grall@linaro.org>
> > >>>>>>
> > >>>>>> Changes in v3:
> > >>>>>>     - Add vfp_init to check if the processor supports VFP 3
> > >>>>>>     - Add clobber memory
> > >>>>>>     - Remove tmps
> > >>>>>>     - s/COFNIG_ARM64/CONFIG_ARM64/ in include/asm/arm.h
> > >>>>>>
> > >>>>>> Changes in v2:
> > >>>>>>     - Fix all the small errors (type, lost headers...)
> > >>>>>>     - Add some comments
> > >>>>>> ---
> > >>>>>>  xen/arch/arm/arm32/Makefile     |    1 +
> > >>>>>>  xen/arch/arm/arm32/vfp.c        |   99 +++++++++++++++++++++++++++++++++++++++
> > >>>>>>  xen/arch/arm/arm64/Makefile     |    1 +
> > >>>>>>  xen/arch/arm/arm64/vfp.c        |   13 +++++
> > >>>>>>  xen/arch/arm/domain.c           |    7 ++-
> > >>>>>>  xen/include/asm-arm/arm32/vfp.h |   41 ++++++++++++++++
> > >>>>>>  xen/include/asm-arm/arm64/vfp.h |   16 +++++++
> > >>>>>>  xen/include/asm-arm/cpregs.h    |    9 ++++
> > >>>>>>  xen/include/asm-arm/domain.h    |    4 ++
> > >>>>>>  xen/include/asm-arm/vfp.h       |   25 ++++++++++
> > >>>>>>  10 files changed, 214 insertions(+), 2 deletions(-)
> > >>>>>>  create mode 100644 xen/arch/arm/arm32/vfp.c
> > >>>>>>  create mode 100644 xen/arch/arm/arm64/vfp.c
> > >>>>>>  create mode 100644 xen/include/asm-arm/arm32/vfp.h
> > >>>>>>  create mode 100644 xen/include/asm-arm/arm64/vfp.h
> > >>>>>>  create mode 100644 xen/include/asm-arm/vfp.h
> > >>>>>>
> > >>>>>> diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile
> > >>>>>> index aaf277a..b903803 100644
> > >>>>>> --- a/xen/arch/arm/arm32/Makefile
> > >>>>>> +++ b/xen/arch/arm/arm32/Makefile
> > >>>>>> @@ -6,5 +6,6 @@ obj-y += proc-ca15.o
> > >>>>>>  
> > >>>>>>  obj-y += traps.o
> > >>>>>>  obj-y += domain.o
> > >>>>>> +obj-y += vfp.o
> > >>>>>>  
> > >>>>>>  obj-$(EARLY_PRINTK) += debug.o
> > >>>>>> diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c
> > >>>>>> new file mode 100644
> > >>>>>> index 0000000..2ece43d
> > >>>>>> --- /dev/null
> > >>>>>> +++ b/xen/arch/arm/arm32/vfp.c
> > >>>>>> @@ -0,0 +1,99 @@
> > >>>>>> +#include <xen/sched.h>
> > >>>>>> +#include <xen/init.h>
> > >>>>>> +#include <asm/processor.h>
> > >>>>>> +#include <asm/vfp.h>
> > >>>>>> +
> > >>>>>> +void vfp_save_state(struct vcpu *v)
> > >>>>>> +{
> > >>>>>> +    v->arch.vfp.fpexc = READ_CP32(FPEXC);
> > >>>>>> +
> > >>>>>> +    WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC);
> > >>>>>> +
> > >>>>>> +    v->arch.vfp.fpscr = READ_CP32(FPSCR);
> > >>>>>> +
> > >>>>>> +    if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */
> > >>>>>> +    {
> > >>>>>> +        v->arch.vfp.fpinst = READ_CP32(FPINST);
> > >>>>>> +
> > >>>>>> +        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
> > >>>>>> +            v->arch.vfp.fpinst2 = READ_CP32(FPINST2);
> > >>>>>> +        /* Disable FPEXC_EX */
> > >>>>>> +        WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC);
> > >>>>>> +    }
> > >>>>>> +
> > >>>>>> +    /* Save {d0-d15} */
> > >>>>>> +    asm volatile("stc p11, cr0, [%0], #32*4"
> > >>>>>> +                 : : "r" (v->arch.vfp.fpregs1)
> > >>>>>> +                 : "memory");
> > >>>>>
> > >>>>> http://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html suggests that
> > >>>>> "=m" (v->arch.vfp.fpregs1) or "=Q" (...) as output constraints might do
> > >>>>> the job without clobbering the whole of memory.
> > >>>>
> > >>>> I'm not sure to fully understand the concept behind "=m". Does it mean
> > >>>> that gcc will clobber all the memory range addressed by fpregs?
> > >>>
> > >>> I'm not totally confident in this stuff myself....
> > >>>
> > >>> Apparently the "=" modified means[0] "this operand is write-only for
> > >>> this instruction: the previous value is discarded and replaced by output
> > >>> data." In the case of a memory constraint you'd want to hope that
> > >>> "discarded and replaced" would be equivalent to clobbering the address,
> > >>> at least in cases where the compiler knows the size of the thing, as it
> > >>> does here.
> > >>>
> > >>> Ian.
> > >>>
> > >>> [0] http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/Modifiers.html#Modifiers
> > >>>
> > >>
> > >>
> > >> Shall I use this contrainst for the stc instructions and send a new
> > >> patch series?
> > > 
> > > If you buy my reasoning and if it works then I guess so.
> > 
> > 
> > Hi,
> > 
> > So I gave a try and it doesn't work.
> 
> That's a shame, oh well we'll have to use the big clobber then -- not
> the end of the world.
> 
> > The modifiers =m replaces %0 by [rn,imm] which is wrong. So it's not
> > possible to specify the amount of data which the instruction need to
> > read (the #32*4) because pre-index and post-index aren't allowed in a
> > same instruction.

I had a chat with Tim and he pointed out that this is exactly what the
=Q output constraint is for. According to
http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/Machine-Constraints.html#Machine-Constraints
it is:

        A memory reference where the exact address is in a single
        register

In other words it outlaws the [rn,imm] construct.

Does that work?

(that doc goes on to say:
         (`‘m’' is preferable for asm statements) 
lets ignore that ;-))

Ian.
diff mbox

Patch

diff --git a/xen/arch/arm/arm32/Makefile b/xen/arch/arm/arm32/Makefile
index aaf277a..b903803 100644
--- a/xen/arch/arm/arm32/Makefile
+++ b/xen/arch/arm/arm32/Makefile
@@ -6,5 +6,6 @@  obj-y += proc-ca15.o
 
 obj-y += traps.o
 obj-y += domain.o
+obj-y += vfp.o
 
 obj-$(EARLY_PRINTK) += debug.o
diff --git a/xen/arch/arm/arm32/vfp.c b/xen/arch/arm/arm32/vfp.c
new file mode 100644
index 0000000..2ece43d
--- /dev/null
+++ b/xen/arch/arm/arm32/vfp.c
@@ -0,0 +1,99 @@ 
+#include <xen/sched.h>
+#include <xen/init.h>
+#include <asm/processor.h>
+#include <asm/vfp.h>
+
+void vfp_save_state(struct vcpu *v)
+{
+    v->arch.vfp.fpexc = READ_CP32(FPEXC);
+
+    WRITE_CP32(v->arch.vfp.fpexc | FPEXC_EN, FPEXC);
+
+    v->arch.vfp.fpscr = READ_CP32(FPSCR);
+
+    if ( v->arch.vfp.fpexc & FPEXC_EX ) /* Check for sub-architecture */
+    {
+        v->arch.vfp.fpinst = READ_CP32(FPINST);
+
+        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
+            v->arch.vfp.fpinst2 = READ_CP32(FPINST2);
+        /* Disable FPEXC_EX */
+        WRITE_CP32((v->arch.vfp.fpexc | FPEXC_EN) & ~FPEXC_EX, FPEXC);
+    }
+
+    /* Save {d0-d15} */
+    asm volatile("stc p11, cr0, [%0], #32*4"
+                 : : "r" (v->arch.vfp.fpregs1)
+                 : "memory");
+
+    /* 32 x 64 bits registers? */
+    if ( (READ_CP32(MVFR0) & MVFR0_A_SIMD_MASK) == 2 )
+    {
+        /* Save {d16-d31} */
+        asm volatile("stcl p11, cr0, [%0], #32*4"
+                     : : "r" (v->arch.vfp.fpregs2)
+                     : "memory");
+    }
+
+    WRITE_CP32(v->arch.vfp.fpexc & ~(FPEXC_EN), FPEXC);
+}
+
+void vfp_restore_state(struct vcpu *v)
+{
+    WRITE_CP32(READ_CP32(FPEXC) | FPEXC_EN, FPEXC);
+
+    /* Restore {d0-d15} */
+    asm volatile("ldc p11, cr0, [%0], #32*4"
+                 : : "r" (v->arch.vfp.fpregs1)
+                 : "memory");
+
+    /* 32 x 64 bits registers? */
+    if ( (READ_CP32(MVFR0) & MVFR0_A_SIMD_MASK) == 2 ) /* 32 x 64 bits registers */
+        /* Restore {d16-d31} */
+        asm volatile("ldcl p11, cr0, [%0], #32*4"
+                     : : "r" (v->arch.vfp.fpregs2)
+                     : "memory");
+
+    if ( v->arch.vfp.fpexc & FPEXC_EX )
+    {
+        WRITE_CP32(v->arch.vfp.fpinst, FPINST);
+        if ( v->arch.vfp.fpexc & FPEXC_FP2V )
+            WRITE_CP32(v->arch.vfp.fpinst2, FPINST2);
+    }
+
+    WRITE_CP32(v->arch.vfp.fpscr, FPSCR);
+
+    WRITE_CP32(v->arch.vfp.fpexc, FPEXC);
+}
+
+static __init int vfp_init(void)
+{
+    unsigned int vfpsid;
+    unsigned int vfparch;
+
+    vfpsid = READ_CP32(FPSID);
+
+    printk("VFP implementer 0x%02x architecture %d part 0x%02x variant 0x%x "
+           "rev 0x%x\n",
+           (vfpsid & FPSID_IMPLEMENTER_MASK) >> FPSID_IMPLEMENTER_BIT,
+           (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT,
+           (vfpsid & FPSID_PART_MASK) >> FPSID_PART_BIT,
+           (vfpsid & FPSID_VARIANT_MASK) >> FPSID_VARIANT_BIT,
+           (vfpsid & FPSID_REV_MASK) >> FPSID_REV_BIT);
+
+    vfparch = (vfpsid & FPSID_ARCH_MASK) >> FPSID_ARCH_BIT;
+    if ( vfparch < 2 )
+        panic("Xen only support VFP 3\n");
+
+    return 0;
+}
+presmp_initcall(vfp_init);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/arch/arm/arm64/Makefile b/xen/arch/arm/arm64/Makefile
index 9484548..e06a0a9 100644
--- a/xen/arch/arm/arm64/Makefile
+++ b/xen/arch/arm/arm64/Makefile
@@ -5,5 +5,6 @@  obj-y += mode_switch.o
 
 obj-y += traps.o
 obj-y += domain.o
+obj-y += vfp.o
 
 obj-$(EARLY_PRINTK) += debug.o
diff --git a/xen/arch/arm/arm64/vfp.c b/xen/arch/arm/arm64/vfp.c
new file mode 100644
index 0000000..74e6a50
--- /dev/null
+++ b/xen/arch/arm/arm64/vfp.c
@@ -0,0 +1,13 @@ 
+#include <xen/sched.h>
+#include <asm/processor.h>
+#include <asm/vfp.h>
+
+void vfp_save_state(struct vcpu *v)
+{
+    /* TODO: implement it */
+}
+
+void vfp_restore_state(struct vcpu *v)
+{
+    /* TODO: implement it */
+}
diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c
index 4c434a1..f465ab7 100644
--- a/xen/arch/arm/domain.c
+++ b/xen/arch/arm/domain.c
@@ -27,6 +27,7 @@ 
 #include <asm/p2m.h>
 #include <asm/irq.h>
 #include <asm/cpufeature.h>
+#include <asm/vfp.h>
 
 #include <asm/gic.h>
 #include "vtimer.h"
@@ -117,7 +118,8 @@  static void ctxt_switch_from(struct vcpu *p)
 
     /* XXX MPU */
 
-    /* XXX VFP */
+    /* VFP */
+    vfp_save_state(p);
 
     /* VGIC */
     gic_save_state(p);
@@ -143,7 +145,8 @@  static void ctxt_switch_to(struct vcpu *n)
     /* VGIC */
     gic_restore_state(n);
 
-    /* XXX VFP */
+    /* VFP */
+    vfp_restore_state(n);
 
     /* XXX MPU */
 
diff --git a/xen/include/asm-arm/arm32/vfp.h b/xen/include/asm-arm/arm32/vfp.h
new file mode 100644
index 0000000..bade3bc
--- /dev/null
+++ b/xen/include/asm-arm/arm32/vfp.h
@@ -0,0 +1,41 @@ 
+#ifndef _ARM_ARM32_VFP_H
+#define _ARM_ARM32_VFP_H
+
+#define FPEXC_EX                (1u << 31)
+#define FPEXC_EN                (1u << 30)
+#define FPEXC_FP2V              (1u << 28)
+
+#define MVFR0_A_SIMD_MASK       (0xf << 0)
+
+
+#define FPSID_IMPLEMENTER_BIT   (24)
+#define FPSID_IMPLEMENTER_MASK  (0xff << FPSID_IMPLEMENTER_BIT)
+#define FPSID_ARCH_BIT          (16)
+#define FPSID_ARCH_MASK         (0xf << FPSID_ARCH_BIT)
+#define FPSID_PART_BIT          (8)
+#define FPSID_PART_MASK         (0xff << FPSID_PART_BIT)
+#define FPSID_VARIANT_BIT       (4)
+#define FPSID_VARIANT_MASK      (0xf << FPSID_VARIANT_BIT)
+#define FPSID_REV_BIT           (0)
+#define FPSID_REV_MASK          (0xf << FPSID_REV_BIT)
+
+struct vfp_state
+{
+    uint64_t fpregs1[16]; /* {d0-d15} */
+    uint64_t fpregs2[16]; /* {d16-d31} */
+    uint32_t fpexc;
+    uint32_t fpscr;
+    /* VFP implementation specific state */
+    uint32_t fpinst;
+    uint32_t fpinst2;
+};
+
+#endif /* _ARM_ARM32_VFP_H */
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-arm/arm64/vfp.h b/xen/include/asm-arm/arm64/vfp.h
new file mode 100644
index 0000000..3733d2c
--- /dev/null
+++ b/xen/include/asm-arm/arm64/vfp.h
@@ -0,0 +1,16 @@ 
+#ifndef _ARM_ARM64_VFP_H
+#define _ARM_ARM64_VFP_H
+
+struct vfp_state
+{
+};
+
+#endif /* _ARM_ARM64_VFP_H */
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-arm/cpregs.h b/xen/include/asm-arm/cpregs.h
index f08d59a..122dd1a 100644
--- a/xen/include/asm-arm/cpregs.h
+++ b/xen/include/asm-arm/cpregs.h
@@ -60,6 +60,15 @@ 
  * arguments, which are cp,opc1,crn,crm,opc2.
  */
 
+/* Coprocessor 10 */
+
+#define FPSID           p10,7,c0,c0,0   /* Floating-Point System ID Register */
+#define FPSCR           p10,7,c1,c0,0   /* Floating-Point Status and Control Register */
+#define MVFR0           p10,7,c7,c0,0   /* Media and VFP Feature Register 0 */
+#define FPEXC           p10,7,c8,c0,0   /* Floating-Point Exception Control Register */
+#define FPINST          p10,7,c9,c0,0   /* Floating-Point Instruction Register */
+#define FPINST2         p10,7,c10,c0,0  /* Floating-point Instruction Register 2 */
+
 /* Coprocessor 14 */
 
 /* CP14 CR0: */
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index cb251cc..339b6e6 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -6,6 +6,7 @@ 
 #include <xen/sched.h>
 #include <asm/page.h>
 #include <asm/p2m.h>
+#include <asm/vfp.h>
 #include <public/hvm/params.h>
 
 /* Represents state corresponding to a block of 32 interrupts */
@@ -188,6 +189,9 @@  struct arch_vcpu
     uint32_t joscr, jmcr;
 #endif
 
+    /* Float-pointer */
+    struct vfp_state vfp;
+
     /* CP 15 */
     uint32_t csselr;
 
diff --git a/xen/include/asm-arm/vfp.h b/xen/include/asm-arm/vfp.h
new file mode 100644
index 0000000..5f10fe5
--- /dev/null
+++ b/xen/include/asm-arm/vfp.h
@@ -0,0 +1,25 @@ 
+#ifndef _ASM_VFP_H
+#define _ASM_VFP_H
+
+#include <xen/sched.h>
+
+#if defined(CONFIG_ARM_32)
+# include <asm/arm32/vfp.h>
+#elif defined(CONFIG_ARM_64)
+# include <asm/arm64/vfp.h>
+#else
+# error "Unknown ARM variant"
+#endif
+
+void vfp_save_state(struct vcpu *v);
+void vfp_restore_state(struct vcpu *v);
+
+#endif /* _ASM_VFP_H */
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */