[v2,1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer

Message ID 20210202180028.876888-2-imbrenda@linux.ibm.com
State Superseded
Series [v2,1/2] s390/kvm: extend kvm_s390_shadow_fault to return entry pointer

Commit Message

Claudio Imbrenda Feb. 2, 2021, 6 p.m. UTC
Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
DAT table entry, or to the invalid entry.

Also return some flags in the lower bits of the address:
DAT_PROT: indicates that DAT protection applies because of the
          protection bit in the segment (or, if EDAT, region) tables
NOT_PTE: indicates that the address of the DAT table entry returned
         does not refer to a PTE, but to a segment or region table.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Cc: stable@vger.kernel.org
---
 arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
 arch/s390/kvm/gaccess.h |  5 ++++-
 arch/s390/kvm/vsie.c    |  8 ++++----
 3 files changed, 30 insertions(+), 9 deletions(-)
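
The value written back through the new output parameter packs an 8-byte-aligned
entry address together with the flag bits, so a caller has to mask before
dereferencing. A minimal sketch of the decoding, assuming the DAT_PROT and
NOT_PTE defines this patch adds to gaccess.h (the helper and struct here are
made up for illustration, they are not part of the patch):

/*
 * Hypothetical decoding of the value kvm_s390_shadow_fault() writes back
 * through its new output parameter.
 */
struct shadow_fault_info {
	unsigned long entry_addr;	/* address of the leaf or invalid DAT entry */
	bool dat_prot;			/* protection bit in segment/region tables */
	bool not_pte;			/* entry is a segment/region entry, not a PTE */
};

static struct shadow_fault_info decode_entry(unsigned long val)
{
	struct shadow_fault_info info = {
		/* DAT entries are 8-byte aligned, so the flags fit in the low bits */
		.entry_addr = val & ~0x7UL,
		.dat_prot = !!(val & DAT_PROT),
		.not_pte = !!(val & NOT_PTE),
	};
	return info;
}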

Comments

Janosch Frank Feb. 4, 2021, 4:34 p.m. UTC | #1
On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
> Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
> DAT table entry, or to the invalid entry.
> 
> Also return some flags in the lower bits of the address:
> DAT_PROT: indicates that DAT protection applies because of the
>           protection bit in the segment (or, if EDAT, region) tables
> NOT_PTE: indicates that the address of the DAT table entry returned
>          does not refer to a PTE, but to a segment or region table.
> 
> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> Cc: stable@vger.kernel.org
> ---
>  arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
>  arch/s390/kvm/gaccess.h |  5 ++++-
>  arch/s390/kvm/vsie.c    |  8 ++++----
>  3 files changed, 30 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> index 6d6b57059493..2d7bcbfb185e 100644
> --- a/arch/s390/kvm/gaccess.c
> +++ b/arch/s390/kvm/gaccess.c
> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>  			rfte.val = ptr;
>  			goto shadow_r2t;
>  		}
> +		*pgt = ptr + vaddr.rfx * 8;

So pgt either is a table entry if rc > 0 or a pointer to the first pte
on rc == 0 after this change?

Hrm, if it is really based on RCs then I might be able to come to terms
with having two things in a ptr with the name pgt. But it needs a
comment change.

>  		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
>  		if (rc)
>  			return rc;
> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>  			rste.val = ptr;
>  			goto shadow_r3t;
>  		}
> +		*pgt = ptr + vaddr.rsx * 8;
>  		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
>  		if (rc)
>  			return rc;
> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>  			rtte.val = ptr;
>  			goto shadow_sgt;
>  		}
> +		*pgt = ptr + vaddr.rtx * 8;
>  		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
>  		if (rc)
>  			return rc;
> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>  			ste.val = ptr;
>  			goto shadow_pgt;
>  		}
> +		*pgt = ptr + vaddr.sx * 8;
>  		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
>  		if (rc)
>  			return rc;
> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>   * @vcpu: virtual cpu
>   * @sg: pointer to the shadow guest address space structure
>   * @saddr: faulting address in the shadow gmap
> + * @pteptr: will contain the address of the faulting DAT table entry, or of
> + *          the valid leaf, plus some flags

pteptr is not the right name if it can be two things

>   *
>   * Returns: - 0 if the shadow fault was successfully resolved
>   *	    - > 0 (pgm exception code) on exceptions while faulting
> @@ -1165,11 +1171,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>   *	    - -ENOMEM if out of memory
>   */
>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
> -			  unsigned long saddr)
> +			  unsigned long saddr, unsigned long *pteptr)
>  {
>  	union vaddress vaddr;
>  	union page_table_entry pte;
> -	unsigned long pgt;
> +	unsigned long pgt = 0;
>  	int dat_protection, fake;
>  	int rc;
>  
> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
>  		pte.val = pgt + vaddr.px * PAGE_SIZE;
>  		goto shadow_page;
>  	}
> -	if (!rc)
> -		rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
> +
> +	switch (rc) {
> +	case PGM_SEGMENT_TRANSLATION:
> +	case PGM_REGION_THIRD_TRANS:
> +	case PGM_REGION_SECOND_TRANS:
> +	case PGM_REGION_FIRST_TRANS:
> +		pgt |= NOT_PTE;

GACC_TRANSL_ENTRY_INV ?

> +		break;
> +	case 0:
> +		pgt += vaddr.px * 8;
> +		rc = gmap_read_table(sg->parent, pgt, &pte.val);
> +	}
> +	if (pteptr)
> +		*pteptr = pgt | dat_protection * DAT_PROT;
>  	if (!rc && pte.i)
>  		rc = PGM_PAGE_TRANSLATION;
>  	if (!rc && pte.z)
> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
> index f4c51756c462..66a6e2cec97a 100644
> --- a/arch/s390/kvm/gaccess.h
> +++ b/arch/s390/kvm/gaccess.h
> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
>  int ipte_lock_held(struct kvm_vcpu *vcpu);
>  int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
>  
> +#define DAT_PROT 2

GACC_TRANSL_ENTRY_PROT

> +#define NOT_PTE 4
> +
>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
> -			  unsigned long saddr);
> +			  unsigned long saddr, unsigned long *pteptr);
>  
>  #endif /* __KVM_S390_GACCESS_H */
> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> index c5d0a58b2c29..7db022141db3 100644
> --- a/arch/s390/kvm/vsie.c
> +++ b/arch/s390/kvm/vsie.c
> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>  	/* with mso/msl, the prefix lies at offset *mso* */
>  	prefix += scb_s->mso;
>  
> -	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
> +	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
>  	if (!rc && (scb_s->ecb & ECB_TE))
>  		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> -					   prefix + PAGE_SIZE);
> +					   prefix + PAGE_SIZE, NULL);
>  	/*
>  	 * We don't have to mprotect, we will be called for all unshadows.
>  	 * SIE will detect if protection applies and trigger a validity.
> @@ -913,7 +913,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>  				    current->thread.gmap_addr, 1);
>  
>  	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> -				   current->thread.gmap_addr);
> +				   current->thread.gmap_addr, NULL);
>  	if (rc > 0) {
>  		rc = inject_fault(vcpu, rc,
>  				  current->thread.gmap_addr,
> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu,
>  {
>  	if (vsie_page->fault_addr)
>  		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> -				      vsie_page->fault_addr);
> +				      vsie_page->fault_addr, NULL);

Ok

>  	vsie_page->fault_addr = 0;
>  }
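
Spelled out, the contract being probed above looks like this from a caller's
point of view (an illustrative sketch of the v2 semantics, not code from the
series; vcpu, sg and saddr are assumed to be set up as in the vsie.c callers):

static void example_caller(struct kvm_vcpu *vcpu, struct gmap *sg,
			   unsigned long saddr)
{
	unsigned long entry = 0;
	int rc = kvm_s390_shadow_fault(vcpu, sg, saddr, &entry);

	if (rc == 0) {
		/* entry holds the address of the valid leaf DAT entry */
	} else if (rc > 0) {
		/*
		 * rc is a pgm exception code; for segment/region translation
		 * exceptions entry holds the address of the invalid table
		 * entry and NOT_PTE is set in its low bits
		 */
	}
}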
Janosch Frank Feb. 4, 2021, 5:05 p.m. UTC | #2
On 2/4/21 5:34 PM, Janosch Frank wrote:
> On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
>> Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
>> DAT table entry, or to the invalid entry.
>>
>> Also return some flags in the lower bits of the address:
>> DAT_PROT: indicates that DAT protection applies because of the
>>           protection bit in the segment (or, if EDAT, region) tables
>> NOT_PTE: indicates that the address of the DAT table entry returned
>>          does not refer to a PTE, but to a segment or region table.
>>
>> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>> Cc: stable@vger.kernel.org
>> ---
>>  arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
>>  arch/s390/kvm/gaccess.h |  5 ++++-
>>  arch/s390/kvm/vsie.c    |  8 ++++----
>>  3 files changed, 30 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
>> index 6d6b57059493..2d7bcbfb185e 100644
>> --- a/arch/s390/kvm/gaccess.c
>> +++ b/arch/s390/kvm/gaccess.c
>> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>  			rfte.val = ptr;
>>  			goto shadow_r2t;
>>  		}
>> +		*pgt = ptr + vaddr.rfx * 8;
> 
> So pgt either is a table entry if rc > 0 or a pointer to the first pte
> on rc == 0 after this change?
> 
> Hrm, if it is really based on RCs then I might be able to come to terms
> with having two things in a ptr with the name pgt. But it needs a
> comment change.
> 
>>  		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
>>  		if (rc)
>>  			return rc;
>> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>  			rste.val = ptr;
>>  			goto shadow_r3t;
>>  		}
>> +		*pgt = ptr + vaddr.rsx * 8;
>>  		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
>>  		if (rc)
>>  			return rc;
>> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>  			rtte.val = ptr;
>>  			goto shadow_sgt;
>>  		}
>> +		*pgt = ptr + vaddr.rtx * 8;
>>  		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
>>  		if (rc)
>>  			return rc;
>> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>  			ste.val = ptr;
>>  			goto shadow_pgt;
>>  		}
>> +		*pgt = ptr + vaddr.sx * 8;
>>  		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
>>  		if (rc)
>>  			return rc;
>> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>   * @vcpu: virtual cpu
>>   * @sg: pointer to the shadow guest address space structure
>>   * @saddr: faulting address in the shadow gmap
>> + * @pteptr: will contain the address of the faulting DAT table entry, or of
>> + *          the valid leaf, plus some flags
> 
> pteptr is not the right name if it can be two things

You use it for pei only, right?

> 
>>   *
>>   * Returns: - 0 if the shadow fault was successfully resolved
>>   *	    - > 0 (pgm exception code) on exceptions while faulting
>> @@ -1165,11 +1171,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>   *	    - -ENOMEM if out of memory
>>   */
>>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
>> -			  unsigned long saddr)
>> +			  unsigned long saddr, unsigned long *pteptr)
>>  {
>>  	union vaddress vaddr;
>>  	union page_table_entry pte;
>> -	unsigned long pgt;
>> +	unsigned long pgt = 0;
>>  	int dat_protection, fake;
>>  	int rc;
>>  
>> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
>>  		pte.val = pgt + vaddr.px * PAGE_SIZE;
>>  		goto shadow_page;
>>  	}
>> -	if (!rc)
>> -		rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
>> +
>> +	switch (rc) {
>> +	case PGM_SEGMENT_TRANSLATION:
>> +	case PGM_REGION_THIRD_TRANS:
>> +	case PGM_REGION_SECOND_TRANS:
>> +	case PGM_REGION_FIRST_TRANS:
>> +		pgt |= NOT_PTE;
> 
> GACC_TRANSL_ENTRY_INV ?
> 
>> +		break;
>> +	case 0:
>> +		pgt += vaddr.px * 8;
>> +		rc = gmap_read_table(sg->parent, pgt, &pte.val);
>> +	}
>> +	if (pteptr)
>> +		*pteptr = pgt | dat_protection * DAT_PROT;
>>  	if (!rc && pte.i)
>>  		rc = PGM_PAGE_TRANSLATION;
>>  	if (!rc && pte.z)
>> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
>> index f4c51756c462..66a6e2cec97a 100644
>> --- a/arch/s390/kvm/gaccess.h
>> +++ b/arch/s390/kvm/gaccess.h
>> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
>>  int ipte_lock_held(struct kvm_vcpu *vcpu);
>>  int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
>>  
>> +#define DAT_PROT 2
> 
> GACC_TRANSL_ENTRY_PROT

Ok after a second pass that's not what's going on here.
Those basically directly correspond to the MVPG PEI indication bits, right?

Do we also need to consider bit 63?

> 
>> +#define NOT_PTE 4
>> +
>>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
>> -			  unsigned long saddr);
>> +			  unsigned long saddr, unsigned long *pteptr);
>>  
>>  #endif /* __KVM_S390_GACCESS_H */
>> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
>> index c5d0a58b2c29..7db022141db3 100644
>> --- a/arch/s390/kvm/vsie.c
>> +++ b/arch/s390/kvm/vsie.c
>> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>>  	/* with mso/msl, the prefix lies at offset *mso* */
>>  	prefix += scb_s->mso;
>>  
>> -	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
>> +	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
>>  	if (!rc && (scb_s->ecb & ECB_TE))
>>  		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>> -					   prefix + PAGE_SIZE);
>> +					   prefix + PAGE_SIZE, NULL);
>>  	/*
>>  	 * We don't have to mprotect, we will be called for all unshadows.
>>  	 * SIE will detect if protection applies and trigger a validity.
>> @@ -913,7 +913,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>>  				    current->thread.gmap_addr, 1);
>>  
>>  	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>> -				   current->thread.gmap_addr);
>> +				   current->thread.gmap_addr, NULL);
>>  	if (rc > 0) {
>>  		rc = inject_fault(vcpu, rc,
>>  				  current->thread.gmap_addr,
>> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu,
>>  {
>>  	if (vsie_page->fault_addr)
>>  		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>> -				      vsie_page->fault_addr);
>> +				      vsie_page->fault_addr, NULL);
> 
> Ok
> 
>>  	vsie_page->fault_addr = 0;
>>  }
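
For reference on the PEI question: with the values chosen in this patch, the
flags land on the indication bits of the program-exception information word
(in s390 bit numbering, where bit 63 is the least significant, DAT_PROT = 2
is bit 62 and NOT_PTE = 4 is bit 61). A hypothetical forwarding helper,
assuming the defines from this patch (not part of the series):

/*
 * Illustration only: strip the entry address and keep the bits that
 * double as MVPG PEI indications.
 */
static unsigned long entry_to_pei(unsigned long entry)
{
	return entry & (DAT_PROT | NOT_PTE);
}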
Claudio Imbrenda Feb. 5, 2021, 12:15 p.m. UTC | #3
On Thu, 4 Feb 2021 17:34:00 +0100
Janosch Frank <frankja@linux.ibm.com> wrote:

> On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
> > Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
> > DAT table entry, or to the invalid entry.
> > 
> > Also return some flags in the lower bits of the address:
> > DAT_PROT: indicates that DAT protection applies because of the
> >           protection bit in the segment (or, if EDAT, region) tables
> > NOT_PTE: indicates that the address of the DAT table entry returned
> >          does not refer to a PTE, but to a segment or region table.
> > 
> > Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> > Cc: stable@vger.kernel.org
> > ---
> >  arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
> >  arch/s390/kvm/gaccess.h |  5 ++++-
> >  arch/s390/kvm/vsie.c    |  8 ++++----
> >  3 files changed, 30 insertions(+), 9 deletions(-)
> > 
> > diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> > index 6d6b57059493..2d7bcbfb185e 100644
> > --- a/arch/s390/kvm/gaccess.c
> > +++ b/arch/s390/kvm/gaccess.c
> > @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >  			rfte.val = ptr;
> >  			goto shadow_r2t;
> >  		}
> > +		*pgt = ptr + vaddr.rfx * 8;
> 
> So pgt either is a table entry if rc > 0 or a pointer to the first pte
> on rc == 0 after this change?

yes

> Hrm, if it is really based on RCs then I might be able to come to terms
> with having two things in a ptr with the name pgt. But it needs a
> comment change.

will do.

> >  		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
> >  		if (rc)
> >  			return rc;
> > @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >  			rste.val = ptr;
> >  			goto shadow_r3t;
> >  		}
> > +		*pgt = ptr + vaddr.rsx * 8;
> >  		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
> >  		if (rc)
> >  			return rc;
> > @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >  			rtte.val = ptr;
> >  			goto shadow_sgt;
> >  		}
> > +		*pgt = ptr + vaddr.rtx * 8;
> >  		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
> >  		if (rc)
> >  			return rc;
> > @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >  			ste.val = ptr;
> >  			goto shadow_pgt;
> >  		}
> > +		*pgt = ptr + vaddr.sx * 8;
> >  		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
> >  		if (rc)
> >  			return rc;
> > @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >   * @vcpu: virtual cpu
> >   * @sg: pointer to the shadow guest address space structure
> >   * @saddr: faulting address in the shadow gmap
> > + * @pteptr: will contain the address of the faulting DAT table entry, or of
> > + *          the valid leaf, plus some flags
> 
> pteptr is not the right name if it can be two things

it cannot be two things there, kvm_s390_shadow_fault always returns a
DAT _entry_ (pte, segment, region).

> >   *
> >   * Returns: - 0 if the shadow fault was successfully resolved
> >   *	    - > 0 (pgm exception code) on exceptions while faulting
> > @@ -1165,11 +1171,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >   *	    - -ENOMEM if out of memory
> >   */
> >  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
> > -			  unsigned long saddr)
> > +			  unsigned long saddr, unsigned long *pteptr)
> >  {
> >  	union vaddress vaddr;
> >  	union page_table_entry pte;
> > -	unsigned long pgt;
> > +	unsigned long pgt = 0;
> >  	int dat_protection, fake;
> >  	int rc;
> >  
> > @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
> >  		pte.val = pgt + vaddr.px * PAGE_SIZE;
> >  		goto shadow_page;
> >  	}
> > -	if (!rc)
> > -		rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
> > +
> > +	switch (rc) {
> > +	case PGM_SEGMENT_TRANSLATION:
> > +	case PGM_REGION_THIRD_TRANS:
> > +	case PGM_REGION_SECOND_TRANS:
> > +	case PGM_REGION_FIRST_TRANS:
> > +		pgt |= NOT_PTE;
> 
> GACC_TRANSL_ENTRY_INV ?

no, this is only for non-pte entries

> > +		break;
> > +	case 0:
> > +		pgt += vaddr.px * 8;
> > +		rc = gmap_read_table(sg->parent, pgt, &pte.val);
> > +	}
> > +	if (pteptr)
> > +		*pteptr = pgt | dat_protection * DAT_PROT;
> >  	if (!rc && pte.i)
> >  		rc = PGM_PAGE_TRANSLATION;
> >  	if (!rc && pte.z)
> > diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
> > index f4c51756c462..66a6e2cec97a 100644
> > --- a/arch/s390/kvm/gaccess.h
> > +++ b/arch/s390/kvm/gaccess.h
> > @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
> >  int ipte_lock_held(struct kvm_vcpu *vcpu);
> >  int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
> >  
> > +#define DAT_PROT 2
> 
> GACC_TRANSL_ENTRY_PROT

this is also only for non-pte entries

> > +#define NOT_PTE 4
> > +
> >  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
> > -			  unsigned long saddr);
> > +			  unsigned long saddr, unsigned long *pteptr);
> >  
> >  #endif /* __KVM_S390_GACCESS_H */
> > diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> > index c5d0a58b2c29..7db022141db3 100644
> > --- a/arch/s390/kvm/vsie.c
> > +++ b/arch/s390/kvm/vsie.c
> > @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> >  	/* with mso/msl, the prefix lies at offset *mso* */
> >  	prefix += scb_s->mso;
> >  
> > -	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
> > +	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
> >  	if (!rc && (scb_s->ecb & ECB_TE))
> >  		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> > -					   prefix + PAGE_SIZE);
> > +					   prefix + PAGE_SIZE, NULL);
> >  	/*
> >  	 * We don't have to mprotect, we will be called for all unshadows.
> >  	 * SIE will detect if protection applies and trigger a validity.
> > @@ -913,7 +913,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> >  				    current->thread.gmap_addr, 1);
> >  
> >  	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> > -				   current->thread.gmap_addr);
> > +				   current->thread.gmap_addr, NULL);
> >  	if (rc > 0) {
> >  		rc = inject_fault(vcpu, rc,
> >  				  current->thread.gmap_addr,
> > @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu,
> >  {
> >  	if (vsie_page->fault_addr)
> >  		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> > -				      vsie_page->fault_addr);
> > +				      vsie_page->fault_addr, NULL);
> 
> Ok
> 
> >  	vsie_page->fault_addr = 0;
> >  }
Claudio Imbrenda Feb. 5, 2021, 12:18 p.m. UTC | #4
On Thu, 4 Feb 2021 18:05:15 +0100
Janosch Frank <frankja@linux.ibm.com> wrote:

> On 2/4/21 5:34 PM, Janosch Frank wrote:
> > On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
> >> Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
> >> DAT table entry, or to the invalid entry.
> >>
> >> Also return some flags in the lower bits of the address:
> >> DAT_PROT: indicates that DAT protection applies because of the
> >>           protection bit in the segment (or, if EDAT, region) tables
> >> NOT_PTE: indicates that the address of the DAT table entry returned
> >>          does not refer to a PTE, but to a segment or region table.
> >>
> >> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> >> Cc: stable@vger.kernel.org
> >> ---
> >>  arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
> >>  arch/s390/kvm/gaccess.h |  5 ++++-
> >>  arch/s390/kvm/vsie.c    |  8 ++++----
> >>  3 files changed, 30 insertions(+), 9 deletions(-)
> >>
> >> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> >> index 6d6b57059493..2d7bcbfb185e 100644
> >> --- a/arch/s390/kvm/gaccess.c
> >> +++ b/arch/s390/kvm/gaccess.c
> >> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >>  			rfte.val = ptr;
> >>  			goto shadow_r2t;
> >>  		}
> >> +		*pgt = ptr + vaddr.rfx * 8;
> > 
> > So pgt either is a table entry if rc > 0 or a pointer to the first pte
> > on rc == 0 after this change?
> > 
> > Hrm, if it is really based on RCs then I might be able to come to terms
> > with having two things in a ptr with the name pgt. But it needs a
> > comment change.
> > 
> >>  		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
> >>  		if (rc)
> >>  			return rc;
> >> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >>  			rste.val = ptr;
> >>  			goto shadow_r3t;
> >>  		}
> >> +		*pgt = ptr + vaddr.rsx * 8;
> >>  		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
> >>  		if (rc)
> >>  			return rc;
> >> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >>  			rtte.val = ptr;
> >>  			goto shadow_sgt;
> >>  		}
> >> +		*pgt = ptr + vaddr.rtx * 8;
> >>  		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
> >>  		if (rc)
> >>  			return rc;
> >> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >>  			ste.val = ptr;
> >>  			goto shadow_pgt;
> >>  		}
> >> +		*pgt = ptr + vaddr.sx * 8;
> >>  		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
> >>  		if (rc)
> >>  			return rc;
> >> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >>   * @vcpu: virtual cpu
> >>   * @sg: pointer to the shadow guest address space structure
> >>   * @saddr: faulting address in the shadow gmap
> >> + * @pteptr: will contain the address of the faulting DAT table entry, or of
> >> + *          the valid leaf, plus some flags
> > 
> > pteptr is not the right name if it can be two things
> 
> You use it for pei only, right?

yes

> > 
> >>   *
> >>   * Returns: - 0 if the shadow fault was successfully resolved
> >>   *	    - > 0 (pgm exception code) on exceptions while faulting
> >> @@ -1165,11 +1171,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >>   *	    - -ENOMEM if out of memory
> >>   */
> >>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
> >> -			  unsigned long saddr)
> >> +			  unsigned long saddr, unsigned long *pteptr)
> >>  {
> >>  	union vaddress vaddr;
> >>  	union page_table_entry pte;
> >> -	unsigned long pgt;
> >> +	unsigned long pgt = 0;
> >>  	int dat_protection, fake;
> >>  	int rc;
> >>  
> >> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
> >>  		pte.val = pgt + vaddr.px * PAGE_SIZE;
> >>  		goto shadow_page;
> >>  	}
> >> -	if (!rc)
> >> -		rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
> >> +
> >> +	switch (rc) {
> >> +	case PGM_SEGMENT_TRANSLATION:
> >> +	case PGM_REGION_THIRD_TRANS:
> >> +	case PGM_REGION_SECOND_TRANS:
> >> +	case PGM_REGION_FIRST_TRANS:
> >> +		pgt |= NOT_PTE;
> > 
> > GACC_TRANSL_ENTRY_INV ?
> > 
> >> +		break;
> >> +	case 0:
> >> +		pgt += vaddr.px * 8;
> >> +		rc = gmap_read_table(sg->parent, pgt, &pte.val);
> >> +	}
> >> +	if (pteptr)
> >> +		*pteptr = pgt | dat_protection * DAT_PROT;
> >>  	if (!rc && pte.i)
> >>  		rc = PGM_PAGE_TRANSLATION;
> >>  	if (!rc && pte.z)
> >> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
> >> index f4c51756c462..66a6e2cec97a 100644
> >> --- a/arch/s390/kvm/gaccess.h
> >> +++ b/arch/s390/kvm/gaccess.h
> >> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
> >>  int ipte_lock_held(struct kvm_vcpu *vcpu);
> >>  int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
> >>  
> >> +#define DAT_PROT 2
> > 
> > GACC_TRANSL_ENTRY_PROT
> 
> Ok after a second pass that's not what's going on here.
> Those basically directly correspond to the MVPG PEI indication bits, right?

yes :)

> Do we also need to consider bit 63?

no, that can only happen if a specific SIE feature is used, which KVM
neither uses nor supports for VSIE, so it cannot happen

> > 
> >> +#define NOT_PTE 4
> >> +
> >>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
> >> -			  unsigned long saddr);
> >> +			  unsigned long saddr, unsigned long *pteptr);
> >>  
> >>  #endif /* __KVM_S390_GACCESS_H */
> >> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> >> index c5d0a58b2c29..7db022141db3 100644
> >> --- a/arch/s390/kvm/vsie.c
> >> +++ b/arch/s390/kvm/vsie.c
> >> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> >>  	/* with mso/msl, the prefix lies at offset *mso* */
> >>  	prefix += scb_s->mso;
> >>  
> >> -	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
> >> +	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
> >>  	if (!rc && (scb_s->ecb & ECB_TE))
> >>  		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >> -					   prefix + PAGE_SIZE);
> >> +					   prefix + PAGE_SIZE, NULL);
> >>  	/*
> >>  	 * We don't have to mprotect, we will be called for all unshadows.
> >>  	 * SIE will detect if protection applies and trigger a validity.
> >> @@ -913,7 +913,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> >>  				    current->thread.gmap_addr, 1);
> >>  
> >>  	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >> -				   current->thread.gmap_addr);
> >> +				   current->thread.gmap_addr, NULL);
> >>  	if (rc > 0) {
> >>  		rc = inject_fault(vcpu, rc,
> >>  				  current->thread.gmap_addr,
> >> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu,
> >>  {
> >>  	if (vsie_page->fault_addr)
> >>  		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >> -				      vsie_page->fault_addr);
> >> +				      vsie_page->fault_addr, NULL);
> > 
> > Ok
> > 
> >>  	vsie_page->fault_addr = 0;
> >>  }
Janosch Frank Feb. 5, 2021, 12:56 p.m. UTC | #5
On 2/5/21 1:15 PM, Claudio Imbrenda wrote:
> On Thu, 4 Feb 2021 17:34:00 +0100
> Janosch Frank <frankja@linux.ibm.com> wrote:
> 
>> On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
>>> Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
>>> DAT table entry, or to the invalid entry.
>>>
>>> Also return some flags in the lower bits of the address:
>>> DAT_PROT: indicates that DAT protection applies because of the
>>>           protection bit in the segment (or, if EDAT, region) tables
>>> NOT_PTE: indicates that the address of the DAT table entry returned
>>>          does not refer to a PTE, but to a segment or region table.
>>>
>>> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
>>> Cc: stable@vger.kernel.org
>>> ---
>>>  arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
>>>  arch/s390/kvm/gaccess.h |  5 ++++-
>>>  arch/s390/kvm/vsie.c    |  8 ++++----
>>>  3 files changed, 30 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
>>> index 6d6b57059493..2d7bcbfb185e 100644
>>> --- a/arch/s390/kvm/gaccess.c
>>> +++ b/arch/s390/kvm/gaccess.c
>>> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>>  			rfte.val = ptr;
>>>  			goto shadow_r2t;
>>>  		}
>>> +		*pgt = ptr + vaddr.rfx * 8;
>>
>> So pgt either is a table entry if rc > 0 or a pointer to the first pte
>> on rc == 0 after this change?
> 
> yes
> 
>> Hrm, if it is really based on RCs then I might be able to come to terms
>> with having two things in a ptr with the name pgt. But it needs a
>> comment change.
> 
> will do.
> 
>>>  		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
>>>  		if (rc)
>>>  			return rc;
>>> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>>  			rste.val = ptr;
>>>  			goto shadow_r3t;
>>>  		}
>>> +		*pgt = ptr + vaddr.rsx * 8;
>>>  		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
>>>  		if (rc)
>>>  			return rc;
>>> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>>  			rtte.val = ptr;
>>>  			goto shadow_sgt;
>>>  		}
>>> +		*pgt = ptr + vaddr.rtx * 8;
>>>  		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
>>>  		if (rc)
>>>  			return rc;
>>> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>>  			ste.val = ptr;
>>>  			goto shadow_pgt;
>>>  		}
>>> +		*pgt = ptr + vaddr.sx * 8;
>>>  		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
>>>  		if (rc)
>>>  			return rc;
>>> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>>   * @vcpu: virtual cpu
>>>   * @sg: pointer to the shadow guest address space structure
>>>   * @saddr: faulting address in the shadow gmap
>>> + * @pteptr: will contain the address of the faulting DAT table entry, or of
>>> + *          the valid leaf, plus some flags
>>
>> pteptr is not the right name if it can be two things
> 
> it cannot be two things there, kvm_s390_shadow_fault always returns a
> DAT _entry_ (pte, segment, region).

And that's exactly what I meant, it's not a pteptr i.e. not a (pte_t *)
as the name would suggest.

> 
>>>   *
>>>   * Returns: - 0 if the shadow fault was successfully resolved
>>>   *	    - > 0 (pgm exception code) on exceptions while faulting
>>> @@ -1165,11 +1171,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
>>>   *	    - -ENOMEM if out of memory
>>>   */
>>>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
>>> -			  unsigned long saddr)
>>> +			  unsigned long saddr, unsigned long *pteptr)
>>>  {
>>>  	union vaddress vaddr;
>>>  	union page_table_entry pte;
>>> -	unsigned long pgt;
>>> +	unsigned long pgt = 0;
>>>  	int dat_protection, fake;
>>>  	int rc;
>>>  
>>> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
>>>  		pte.val = pgt + vaddr.px * PAGE_SIZE;
>>>  		goto shadow_page;
>>>  	}
>>> -	if (!rc)
>>> -		rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
>>> +
>>> +	switch (rc) {
>>> +	case PGM_SEGMENT_TRANSLATION:
>>> +	case PGM_REGION_THIRD_TRANS:
>>> +	case PGM_REGION_SECOND_TRANS:
>>> +	case PGM_REGION_FIRST_TRANS:
>>> +		pgt |= NOT_PTE;
>>
>> GACC_TRANSL_ENTRY_INV ?
> 
> no, this is only for non-pte entries
> 
>>> +		break;
>>> +	case 0:
>>> +		pgt += vaddr.px * 8;
>>> +		rc = gmap_read_table(sg->parent, pgt, &pte.val);
>>> +	}
>>> +	if (pteptr)
>>> +		*pteptr = pgt | dat_protection * DAT_PROT;
>>>  	if (!rc && pte.i)
>>>  		rc = PGM_PAGE_TRANSLATION;
>>>  	if (!rc && pte.z)
>>> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
>>> index f4c51756c462..66a6e2cec97a 100644
>>> --- a/arch/s390/kvm/gaccess.h
>>> +++ b/arch/s390/kvm/gaccess.h
>>> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
>>>  int ipte_lock_held(struct kvm_vcpu *vcpu);
>>>  int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
>>>  
>>> +#define DAT_PROT 2
>>
>> GACC_TRANSL_ENTRY_PROT
> 
> this is also only for non-pte entries
> 
>>> +#define NOT_PTE 4
>>> +
>>>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
>>> -			  unsigned long saddr);
>>> +			  unsigned long saddr, unsigned long *pteptr);
>>>  
>>>  #endif /* __KVM_S390_GACCESS_H */
>>> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
>>> index c5d0a58b2c29..7db022141db3 100644
>>> --- a/arch/s390/kvm/vsie.c
>>> +++ b/arch/s390/kvm/vsie.c
>>> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>>>  	/* with mso/msl, the prefix lies at offset *mso* */
>>>  	prefix += scb_s->mso;
>>>  
>>> -	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
>>> +	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
>>>  	if (!rc && (scb_s->ecb & ECB_TE))
>>>  		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>>> -					   prefix + PAGE_SIZE);
>>> +					   prefix + PAGE_SIZE, NULL);
>>>  	/*
>>>  	 * We don't have to mprotect, we will be called for all unshadows.
>>>  	 * SIE will detect if protection applies and trigger a validity.
>>> @@ -913,7 +913,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
>>>  				    current->thread.gmap_addr, 1);
>>>  
>>>  	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>>> -				   current->thread.gmap_addr);
>>> +				   current->thread.gmap_addr, NULL);
>>>  	if (rc > 0) {
>>>  		rc = inject_fault(vcpu, rc,
>>>  				  current->thread.gmap_addr,
>>> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu,
>>>  {
>>>  	if (vsie_page->fault_addr)
>>>  		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
>>> -				      vsie_page->fault_addr);
>>> +				      vsie_page->fault_addr, NULL);
>>
>> Ok
>>
>>>  	vsie_page->fault_addr = 0;
>>>  }
Claudio Imbrenda Feb. 5, 2021, 2:05 p.m. UTC | #6
On Fri, 5 Feb 2021 13:56:53 +0100
Janosch Frank <frankja@linux.ibm.com> wrote:

> On 2/5/21 1:15 PM, Claudio Imbrenda wrote:
> > On Thu, 4 Feb 2021 17:34:00 +0100
> > Janosch Frank <frankja@linux.ibm.com> wrote:
> > 
> >> On 2/2/21 7:00 PM, Claudio Imbrenda wrote:
> >>> Extend kvm_s390_shadow_fault to return the pointer to the valid leaf
> >>> DAT table entry, or to the invalid entry.
> >>>
> >>> Also return some flags in the lower bits of the address:
> >>> DAT_PROT: indicates that DAT protection applies because of the
> >>>           protection bit in the segment (or, if EDAT, region) tables
> >>> NOT_PTE: indicates that the address of the DAT table entry returned
> >>>          does not refer to a PTE, but to a segment or region table.
> >>>
> >>> Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
> >>> Cc: stable@vger.kernel.org
> >>> ---
> >>>  arch/s390/kvm/gaccess.c | 26 ++++++++++++++++++++++----
> >>>  arch/s390/kvm/gaccess.h |  5 ++++-
> >>>  arch/s390/kvm/vsie.c    |  8 ++++----
> >>>  3 files changed, 30 insertions(+), 9 deletions(-)
> >>>
> >>> diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
> >>> index 6d6b57059493..2d7bcbfb185e 100644
> >>> --- a/arch/s390/kvm/gaccess.c
> >>> +++ b/arch/s390/kvm/gaccess.c
> >>> @@ -1034,6 +1034,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >>>  			rfte.val = ptr;
> >>>  			goto shadow_r2t;
> >>>  		}
> >>> +		*pgt = ptr + vaddr.rfx * 8;
> >>
> >> So pgt either is a table entry if rc > 0 or a pointer to the first pte
> >> on rc == 0 after this change?
> > 
> > yes
> > 
> >> Hrm, if it is really based on RCs then I might be able to come to terms
> >> with having two things in a ptr with the name pgt. But it needs a
> >> comment change.
> > 
> > will do.
> > 
> >>>  		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
> >>>  		if (rc)
> >>>  			return rc;
> >>> @@ -1060,6 +1061,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >>>  			rste.val = ptr;
> >>>  			goto shadow_r3t;
> >>>  		}
> >>> +		*pgt = ptr + vaddr.rsx * 8;
> >>>  		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
> >>>  		if (rc)
> >>>  			return rc;
> >>> @@ -1087,6 +1089,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >>>  			rtte.val = ptr;
> >>>  			goto shadow_sgt;
> >>>  		}
> >>> +		*pgt = ptr + vaddr.rtx * 8;
> >>>  		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
> >>>  		if (rc)
> >>>  			return rc;
> >>> @@ -1123,6 +1126,7 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >>>  			ste.val = ptr;
> >>>  			goto shadow_pgt;
> >>>  		}
> >>> +		*pgt = ptr + vaddr.sx * 8;
> >>>  		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
> >>>  		if (rc)
> >>>  			return rc;
> >>> @@ -1157,6 +1161,8 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >>>   * @vcpu: virtual cpu
> >>>   * @sg: pointer to the shadow guest address space structure
> >>>   * @saddr: faulting address in the shadow gmap
> >>> + * @pteptr: will contain the address of the faulting DAT table entry, or of
> >>> + *          the valid leaf, plus some flags
> >>
> >> pteptr is not the right name if it can be two things
> > 
> > it cannot be two things there, kvm_s390_shadow_fault always returns a
> > DAT _entry_ (pte, segment, region).
> 
> And that's exactly what I meant, it's not a pteptr i.e. not a (pte_t *)
> as the name would suggest.

fair enough, I'll rename it to something like entryptr or so

> 
> > 
> >>>   *
> >>>   * Returns: - 0 if the shadow fault was successfully resolved
> >>>   *	    - > 0 (pgm exception code) on exceptions while faulting
> >>> @@ -1165,11 +1171,11 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
> >>>   *	    - -ENOMEM if out of memory
> >>>   */
> >>>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
> >>> -			  unsigned long saddr)
> >>> +			  unsigned long saddr, unsigned long *pteptr)
> >>>  {
> >>>  	union vaddress vaddr;
> >>>  	union page_table_entry pte;
> >>> -	unsigned long pgt;
> >>> +	unsigned long pgt = 0;
> >>>  	int dat_protection, fake;
> >>>  	int rc;
> >>>  
> >>> @@ -1191,8 +1197,20 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
> >>>  		pte.val = pgt + vaddr.px * PAGE_SIZE;
> >>>  		goto shadow_page;
> >>>  	}
> >>> -	if (!rc)
> >>> -		rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
> >>> +
> >>> +	switch (rc) {
> >>> +	case PGM_SEGMENT_TRANSLATION:
> >>> +	case PGM_REGION_THIRD_TRANS:
> >>> +	case PGM_REGION_SECOND_TRANS:
> >>> +	case PGM_REGION_FIRST_TRANS:
> >>> +		pgt |= NOT_PTE;
> >>
> >> GACC_TRANSL_ENTRY_INV ?
> > 
> > no, this is only for non-pte entries
> > 
> >>> +		break;
> >>> +	case 0:
> >>> +		pgt += vaddr.px * 8;
> >>> +		rc = gmap_read_table(sg->parent, pgt, &pte.val);
> >>> +	}
> >>> +	if (pteptr)
> >>> +		*pteptr = pgt | dat_protection * DAT_PROT;
> >>>  	if (!rc && pte.i)
> >>>  		rc = PGM_PAGE_TRANSLATION;
> >>>  	if (!rc && pte.z)
> >>> diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
> >>> index f4c51756c462..66a6e2cec97a 100644
> >>> --- a/arch/s390/kvm/gaccess.h
> >>> +++ b/arch/s390/kvm/gaccess.h
> >>> @@ -359,7 +359,10 @@ void ipte_unlock(struct kvm_vcpu *vcpu);
> >>>  int ipte_lock_held(struct kvm_vcpu *vcpu);
> >>>  int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
> >>>  
> >>> +#define DAT_PROT 2
> >>
> >> GACC_TRANSL_ENTRY_PROT
> > 
> > this is also only for non-pte entries
> > 
> >>> +#define NOT_PTE 4
> >>> +
> >>>  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
> >>> -			  unsigned long saddr);
> >>> +			  unsigned long saddr, unsigned long *pteptr);
> >>>  
> >>>  #endif /* __KVM_S390_GACCESS_H */
> >>> diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
> >>> index c5d0a58b2c29..7db022141db3 100644
> >>> --- a/arch/s390/kvm/vsie.c
> >>> +++ b/arch/s390/kvm/vsie.c
> >>> @@ -619,10 +619,10 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> >>>  	/* with mso/msl, the prefix lies at offset *mso* */
> >>>  	prefix += scb_s->mso;
> >>>  
> >>> -	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
> >>> +	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
> >>>  	if (!rc && (scb_s->ecb & ECB_TE))
> >>>  		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >>> -					   prefix + PAGE_SIZE);
> >>> +					   prefix + PAGE_SIZE, NULL);
> >>>  	/*
> >>>  	 * We don't have to mprotect, we will be called for all unshadows.
> >>>  	 * SIE will detect if protection applies and trigger a validity.
> >>> @@ -913,7 +913,7 @@ static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
> >>>  				    current->thread.gmap_addr, 1);
> >>>  
> >>>  	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >>> -				   current->thread.gmap_addr);
> >>> +				   current->thread.gmap_addr, NULL);
> >>>  	if (rc > 0) {
> >>>  		rc = inject_fault(vcpu, rc,
> >>>  				  current->thread.gmap_addr,
> >>> @@ -935,7 +935,7 @@ static void handle_last_fault(struct kvm_vcpu *vcpu,
> >>>  {
> >>>  	if (vsie_page->fault_addr)
> >>>  		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
> >>> -				      vsie_page->fault_addr);
> >>> +				      vsie_page->fault_addr, NULL);
> >>
> >> Ok
> >>
> >>>  	vsie_page->fault_addr = 0;
> >>>  }

Patch

diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 6d6b57059493..2d7bcbfb185e 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1034,6 +1034,7 @@  static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 			rfte.val = ptr;
 			goto shadow_r2t;
 		}
+		*pgt = ptr + vaddr.rfx * 8;
 		rc = gmap_read_table(parent, ptr + vaddr.rfx * 8, &rfte.val);
 		if (rc)
 			return rc;
@@ -1060,6 +1061,7 @@  static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 			rste.val = ptr;
 			goto shadow_r3t;
 		}
+		*pgt = ptr + vaddr.rsx * 8;
 		rc = gmap_read_table(parent, ptr + vaddr.rsx * 8, &rste.val);
 		if (rc)
 			return rc;
@@ -1087,6 +1089,7 @@  static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 			rtte.val = ptr;
 			goto shadow_sgt;
 		}
+		*pgt = ptr + vaddr.rtx * 8;
 		rc = gmap_read_table(parent, ptr + vaddr.rtx * 8, &rtte.val);
 		if (rc)
 			return rc;
@@ -1123,6 +1126,7 @@  static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 			ste.val = ptr;
 			goto shadow_pgt;
 		}
+		*pgt = ptr + vaddr.sx * 8;
 		rc = gmap_read_table(parent, ptr + vaddr.sx * 8, &ste.val);
 		if (rc)
 			return rc;
@@ -1157,6 +1161,8 @@  static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
  * @vcpu: virtual cpu
  * @sg: pointer to the shadow guest address space structure
  * @saddr: faulting address in the shadow gmap
+ * @pteptr: will contain the address of the faulting DAT table entry, or of
+ *          the valid leaf, plus some flags
  *
  * Returns: - 0 if the shadow fault was successfully resolved
  *	    - > 0 (pgm exception code) on exceptions while faulting
@@ -1165,11 +1171,11 @@  static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
  *	    - -ENOMEM if out of memory
  */
 int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
-			  unsigned long saddr)
+			  unsigned long saddr, unsigned long *pteptr)
 {
 	union vaddress vaddr;
 	union page_table_entry pte;
-	unsigned long pgt;
+	unsigned long pgt = 0;
 	int dat_protection, fake;
 	int rc;
 
@@ -1191,8 +1197,20 @@  int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
 		pte.val = pgt + vaddr.px * PAGE_SIZE;
 		goto shadow_page;
 	}
-	if (!rc)
-		rc = gmap_read_table(sg->parent, pgt + vaddr.px * 8, &pte.val);
+
+	switch (rc) {
+	case PGM_SEGMENT_TRANSLATION:
+	case PGM_REGION_THIRD_TRANS:
+	case PGM_REGION_SECOND_TRANS:
+	case PGM_REGION_FIRST_TRANS:
+		pgt |= NOT_PTE;
+		break;
+	case 0:
+		pgt += vaddr.px * 8;
+		rc = gmap_read_table(sg->parent, pgt, &pte.val);
+	}
+	if (pteptr)
+		*pteptr = pgt | dat_protection * DAT_PROT;
 	if (!rc && pte.i)
 		rc = PGM_PAGE_TRANSLATION;
 	if (!rc && pte.z)
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index f4c51756c462..66a6e2cec97a 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -359,7 +359,10 @@  void ipte_unlock(struct kvm_vcpu *vcpu);
 int ipte_lock_held(struct kvm_vcpu *vcpu);
 int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
 
+#define DAT_PROT 2
+#define NOT_PTE 4
+
 int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *shadow,
-			  unsigned long saddr);
+			  unsigned long saddr, unsigned long *pteptr);
 
 #endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index c5d0a58b2c29..7db022141db3 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -619,10 +619,10 @@  static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 	/* with mso/msl, the prefix lies at offset *mso* */
 	prefix += scb_s->mso;
 
-	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
+	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix, NULL);
 	if (!rc && (scb_s->ecb & ECB_TE))
 		rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
-					   prefix + PAGE_SIZE);
+					   prefix + PAGE_SIZE, NULL);
 	/*
 	 * We don't have to mprotect, we will be called for all unshadows.
 	 * SIE will detect if protection applies and trigger a validity.
@@ -913,7 +913,7 @@  static int handle_fault(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
 				    current->thread.gmap_addr, 1);
 
 	rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
-				   current->thread.gmap_addr);
+				   current->thread.gmap_addr, NULL);
 	if (rc > 0) {
 		rc = inject_fault(vcpu, rc,
 				  current->thread.gmap_addr,
@@ -935,7 +935,7 @@  static void handle_last_fault(struct kvm_vcpu *vcpu,
 {
 	if (vsie_page->fault_addr)
 		kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
-				      vsie_page->fault_addr);
+				      vsie_page->fault_addr, NULL);
 	vsie_page->fault_addr = 0;
 }
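
An aside on the write-back at the end of kvm_s390_shadow_fault(): dat_protection
is 0 or 1 there, so multiplying it by DAT_PROT either sets or clears that flag
bit. A sketch of the kind of caller this extension is aimed at (the real user
is the MVPG handling later in this series; the function here is hypothetical):

/*
 * Hypothetical caller: resolve a shadow fault and derive PEI-style
 * indication bits from the returned entry.  Illustration only.
 */
static int example_resolve(struct kvm_vcpu *vcpu, struct gmap *sg,
			   unsigned long saddr, unsigned long *pei)
{
	unsigned long entry = 0;
	int rc;

	rc = kvm_s390_shadow_fault(vcpu, sg, saddr, &entry);
	/* the low flag bits double as program exception information */
	*pei = entry & (DAT_PROT | NOT_PTE);
	return rc;
}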