Message ID | 20200701162959.9814-5-vicooodin@gmail.com |
---|---|
State | New |
Series | Add new board: Xen guest for ARM64 |
> Subject: [PATCH 04/17] xen: Add essential and required interface headers
>
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko at epam.com>
>
> Add essential and required Xen interface headers only, taken from
> the stable Linux kernel stable/linux-5.7.y at commit
> 66dfe45221605e11f38a0bf5eb2ee808cea7cfe7.

Please use commit <12+> ("commit header")

> These are better suited for U-Boot than the original headers
> from Xen, as they are stripped versions of the same.
>
> At the same time use public protocols from Xen RELEASE-4.13.1, at
> commit 6278553325a9f76d37811923221b21db3882e017

Please use commit <12+> ("commit header")

Then:

Acked-by: Peng Fan <peng.fan at nxp.com>

> as those have more comments in them.
>
> Signed-off-by: Oleksandr Andrushchenko
> <oleksandr_andrushchenko at epam.com>
> Signed-off-by: Anastasiia Lukianenko <anastasiia_lukianenko at epam.com>
> ---
>  include/xen/arm/interface.h           |  88 ++++
>  include/xen/interface/event_channel.h | 281 ++++++++++
>  include/xen/interface/grant_table.h   | 582 +++++++++++++++++++++
>  include/xen/interface/hvm/hvm_op.h    |  69 +++
>  include/xen/interface/hvm/params.h    | 127 +++++
>  include/xen/interface/io/blkif.h      | 726 ++++++++++++++++++++++++++
>  include/xen/interface/io/console.h    |  56 ++
>  include/xen/interface/io/protocols.h  |  42 ++
>  include/xen/interface/io/ring.h       | 479 +++++++++++++++++
>  include/xen/interface/io/xenbus.h     |  81 +++
>  include/xen/interface/io/xs_wire.h    | 151 ++++++
>  include/xen/interface/memory.h        | 332 ++++++++++++
>  include/xen/interface/sched.h         | 188 +++++++
>  include/xen/interface/xen.h           | 225 ++++++++
>  14 files changed, 3427 insertions(+)
>  create mode 100644 include/xen/arm/interface.h
>  create mode 100644 include/xen/interface/event_channel.h
>  create mode 100644 include/xen/interface/grant_table.h
>  create mode 100644 include/xen/interface/hvm/hvm_op.h
>  create mode 100644 include/xen/interface/hvm/params.h
>  create mode 100644 include/xen/interface/io/blkif.h
>  create mode 100644 include/xen/interface/io/console.h
>  create mode 100644 include/xen/interface/io/protocols.h
>  create mode 100644 include/xen/interface/io/ring.h
>  create mode 100644 include/xen/interface/io/xenbus.h
>  create mode 100644 include/xen/interface/io/xs_wire.h
>  create mode 100644 include/xen/interface/memory.h
>  create mode 100644 include/xen/interface/sched.h
>  create mode 100644 include/xen/interface/xen.h
>
> diff --git a/include/xen/arm/interface.h b/include/xen/arm/interface.h
> new file mode 100644
> index 0000000000..79d5ae8563
> --- /dev/null
> +++ b/include/xen/arm/interface.h
> @@ -0,0 +1,88 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/******************************************************************************
> + * Guest OS interface to ARM Xen.
> + * > + * Stefano Stabellini <stefano.stabellini at eu.citrix.com>, Citrix, 2012 > + */ > + > +#ifndef _ASM_ARM_XEN_INTERFACE_H > +#define _ASM_ARM_XEN_INTERFACE_H > + > +#ifndef __ASSEMBLY__ > +#include <linux/types.h> > +#endif > + > +#define uint64_aligned_t u64 __attribute__((aligned(8))) > + > +#define __DEFINE_GUEST_HANDLE(name, type) \ > + typedef struct { union { type *p; uint64_aligned_t q; }; } \ > + __guest_handle_ ## name > + > +#define DEFINE_GUEST_HANDLE_STRUCT(name) \ > + __DEFINE_GUEST_HANDLE(name, struct name) > +#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, > name) > +#define GUEST_HANDLE(name) __guest_handle_ ## name > + > +#define set_xen_guest_handle(hnd, val) \ > + do { \ > + if (sizeof(hnd) == 8) \ > + *(u64 *)&(hnd) = 0; \ > + (hnd).p = val; \ > + } while (0) > + > +#define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op > + > +#ifndef __ASSEMBLY__ > +/* Explicitly size integers that represent pfns in the interface with > + * Xen so that we can have one ABI that works for 32 and 64 bit guests. > + * Note that this means that the xen_pfn_t type may be capable of > + * representing pfn's which the guest cannot represent in its own pfn > + * type. However since pfn space is controlled by the guest this is > + * fine since it simply wouldn't be able to create any sure pfns in > + * the first place. > + */ > +typedef u64 xen_pfn_t; > +#define PRI_xen_pfn "llx" > +typedef u64 xen_ulong_t; > +#define PRI_xen_ulong "llx" > +typedef s64 xen_long_t; > +#define PRI_xen_long "llx" > +/* Guest handles for primitive C types. */ > +__DEFINE_GUEST_HANDLE(uchar, unsigned char); > +__DEFINE_GUEST_HANDLE(uint, unsigned int); > +DEFINE_GUEST_HANDLE(char); > +DEFINE_GUEST_HANDLE(int); > +DEFINE_GUEST_HANDLE(void); > +DEFINE_GUEST_HANDLE(u64); > +DEFINE_GUEST_HANDLE(u32); > +DEFINE_GUEST_HANDLE(xen_pfn_t); > +DEFINE_GUEST_HANDLE(xen_ulong_t); > + > +/* Maximum number of virtual CPUs in multi-processor guests. */ > +#define MAX_VIRT_CPUS 1 > + > +struct arch_vcpu_info { }; > +struct arch_shared_info { }; > + > +/* TODO: Move pvclock definitions some place arch independent */ > +struct pvclock_vcpu_time_info { > + u32 version; > + u32 pad0; > + u64 tsc_timestamp; > + u64 system_time; > + u32 tsc_to_system_mul; > + s8 tsc_shift; > + u8 flags; > + u8 pad[2]; > +} __attribute__((__packed__)); /* 32 bytes */ > + > +/* It is OK to have a 12 bytes struct with no padding because it is packed */ > +struct pvclock_wall_clock { > + u32 version; > + u32 sec; > + u32 nsec; > + u32 sec_hi; > +} __attribute__((__packed__)); > +#endif > + > +#endif /* _ASM_ARM_XEN_INTERFACE_H */ > diff --git a/include/xen/interface/event_channel.h > b/include/xen/interface/event_channel.h > new file mode 100644 > index 0000000000..8174999c2f > --- /dev/null > +++ b/include/xen/interface/event_channel.h > @@ -0,0 +1,281 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/************************************************************ > ****************** > + * event_channel.h > + * > + * Event channels between domains. > + * > + * Copyright (c) 2003-2004, K A Fraser. > + */ > + > +#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ > +#define __XEN_PUBLIC_EVENT_CHANNEL_H__ > + > +#include <xen/interface/xen.h> > + > +typedef u32 evtchn_port_t; > +DEFINE_GUEST_HANDLE(evtchn_port_t); > + > +/* > + * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as > + * accepting interdomain bindings from domain <remote_dom>. A fresh port > + * is allocated in <dom> and returned as <port>. > + * NOTES: > + * 1. 
If the caller is unprivileged then <dom> must be DOMID_SELF. > + * 2. <rdom> may be DOMID_SELF, allowing loopback connections. > + */ > +#define EVTCHNOP_alloc_unbound 6 > +struct evtchn_alloc_unbound { > + /* IN parameters */ > + domid_t dom, remote_dom; > + /* OUT parameters */ > + evtchn_port_t port; > +}; > + > +/* > + * EVTCHNOP_bind_interdomain: Construct an interdomain event channel > between > + * the calling domain and <remote_dom>. <remote_dom,remote_port> must > identify > + * a port that is unbound and marked as accepting bindings from the calling > + * domain. A fresh port is allocated in the calling domain and returned as > + * <local_port>. > + * NOTES: > + * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections. > + */ > +#define EVTCHNOP_bind_interdomain 0 > +struct evtchn_bind_interdomain { > + /* IN parameters. */ > + domid_t remote_dom; > + evtchn_port_t remote_port; > + /* OUT parameters. */ > + evtchn_port_t local_port; > +}; > + > +/* > + * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on > specified > + * vcpu. > + * NOTES: > + * 1. A virtual IRQ may be bound to at most one event channel per vcpu. > + * 2. The allocated event channel is bound to the specified vcpu. The > binding > + * may not be changed. > + */ > +#define EVTCHNOP_bind_virq 1 > +struct evtchn_bind_virq { > + /* IN parameters. */ > + u32 virq; > + u32 vcpu; > + /* OUT parameters. */ > + evtchn_port_t port; > +}; > + > +/* > + * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>. > + * NOTES: > + * 1. A physical IRQ may be bound to at most one event channel per > domain. > + * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. > + */ > +#define EVTCHNOP_bind_pirq 2 > +struct evtchn_bind_pirq { > + /* IN parameters. */ > + u32 pirq; > +#define BIND_PIRQ__WILL_SHARE 1 > + u32 flags; /* BIND_PIRQ__* */ > + /* OUT parameters. */ > + evtchn_port_t port; > +}; > + > +/* > + * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. > + * NOTES: > + * 1. The allocated event channel is bound to the specified vcpu. The > binding > + * may not be changed. > + */ > +#define EVTCHNOP_bind_ipi 7 > +struct evtchn_bind_ipi { > + u32 vcpu; > + /* OUT parameters. */ > + evtchn_port_t port; > +}; > + > +/* > + * EVTCHNOP_close: Close a local event channel <port>. If the channel is > + * interdomain then the remote end is placed in the unbound state > + * (EVTCHNSTAT_unbound), awaiting a new connection. > + */ > +#define EVTCHNOP_close 3 > +struct evtchn_close { > + /* IN parameters. */ > + evtchn_port_t port; > +}; > + > +/* > + * EVTCHNOP_send: Send an event to the remote end of the channel whose > local > + * endpoint is <port>. > + */ > +#define EVTCHNOP_send 4 > +struct evtchn_send { > + /* IN parameters. */ > + evtchn_port_t port; > +}; > + > +/* > + * EVTCHNOP_status: Get the current status of the communication channel > which > + * has an endpoint at <dom, port>. > + * NOTES: > + * 1. <dom> may be specified as DOMID_SELF. > + * 2. Only a sufficiently-privileged domain may obtain the status of an > event > + * channel for which <dom> is not DOMID_SELF. > + */ > +#define EVTCHNOP_status 5 > +struct evtchn_status { > + /* IN parameters */ > + domid_t dom; > + evtchn_port_t port; > + /* OUT parameters */ > +#define EVTCHNSTAT_closed 0 /* Channel is not in use. */ > +#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom > connection.*/ > +#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote > domain. 
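
A note for readers on the handle macros quoted earlier: set_xen_guest_handle()
deliberately zeroes the whole 64-bit union member before storing the pointer,
so a 32-bit guest never hands stale upper bits to Xen. A minimal usage sketch
(the frame list is hypothetical, not part of the patch):

	xen_pfn_t frames[4];			/* hypothetical buffer shared with Xen */
	GUEST_HANDLE(xen_pfn_t) hnd;

	set_xen_guest_handle(hnd, frames);	/* writes q = 0, then p = frames */
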
*/ > +#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. > */ > +#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line > */ > +#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line > */ > + u32 status; > + u32 vcpu; /* VCPU to which this channel is bound. */ > + union { > + struct { > + domid_t dom; > + } unbound; /* EVTCHNSTAT_unbound */ > + struct { > + domid_t dom; > + evtchn_port_t port; > + } interdomain; /* EVTCHNSTAT_interdomain */ > + u32 pirq; /* EVTCHNSTAT_pirq */ > + u32 virq; /* EVTCHNSTAT_virq */ > + } u; > +}; > + > +/* > + * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when > an > + * event is pending. > + * NOTES: > + * 1. IPI- and VIRQ-bound channels always notify the vcpu that initialised > + * the binding. This binding cannot be changed. > + * 2. All other channels notify vcpu0 by default. This default is set when > + * the channel is allocated (a port that is freed and subsequently reused > + * has its binding reset to vcpu0). > + */ > +#define EVTCHNOP_bind_vcpu 8 > +struct evtchn_bind_vcpu { > + /* IN parameters. */ > + evtchn_port_t port; > + u32 vcpu; > +}; > + > +/* > + * EVTCHNOP_unmask: Unmask the specified local event-channel port and > deliver > + * a notification to the appropriate VCPU if an event is pending. > + */ > +#define EVTCHNOP_unmask 9 > +struct evtchn_unmask { > + /* IN parameters. */ > + evtchn_port_t port; > +}; > + > +/* > + * EVTCHNOP_reset: Close all event channels associated with specified > domain. > + * NOTES: > + * 1. <dom> may be specified as DOMID_SELF. > + * 2. Only a sufficiently-privileged domain may specify other than > DOMID_SELF. > + */ > +#define EVTCHNOP_reset 10 > +struct evtchn_reset { > + /* IN parameters. */ > + domid_t dom; > +}; > + > +typedef struct evtchn_reset evtchn_reset_t; > + > +/* > + * EVTCHNOP_init_control: initialize the control block for the FIFO ABI. > + */ > +#define EVTCHNOP_init_control 11 > +struct evtchn_init_control { > + /* IN parameters. */ > + u64 control_gfn; > + u32 offset; > + u32 vcpu; > + /* OUT parameters. */ > + u8 link_bits; > + u8 _pad[7]; > +}; > + > +/* > + * EVTCHNOP_expand_array: add an additional page to the event array. > + */ > +#define EVTCHNOP_expand_array 12 > +struct evtchn_expand_array { > + /* IN parameters. */ > + u64 array_gfn; > +}; > + > +/* > + * EVTCHNOP_set_priority: set the priority for an event channel. > + */ > +#define EVTCHNOP_set_priority 13 > +struct evtchn_set_priority { > + /* IN parameters. */ > + evtchn_port_t port; > + u32 priority; > +}; > + > +struct evtchn_op { > + u32 cmd; /* EVTCHNOP_* */ > + union { > + struct evtchn_alloc_unbound alloc_unbound; > + struct evtchn_bind_interdomain bind_interdomain; > + struct evtchn_bind_virq bind_virq; > + struct evtchn_bind_pirq bind_pirq; > + struct evtchn_bind_ipi bind_ipi; > + struct evtchn_close close; > + struct evtchn_send send; > + struct evtchn_status status; > + struct evtchn_bind_vcpu bind_vcpu; > + struct evtchn_unmask unmask; > + } u; > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(evtchn_op); > + > +/* > + * 2-level ABI > + */ > + > +#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * > sizeof(xen_ulong_t) * 64) > + > +/* > + * FIFO ABI > + */ > + > +/* Events may have priorities from 0 (highest) to 15 (lowest). 
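
To make the event-channel operations above concrete: a frontend typically
allocates an unbound port, signals it, and closes it on teardown. A sketch,
assuming the HYPERVISOR_event_channel_op hypercall wrapper added elsewhere
in this series (error handling elided; DOMID_SELF comes from xen.h):

	struct evtchn_alloc_unbound alloc = {
		.dom        = DOMID_SELF,
		.remote_dom = 0,		/* accept a binding from dom0 */
	};
	struct evtchn_send send;
	struct evtchn_close close;

	HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &alloc);

	send.port = alloc.port;			/* notify the remote end */
	HYPERVISOR_event_channel_op(EVTCHNOP_send, &send);

	close.port = alloc.port;		/* tear down when finished */
	HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
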
*/ > +#define EVTCHN_FIFO_PRIORITY_MAX 0 > +#define EVTCHN_FIFO_PRIORITY_DEFAULT 7 > +#define EVTCHN_FIFO_PRIORITY_MIN 15 > + > +#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1) > + > +typedef u32 event_word_t; > + > +#define EVTCHN_FIFO_PENDING 31 > +#define EVTCHN_FIFO_MASKED 30 > +#define EVTCHN_FIFO_LINKED 29 > +#define EVTCHN_FIFO_BUSY 28 > + > +#define EVTCHN_FIFO_LINK_BITS 17 > +#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1) > + > +#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS) > + > +struct evtchn_fifo_control_block { > + u32 ready; > + u32 _rsvd; > + event_word_t head[EVTCHN_FIFO_MAX_QUEUES]; > +}; > + > +#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ > diff --git a/include/xen/interface/grant_table.h > b/include/xen/interface/grant_table.h > new file mode 100644 > index 0000000000..197a0d0d58 > --- /dev/null > +++ b/include/xen/interface/grant_table.h > @@ -0,0 +1,582 @@ > +/************************************************************ > ****************** > + * grant_table.h > + * > + * Interface for granting foreign access to page frames, and receiving > + * page-ownership transfers. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > + * deal in the Software without restriction, including without limitation the > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or > + * sell copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL THE > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > OR OTHER > + * DEALINGS IN THE SOFTWARE. > + * > + * Copyright (c) 2004, K A Fraser > + */ > + > +#ifndef __XEN_PUBLIC_GRANT_TABLE_H__ > +#define __XEN_PUBLIC_GRANT_TABLE_H__ > + > +#include <xen/interface/xen.h> > + > +/*********************************** > + * GRANT TABLE REPRESENTATION > + */ > + > +/* Some rough guidelines on accessing and updating grant-table entries > + * in a concurrency-safe manner. For more information, Linux contains a > + * reference implementation for guest OSes (arch/xen/kernel/grant_table.c). > + * > + * NB. WMB is a no-op on current-generation x86 processors. However, a > + * compiler barrier will still be required. > + * > + * Introducing a valid entry into the grant table: > + * 1. Write ent->domid. > + * 2. Write ent->frame: > + * GTF_permit_access: Frame to which access is permitted. > + * GTF_accept_transfer: Pseudo-phys frame slot being filled by new > + * frame, or zero if none. > + * 3. Write memory barrier (WMB). > + * 4. Write ent->flags, inc. valid type. > + * > + * Invalidating an unused GTF_permit_access entry: > + * 1. flags = ent->flags. > + * 2. Observe that !(flags & (GTF_reading|GTF_writing)). > + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). > + * NB. 
No need for WMB as reuse of entry is control-dependent on success > of > + * step 3, and all architectures guarantee ordering of ctrl-dep writes. > + * > + * Invalidating an in-use GTF_permit_access entry: > + * This cannot be done directly. Request assistance from the domain > controller > + * which can set a timeout on the use of a grant entry and take necessary > + * action. (NB. This is not yet implemented!). > + * > + * Invalidating an unused GTF_accept_transfer entry: > + * 1. flags = ent->flags. > + * 2. Observe that !(flags & GTF_transfer_committed). [*] > + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). > + * NB. No need for WMB as reuse of entry is control-dependent on success > of > + * step 3, and all architectures guarantee ordering of ctrl-dep writes. > + * [*] If GTF_transfer_committed is set then the grant entry is 'committed'. > + * The guest must /not/ modify the grant entry until the address of > the > + * transferred frame is written. It is safe for the guest to spin waiting > + * for this to occur (detect by observing GTF_transfer_completed in > + * ent->flags). > + * > + * Invalidating a committed GTF_accept_transfer entry: > + * 1. Wait for (ent->flags & GTF_transfer_completed). > + * > + * Changing a GTF_permit_access from writable to read-only: > + * Use SMP-safe CMPXCHG to set GTF_readonly, while > checking !GTF_writing. > + * > + * Changing a GTF_permit_access from read-only to writable: > + * Use SMP-safe bit-setting instruction. > + */ > + > +/* > + * Reference to a grant entry in a specified domain's grant table. > + */ > +typedef u32 grant_ref_t; > + > +/* > + * A grant table comprises a packed array of grant entries in one or more > + * page frames shared between Xen and a guest. > + * [XEN]: This field is written by Xen and read by the sharing guest. > + * [GST]: This field is written by the guest and read by Xen. > + */ > + > +/* > + * Version 1 of the grant table entry structure is maintained purely > + * for backwards compatibility. New guests should use version 2. > + */ > +struct grant_entry_v1 { > + /* GTF_xxx: various type and flag information. [XEN,GST] */ > + u16 flags; > + /* The domain being granted foreign privileges. [GST] */ > + domid_t domid; > + /* > + * GTF_permit_access: Frame that @domid is allowed to map and > access. [GST] > + * GTF_accept_transfer: Frame whose ownership transferred by > @domid. [XEN] > + */ > + u32 frame; > +}; > + > +/* > + * Type of grant entry. > + * GTF_invalid: This grant entry grants no privileges. > + * GTF_permit_access: Allow @domid to map/access @frame. > + * GTF_accept_transfer: Allow @domid to transfer ownership of one page > frame > + * to this guest. Xen writes the page number to > @frame. > + * GTF_transitive: Allow @domid to transitively access a subrange of > + * @trans_grant in @trans_domid. No mappings are > allowed. > + */ > +#define GTF_invalid (0U << 0) > +#define GTF_permit_access (1U << 0) > +#define GTF_accept_transfer (2U << 0) > +#define GTF_transitive (3U << 0) > +#define GTF_type_mask (3U << 0) > + > +/* > + * Subflags for GTF_permit_access. > + * GTF_readonly: Restrict @domid to read-only mappings and accesses. > [GST] > + * GTF_reading: Grant entry is currently mapped for reading by @domid. > [XEN] > + * GTF_writing: Grant entry is currently mapped for writing by @domid. > [XEN] > + * GTF_sub_page: Grant access to only a subrange of the page. @domid > + * will only be allowed to copy from the grant, and not > + * map it. 
[GST]
> + */
> +#define _GTF_readonly (2)
> +#define GTF_readonly (1U << _GTF_readonly)
> +#define _GTF_reading (3)
> +#define GTF_reading (1U << _GTF_reading)
> +#define _GTF_writing (4)
> +#define GTF_writing (1U << _GTF_writing)
> +#define _GTF_sub_page (8)
> +#define GTF_sub_page (1U << _GTF_sub_page)
> +
> +/*
> + * Subflags for GTF_accept_transfer:
> + *  GTF_transfer_committed: Xen sets this flag to indicate that it is committed
> + *      to transferring ownership of a page frame. When a guest sees this flag
> + *      it must /not/ modify the grant entry until GTF_transfer_completed is
> + *      set by Xen.
> + *  GTF_transfer_completed: It is safe for the guest to spin-wait on this flag
> + *      after reading GTF_transfer_committed. Xen will always write the frame
> + *      address, followed by ORing this flag, in a timely manner.
> + */
> +#define _GTF_transfer_committed (2)
> +#define GTF_transfer_committed (1U << _GTF_transfer_committed)
> +#define _GTF_transfer_completed (3)
> +#define GTF_transfer_completed (1U << _GTF_transfer_completed)
> +
> +/*
> + * Version 2 grant table entries. These fulfil the same role as
> + * version 1 entries, but can represent more complicated operations.
> + * Any given domain will have either a version 1 or a version 2 table,
> + * and every entry in the table will be the same version.
> + *
> + * The interface by which domains use grant references does not depend
> + * on the grant table version in use by the other domain.
> + */
> +
> +/*
> + * Version 1 and version 2 grant entries share a common prefix. The
> + * fields of the prefix are documented as part of struct
> + * grant_entry_v1.
> + */
> +struct grant_entry_header {
> +	u16 flags;
> +	domid_t domid;
> +};
> +
> +/*
> + * Version 2 of the grant entry structure. It is a union because three
> + * different types are supported: full_page, sub_page and transitive.
> + */
> +union grant_entry_v2 {
> +	struct grant_entry_header hdr;
> +
> +	/*
> +	 * This member is used for V1-style full page grants, where either:
> +	 *
> +	 * -- hdr.type is GTF_accept_transfer, or
> +	 * -- hdr.type is GTF_permit_access and GTF_sub_page is not set.
> +	 *
> +	 * In that case, the frame field has the same semantics as the
> +	 * field of the same name in the V1 entry structure.
> +	 */
> +	struct {
> +		struct grant_entry_header hdr;
> +		u32 pad0;
> +		u64 frame;
> +	} full_page;
> +
> +	/*
> +	 * If the grant type is GTF_permit_access and GTF_sub_page is set,
> +	 * @domid is allowed to access bytes [@page_off, @page_off+@length)
> +	 * in frame @frame.
> +	 */
> +	struct {
> +		struct grant_entry_header hdr;
> +		u16 page_off;
> +		u16 length;
> +		u64 frame;
> +	} sub_page;
> +
> +	/*
> +	 * If the grant is GTF_transitive, @domid is allowed to use the
> +	 * grant @gref in domain @trans_domid, as if it was the local
> +	 * domain. Obviously, the transitive access must be compatible
> +	 * with the original grant.
> +	 */
> +	struct {
> +		struct grant_entry_header hdr;
> +		domid_t trans_domid;
> +		u16 pad0;
> +		grant_ref_t gref;
> +	} transitive;
> +
> +	u32 __spacer[4]; /* Pad to a power of two */
> +};
> +
> +typedef u16 grant_status_t;
> +
> +/***********************************
> + * GRANT TABLE QUERIES AND USES
> + */
> +
> +/*
> + * Handle to track a mapping created via a grant reference.
> + */
> +typedef u32 grant_handle_t;
> +
> +/*
> + * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
> + * by devices and/or host CPUs.
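
Tying the update rules quoted earlier to these structures: introducing a valid
V1 entry is a write of domid and frame, a write barrier, then the flags write.
A sketch, assuming U-Boot's wmb() barrier helper (function and argument names
are illustrative only):

	static void gnttab_grant_access(struct grant_entry_v1 *ent, domid_t domid,
					u32 gfn, int readonly)
	{
		ent->domid = domid;
		ent->frame = gfn;
		wmb();		/* order domid/frame before the flags write */
		ent->flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
	}
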
If successful, <handle> is a tracking number > + * that must be presented later to destroy the mapping(s). On error, > <handle> > + * is a negative status code. > + * NOTES: > + * 1. If GNTMAP_device_map is specified then <dev_bus_addr> is the > address > + * via which I/O devices may access the granted frame. > + * 2. If GNTMAP_host_map is specified then a mapping will be added at > + * either a host virtual address in the current address space, or at > + * a PTE at the specified machine address. The type of mapping to > + * perform is selected through the GNTMAP_contains_pte flag, and the > + * address is specified in <host_addr>. > + * 3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. > If a > + * host mapping is destroyed by other means then it is *NOT* > guaranteed > + * to be accounted to the correct grant reference! > + */ > +#define GNTTABOP_map_grant_ref 0 > +struct gnttab_map_grant_ref { > + /* IN parameters. */ > + u64 host_addr; > + u32 flags; /* GNTMAP_* */ > + grant_ref_t ref; > + domid_t dom; > + /* OUT parameters. */ > + s16 status; /* GNTST_* */ > + grant_handle_t handle; > + u64 dev_bus_addr; > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref); > + > +/* > + * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference > mappings > + * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that > + * field is ignored. If non-zero, they must refer to a device/host mapping > + * that is tracked by <handle> > + * NOTES: > + * 1. The call may fail in an undefined manner if either mapping is not > + * tracked by <handle>. > + * 3. After executing a batch of unmaps, it is guaranteed that no stale > + * mappings will remain in the device or host TLBs. > + */ > +#define GNTTABOP_unmap_grant_ref 1 > +struct gnttab_unmap_grant_ref { > + /* IN parameters. */ > + u64 host_addr; > + u64 dev_bus_addr; > + grant_handle_t handle; > + /* OUT parameters. */ > + s16 status; /* GNTST_* */ > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref); > + > +/* > + * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at > least > + * <nr_frames> pages. The frame addresses are written to the <frame_list>. > + * Only <nr_frames> addresses are written, even if the table is larger. > + * NOTES: > + * 1. <dom> may be specified as DOMID_SELF. > + * 2. Only a sufficiently-privileged domain may specify <dom> != > DOMID_SELF. > + * 3. Xen may not support more than a single grant-table page per domain. > + */ > +#define GNTTABOP_setup_table 2 > +struct gnttab_setup_table { > + /* IN parameters. */ > + domid_t dom; > + u32 nr_frames; > + /* OUT parameters. */ > + s16 status; /* GNTST_* */ > + > + GUEST_HANDLE(xen_pfn_t)frame_list; > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table); > + > +/* > + * GNTTABOP_dump_table: Dump the contents of the grant table to the > + * xen console. Debugging use only. > + */ > +#define GNTTABOP_dump_table 3 > +struct gnttab_dump_table { > + /* IN parameters. */ > + domid_t dom; > + /* OUT parameters. */ > + s16 status; /* GNTST_* */ > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table); > + > +/* > + * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The > + * foreign domain has previously registered its interest in the transfer via > + * <domid, ref>. > + * > + * Note that, even if the transfer fails, the specified page no longer belongs > + * to the calling domain *unless* the error is GNTST_bad_page. > + */ > +#define GNTTABOP_transfer 4 > +struct gnttab_transfer { > + /* IN parameters. 
*/
> +	xen_pfn_t mfn;
> +	domid_t domid;
> +	grant_ref_t ref;
> +	/* OUT parameters. */
> +	s16 status;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer);
> +
> +/*
> + * GNTTABOP_copy: Hypervisor based copy
> + * source and destination can be either MFNs or, for foreign domains,
> + * grant references. The foreign domain has to grant read/write access
> + * in its grant table.
> + *
> + * The flags specify the type of the source and destination (either MFN
> + * or grant reference).
> + *
> + * Note that this can also be used to copy data between two domains
> + * via a third party if the source and destination domains had previously
> + * granted appropriate access to their pages to the third party.
> + *
> + * source_offset specifies an offset in the source frame, dest_offset
> + * the offset in the target frame and len specifies the number of
> + * bytes to be copied.
> + */
> +
> +#define _GNTCOPY_source_gref (0)
> +#define GNTCOPY_source_gref (1 << _GNTCOPY_source_gref)
> +#define _GNTCOPY_dest_gref (1)
> +#define GNTCOPY_dest_gref (1 << _GNTCOPY_dest_gref)
> +
> +#define GNTTABOP_copy 5
> +struct gnttab_copy {
> +	/* IN parameters. */
> +	struct {
> +		union {
> +			grant_ref_t ref;
> +			xen_pfn_t gmfn;
> +		} u;
> +		domid_t domid;
> +		u16 offset;
> +	} source, dest;
> +	u16 len;
> +	u16 flags; /* GNTCOPY_* */
> +	/* OUT parameters. */
> +	s16 status;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy);
> +
> +/*
> + * GNTTABOP_query_size: Query the current and maximum sizes of the shared
> + * grant table.
> + * NOTES:
> + *  1. <dom> may be specified as DOMID_SELF.
> + *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
> + */
> +#define GNTTABOP_query_size 6
> +struct gnttab_query_size {
> +	/* IN parameters. */
> +	domid_t dom;
> +	/* OUT parameters. */
> +	u32 nr_frames;
> +	u32 max_nr_frames;
> +	s16 status; /* GNTST_* */
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);
> +
> +/*
> + * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
> + * tracked by <handle> but atomically replace the page table entry with one
> + * pointing to the machine address under <new_addr>. <new_addr> will be
> + * redirected to the null entry.
> + * NOTES:
> + *  1. The call may fail in an undefined manner if either mapping is not
> + *     tracked by <handle>.
> + *  2. After executing a batch of unmaps, it is guaranteed that no stale
> + *     mappings will remain in the device or host TLBs.
> + */
> +#define GNTTABOP_unmap_and_replace 7
> +struct gnttab_unmap_and_replace {
> +	/* IN parameters. */
> +	u64 host_addr;
> +	u64 new_addr;
> +	grant_handle_t handle;
> +	/* OUT parameters. */
> +	s16 status; /* GNTST_* */
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace);
> +
> +/*
> + * GNTTABOP_set_version: Request a particular version of the grant
> + * table shared table structure. This operation can only be performed
> + * once in any given domain. It must be performed before any grants
> + * are activated; otherwise, the domain will be stuck with version 1.
> + * The only defined versions are 1 and 2.
> + */
> +#define GNTTABOP_set_version 8
> +struct gnttab_set_version {
> +	/* IN parameters */
> +	u32 version;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version);
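
A sketch of a hypervisor-mediated copy using struct gnttab_copy as just
defined: pull one page granted by a peer into a local frame. remote_ref,
remote_domid and local_gmfn are hypothetical, and HYPERVISOR_grant_table_op
is the hypercall wrapper added elsewhere in this series:

	struct gnttab_copy op = {
		.source.u.ref = remote_ref,	/* grant offered by the peer */
		.source.domid = remote_domid,
		.dest.u.gmfn  = local_gmfn,	/* local frame, addressed by MFN */
		.dest.domid   = DOMID_SELF,
		.len          = 4096,
		.flags        = GNTCOPY_source_gref,
	};

	HYPERVISOR_grant_table_op(GNTTABOP_copy, &op, 1);
	if (op.status != GNTST_okay)
		; /* inspect the GNTST_* codes defined below */
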
> +
> +/*
> + * GNTTABOP_get_status_frames: Get the list of frames used to store grant
> + * status for <dom>. In grant format version 2, the status is separated
> + * from the other shared grant fields to allow more efficient synchronization
> + * using barriers instead of atomic cmpexch operations.
> + * <nr_frames> specifies the size of vector <frame_list>.
> + * The frame addresses are returned in the <frame_list>.
> + * Only <nr_frames> addresses are returned, even if the table is larger.
> + * NOTES:
> + *  1. <dom> may be specified as DOMID_SELF.
> + *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
> + */
> +#define GNTTABOP_get_status_frames 9
> +struct gnttab_get_status_frames {
> +	/* IN parameters. */
> +	u32 nr_frames;
> +	domid_t dom;
> +	/* OUT parameters. */
> +	s16 status; /* GNTST_* */
> +
> +	GUEST_HANDLE(u64) frame_list;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames);
> +
> +/*
> + * GNTTABOP_get_version: Get the grant table version which is in
> + * effect for domain <dom>.
> + */
> +#define GNTTABOP_get_version 10
> +struct gnttab_get_version {
> +	/* IN parameters */
> +	domid_t dom;
> +	u16 pad;
> +	/* OUT parameters */
> +	u32 version;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
> +
> +/*
> + * Issue one or more cache maintenance operations on a portion of a
> + * page granted to the calling domain by a foreign domain.
> + */
> +#define GNTTABOP_cache_flush 12
> +struct gnttab_cache_flush {
> +	union {
> +		u64 dev_bus_addr;
> +		grant_ref_t ref;
> +	} a;
> +	u16 offset; /* offset from start of grant */
> +	u16 length; /* size within the grant */
> +#define GNTTAB_CACHE_CLEAN (1 << 0)
> +#define GNTTAB_CACHE_INVAL (1 << 1)
> +#define GNTTAB_CACHE_SOURCE_GREF (1 << 31)
> +	u32 op;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush);
> +
> +/*
> + * Bitfield values for update_pin_status.flags.
> + */
> + /* Map the grant entry for access by I/O devices. */
> +#define _GNTMAP_device_map (0)
> +#define GNTMAP_device_map (1 << _GNTMAP_device_map)
> +/* Map the grant entry for access by host CPUs. */
> +#define _GNTMAP_host_map (1)
> +#define GNTMAP_host_map (1 << _GNTMAP_host_map)
> +/* Accesses to the granted frame will be restricted to read-only access. */
> +#define _GNTMAP_readonly (2)
> +#define GNTMAP_readonly (1 << _GNTMAP_readonly)
> +/*
> + * GNTMAP_host_map subflag:
> + *  0 => The host mapping is usable only by the guest OS.
> + *  1 => The host mapping is usable by guest OS + current application.
> + */
> +#define _GNTMAP_application_map (3)
> +#define GNTMAP_application_map (1 << _GNTMAP_application_map)
> +
> +/*
> + * GNTMAP_contains_pte subflag:
> + *  0 => This map request contains a host virtual address.
> + *  1 => This map request contains the machine address of the PTE to update.
> + */
> +#define _GNTMAP_contains_pte (4)
> +#define GNTMAP_contains_pte (1 << _GNTMAP_contains_pte)
> +
> +/*
> + * Bits to be placed in guest kernel available PTE bits (architecture
> + * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set).
> + */
> +#define _GNTMAP_guest_avail0 (16)
> +#define GNTMAP_guest_avail_mask ((u32)~0 << _GNTMAP_guest_avail0)
> +
> +/*
> + * Values for error status returns. All errors are -ve.
> + */
> +#define GNTST_okay             (0)  /* Normal return.                        */
> +#define GNTST_general_error    (-1) /* General undefined error.              */
> +#define GNTST_bad_domain       (-2) /* Unrecognised domain id.               */
> +#define GNTST_bad_gntref       (-3) /* Unrecognised or inappropriate gntref. */
> +#define GNTST_bad_handle       (-4) /* Unrecognised or inappropriate handle.
*/ > +#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to > map. */ > +#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to > unmap.*/ > +#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. > */ > +#define GNTST_permission_denied (-8) /* Not enough privilege for operation. > */ > +#define GNTST_bad_page (-9) /* Specified page was invalid for op. > */ > +#define GNTST_bad_copy_arg (-10) /* copy arguments cross page > boundary. */ > +#define GNTST_address_too_big (-11) /* transfer page address too large. > */ > +#define GNTST_eagain (-12) /* Operation not done; try again. > */ > + > +#define GNTTABOP_error_msgs { \ > + "okay", \ > + "undefined error", \ > + "unrecognised domain id", \ > + "invalid grant reference", \ > + "invalid mapping handle", \ > + "invalid virtual address", \ > + "invalid device address", \ > + "no spare translation slot in the I/O MMU", \ > + "permission denied", \ > + "bad page", \ > + "copy arguments cross page boundary", \ > + "page address size too large", \ > + "operation not done; try again" \ > +} > + > +#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ > diff --git a/include/xen/interface/hvm/hvm_op.h > b/include/xen/interface/hvm/hvm_op.h > new file mode 100644 > index 0000000000..1c53cad729 > --- /dev/null > +++ b/include/xen/interface/hvm/hvm_op.h > @@ -0,0 +1,69 @@ > +/* > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > + * deal in the Software without restriction, including without limitation the > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or > + * sell copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL THE > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > OR OTHER > + * DEALINGS IN THE SOFTWARE. > + */ > + > +#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ > +#define __XEN_PUBLIC_HVM_HVM_OP_H__ > + > +/* Get/set subcommands: the second argument of the hypercall is a > + * pointer to a xen_hvm_param struct. > + */ > +#define HVMOP_set_param 0 > +#define HVMOP_get_param 1 > +struct xen_hvm_param { > + domid_t domid; /* IN */ > + u32 index; /* IN */ > + u64 value; /* IN/OUT */ > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param); > + > +/* Hint from PV drivers for pagetable destruction. */ > +#define HVMOP_pagetable_dying 9 > +struct xen_hvm_pagetable_dying { > + /* Domain with a pagetable about to be destroyed. 
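
Returning briefly to the grant-table status codes: they are all zero or
negative and GNTTABOP_error_msgs is index-aligned with them, so decoding is a
bounds-checked table lookup. A sketch (ARRAY_SIZE as in U-Boot's
linux/kernel.h):

	static const char *const grant_errmsgs[] = GNTTABOP_error_msgs;

	const char *gnttab_strerror(s16 status)
	{
		if (status <= 0 && -status < (int)ARRAY_SIZE(grant_errmsgs))
			return grant_errmsgs[-status];
		return "unknown grant status";
	}
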
*/ > + domid_t domid; > + /* guest physical address of the toplevel pagetable dying */ > + aligned_u64 gpa; > +}; > + > +typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t; > +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t); > + > +enum hvmmem_type_t { > + HVMMEM_ram_rw, /* Normal read/write guest RAM */ > + HVMMEM_ram_ro, /* Read-only; writes are discarded */ > + HVMMEM_mmio_dm, /* Reads and write go to the device > model */ > +}; > + > +#define HVMOP_get_mem_type 15 > +/* Return hvmmem_type_t for the specified pfn. */ > +struct xen_hvm_get_mem_type { > + /* Domain to be queried. */ > + domid_t domid; > + /* OUT variable. */ > + u16 mem_type; > + u16 pad[2]; /* align next field on 8-byte boundary */ > + /* IN variable. */ > + u64 pfn; > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_mem_type); > + > +#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ > diff --git a/include/xen/interface/hvm/params.h > b/include/xen/interface/hvm/params.h > new file mode 100644 > index 0000000000..4d61fc58d9 > --- /dev/null > +++ b/include/xen/interface/hvm/params.h > @@ -0,0 +1,127 @@ > +/* > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > + * deal in the Software without restriction, including without limitation the > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or > + * sell copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL THE > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > OR OTHER > + * DEALINGS IN THE SOFTWARE. > + */ > + > +#ifndef __XEN_PUBLIC_HVM_PARAMS_H__ > +#define __XEN_PUBLIC_HVM_PARAMS_H__ > + > +#include <xen/interface/hvm/hvm_op.h> > + > +/* > + * Parameter space for HVMOP_{set,get}_param. > + */ > + > +#define HVM_PARAM_CALLBACK_IRQ 0 > +/* > + * How should CPU0 event-channel notifications be delivered? > + * > + * If val == 0 then CPU0 event-channel notifications are not delivered. > + * If val != 0, val[63:56] encodes the type, as follows: > + */ > + > +#define HVM_PARAM_CALLBACK_TYPE_GSI 0 > +/* > + * val[55:0] is a delivery GSI. GSI 0 cannot be used, as it aliases val == 0, > + * and disables all notifications. > + */ > + > +#define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1 > +/* > + * val[55:0] is a delivery PCI INTx line: > + * Domain = val[47:32], Bus = val[31:16] DevFn = val[15:8], IntX = val[1:0] > + */ > + > +#if defined(__i386__) || defined(__x86_64__) > +#define HVM_PARAM_CALLBACK_TYPE_VECTOR 2 > +/* > + * val[7:0] is a vector number. Check for XENFEAT_hvm_callback_vector to > know > + * if this delivery method is available. > + */ > +#elif defined(__arm__) || defined(__aarch64__) > +#define HVM_PARAM_CALLBACK_TYPE_PPI 2 > +/* > + * val[55:16] needs to be zero. 
> + * val[15:8] is interrupt flag of the PPI used by event-channel: > + * bit 8: the PPI is edge(1) or level(0) triggered > + * bit 9: the PPI is active low(1) or high(0) > + * val[7:0] is a PPI number used by event-channel. > + * This is only used by ARM/ARM64 and masking/eoi the interrupt associated > to > + * the notification is handled by the interrupt controller. > + */ > +#endif > + > +#define HVM_PARAM_STORE_PFN 1 > +#define HVM_PARAM_STORE_EVTCHN 2 > + > +#define HVM_PARAM_PAE_ENABLED 4 > + > +#define HVM_PARAM_IOREQ_PFN 5 > + > +#define HVM_PARAM_BUFIOREQ_PFN 6 > + > +/* > + * Set mode for virtual timers (currently x86 only): > + * delay_for_missed_ticks (default): > + * Do not advance a vcpu's time beyond the correct delivery time for > + * interrupts that have been missed due to preemption. Deliver missed > + * interrupts when the vcpu is rescheduled and advance the vcpu's virtual > + * time stepwise for each one. > + * no_delay_for_missed_ticks: > + * As above, missed interrupts are delivered, but guest time always tracks > + * wallclock (i.e., real) time while doing so. > + * no_missed_ticks_pending: > + * No missed interrupts are held pending. Instead, to ensure ticks are > + * delivered at some non-zero rate, if we detect missed ticks then the > + * internal tick alarm is not disabled if the VCPU is preempted during the > + * next tick period. > + * one_missed_tick_pending: > + * Missed interrupts are collapsed together and delivered as one 'late > tick'. > + * Guest time always tracks wallclock (i.e., real) time. > + */ > +#define HVM_PARAM_TIMER_MODE 10 > +#define HVMPTM_delay_for_missed_ticks 0 > +#define HVMPTM_no_delay_for_missed_ticks 1 > +#define HVMPTM_no_missed_ticks_pending 2 > +#define HVMPTM_one_missed_tick_pending 3 > + > +/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */ > +#define HVM_PARAM_HPET_ENABLED 11 > + > +/* Identity-map page directory used by Intel EPT when CR0.PG=0. */ > +#define HVM_PARAM_IDENT_PT 12 > + > +/* Device Model domain, defaults to 0. */ > +#define HVM_PARAM_DM_DOMAIN 13 > + > +/* ACPI S state: currently support S0 and S3 on x86. */ > +#define HVM_PARAM_ACPI_S_STATE 14 > + > +/* TSS used on Intel when CR0.PE=0. */ > +#define HVM_PARAM_VM86_TSS 15 > + > +/* Boolean: Enable aligning all periodic vpts to reduce interrupts */ > +#define HVM_PARAM_VPT_ALIGN 16 > + > +/* Console debug shared memory ring and event channel */ > +#define HVM_PARAM_CONSOLE_PFN 17 > +#define HVM_PARAM_CONSOLE_EVTCHN 18 > + > +#define HVM_NR_PARAMS 19 > + > +#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ > diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h > new file mode 100644 > index 0000000000..7d74c99226 > --- /dev/null > +++ b/include/xen/interface/io/blkif.h > @@ -0,0 +1,726 @@ > +/************************************************************ > ****************** > + * blkif.h > + * > + * Unified block-device I/O interface for Xen guest OSes. 
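
Before the blkif protocol text: a sketch of how a guest typically consumes the
HVM parameter space defined above, e.g. to locate the xenstore page and its
notification channel. It assumes the HYPERVISOR_hvm_op hypercall wrapper added
elsewhere in this series:

	static u64 hvm_get_param(u32 index)
	{
		struct xen_hvm_param p = {
			.domid = DOMID_SELF,
			.index = index,
		};

		if (HYPERVISOR_hvm_op(HVMOP_get_param, &p))
			return 0;	/* treat a failed hypercall as unset */
		return p.value;
	}

	u64 store_pfn    = hvm_get_param(HVM_PARAM_STORE_PFN);
	u64 store_evtchn = hvm_get_param(HVM_PARAM_STORE_EVTCHN);
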
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to
> + * deal in the Software without restriction, including without limitation the
> + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> + * sell copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> + * DEALINGS IN THE SOFTWARE.
> + *
> + * Copyright (c) 2003-2004, Keir Fraser
> + * Copyright (c) 2012, Spectra Logic Corporation
> + */
> +
> +#ifndef __XEN_PUBLIC_IO_BLKIF_H__
> +#define __XEN_PUBLIC_IO_BLKIF_H__
> +
> +#include "ring.h"
> +#include "../grant_table.h"
> +
> +/*
> + * Front->back notifications: When enqueuing a new request, sending a
> + * notification can be made conditional on req_event (i.e., the generic
> + * hold-off mechanism provided by the ring macros). Backends must set
> + * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
> + *
> + * Back->front notifications: When enqueuing a new response, sending a
> + * notification can be made conditional on rsp_event (i.e., the generic
> + * hold-off mechanism provided by the ring macros). Frontends must set
> + * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
> + */
> +
> +#ifndef blkif_vdev_t
> +#define blkif_vdev_t u16
> +#endif
> +#define blkif_sector_t u64
> +
> +/*
> + * Feature and Parameter Negotiation
> + * =================================
> + * The two halves of a Xen block driver utilize nodes within the XenStore to
> + * communicate capabilities and to negotiate operating parameters. This
> + * section enumerates these nodes which reside in the respective front and
> + * backend portions of the XenStore, following the XenBus convention.
> + *
> + * All data in the XenStore is stored as strings. Nodes specifying numeric
> + * values are encoded in decimal. Integer value ranges listed below are
> + * expressed as fixed sized integer types capable of storing the conversion
> + * of a properly formatted node string, without loss of information.
> + *
> + * Any specified default value is in effect if the corresponding XenBus node
> + * is not present in the XenStore.
> + *
> + * XenStore nodes in sections marked "PRIVATE" are solely for use by the
> + * driver side whose XenBus tree contains them.
> + *
> + * XenStore nodes marked "DEPRECATED" in their notes section should only be
> + * used to provide interoperability with legacy implementations.
> + *
> + * See the XenBus state transition diagram below for details on when XenBus
> + * nodes must be published and when they can be queried.
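
Since every node is a decimal-encoded string, probing a boolean feature
reduces to a read plus a strtoul, with an absent node meaning the documented
default. A sketch; xenbus_read() is a hypothetical helper standing in for the
XenStore access that a later patch in this series provides:

	static int backend_feature(const char *backend_path, const char *node)
	{
		char *val = xenbus_read(backend_path, node);	/* hypothetical */
		int enabled;

		if (!val)
			return 0;	/* node absent: default value applies */
		enabled = simple_strtoul(val, NULL, 10);
		free(val);
		return enabled;
	}

	/* e.g. backend_feature(path, "feature-flush-cache") */
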
> + *
> + ******************************************************************************
> + *                            Backend XenBus Nodes
> + ******************************************************************************
> + *
> + *------------------ Backend Device Identification (PRIVATE) ------------------
> + *
> + * mode
> + *      Values:         "r" (read only), "w" (writable)
> + *
> + *      The read or write access permissions to the backing store to be
> + *      granted to the frontend.
> + *
> + * params
> + *      Values:         string
> + *
> + *      A free formatted string providing sufficient information for the
> + *      hotplug script to attach the device and provide a suitable
> + *      handler (ie: a block device) for blkback to use.
> + *
> + * physical-device
> + *      Values:         "MAJOR:MINOR"
> + *      Notes: 11
> + *
> + *      MAJOR and MINOR are the major number and minor number of the
> + *      backing device respectively.
> + *
> + * physical-device-path
> + *      Values:         path string
> + *
> + *      A string that contains the absolute path to the disk image. On
> + *      NetBSD and Linux this is always a block device, while on FreeBSD
> + *      it can be either a block device or a regular file.
> + *
> + * type
> + *      Values:         "file", "phy", "tap"
> + *
> + *      The type of the backing device/object.
> + *
> + *
> + * direct-io-safe
> + *      Values:         0/1 (boolean)
> + *      Default Value:  0
> + *
> + *      The underlying storage is not affected by the direct IO memory
> + *      lifetime bug. See:
> + *        http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
> + *
> + *      Therefore this option gives the backend permission to use
> + *      O_DIRECT, notwithstanding that bug.
> + *
> + *      That is, if this option is enabled, use of O_DIRECT is safe,
> + *      in circumstances where we would normally have avoided it as a
> + *      workaround for that bug. This option is not relevant for all
> + *      backends, and even not necessarily supported for those for
> + *      which it is relevant. A backend which knows that it is not
> + *      affected by the bug can ignore this option.
> + *
> + *      This option doesn't require a backend to use O_DIRECT, so it
> + *      should not be used to try to control the caching behaviour.
> + *
> + *--------------------------------- Features ---------------------------------
> + *
> + * feature-barrier
> + *      Values:         0/1 (boolean)
> + *      Default Value:  0
> + *
> + *      A value of "1" indicates that the backend can process requests
> + *      containing the BLKIF_OP_WRITE_BARRIER request opcode. Requests
> + *      of this type may still be returned at any time with the
> + *      BLKIF_RSP_EOPNOTSUPP result code.
> + *
> + * feature-flush-cache
> + *      Values:         0/1 (boolean)
> + *      Default Value:  0
> + *
> + *      A value of "1" indicates that the backend can process requests
> + *      containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. Requests
> + *      of this type may still be returned at any time with the
> + *      BLKIF_RSP_EOPNOTSUPP result code.
> + *
> + * feature-discard
> + *      Values:         0/1 (boolean)
> + *      Default Value:  0
> + *
> + *      A value of "1" indicates that the backend can process requests
> + *      containing the BLKIF_OP_DISCARD request opcode. Requests
> + *      of this type may still be returned at any time with the
> + *      BLKIF_RSP_EOPNOTSUPP result code.
> + * > + * feature-persistent > + * Values: 0/1 (boolean) > + * Default Value: 0 > + * Notes: 7 > + * > + * A value of "1" indicates that the backend can keep the grants used > + * by the frontend driver mapped, so the same set of grants should be > + * used in all transactions. The maximum number of grants the > backend > + * can map persistently depends on the implementation, but ideally it > + * should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. > Using this > + * feature the backend doesn't need to unmap each grant, preventing > + * costly TLB flushes. The backend driver should only map grants > + * persistently if the frontend supports it. If a backend driver chooses > + * to use the persistent protocol when the frontend doesn't support it, > + * it will probably hit the maximum number of persistently mapped > grants > + * (due to the fact that the frontend won't be reusing the same > grants), > + * and fall back to non-persistent mode. Backend implementations > may > + * shrink or expand the number of persistently mapped grants without > + * notifying the frontend depending on memory constraints (this might > + * cause a performance degradation). > + * > + * If a backend driver wants to limit the maximum number of > persistently > + * mapped grants to a value less than RING_SIZE * > + * BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be > used to > + * discard the grants that are less commonly used. Using a LRU in the > + * backend driver paired with a LIFO queue in the frontend will > + * allow us to have better performance in this scenario. > + * > + *----------------------- Request Transport Parameters ------------------------ > + * > + * max-ring-page-order > + * Values: <uint32_t> > + * Default Value: 0 > + * Notes: 1, 3 > + * > + * The maximum supported size of the request ring buffer in units of > + * lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages, > + * etc.). > + * > + * max-ring-pages > + * Values: <uint32_t> > + * Default Value: 1 > + * Notes: DEPRECATED, 2, 3 > + * > + * The maximum supported size of the request ring buffer in units of > + * machine pages. The value must be a power of 2. > + * > + *------------------------- Backend Device Properties ------------------------- > + * > + * discard-enable > + * Values: 0/1 (boolean) > + * Default Value: 1 > + * > + * This optional property, set by the toolstack, instructs the backend > + * to offer (or not to offer) discard to the frontend. If the property > + * is missing the backend should offer discard if the backing storage > + * actually supports it. > + * > + * discard-alignment > + * Values: <uint32_t> > + * Default Value: 0 > + * Notes: 4, 5 > + * > + * The offset, in bytes from the beginning of the virtual block device, > + * to the first, addressable, discard extent on the underlying device. > + * > + * discard-granularity > + * Values: <uint32_t> > + * Default Value: <"sector-size"> > + * Notes: 4 > + * > + * The size, in bytes, of the individually addressable discard extents > + * of the underlying device. > + * > + * discard-secure > + * Values: 0/1 (boolean) > + * Default Value: 0 > + * Notes: 10 > + * > + * A value of "1" indicates that the backend can process > BLKIF_OP_DISCARD > + * requests with the BLKIF_DISCARD_SECURE flag set. > + * > + * info > + * Values: <uint32_t> (bitmap) > + * > + * A collection of bit flags describing attributes of the backing > + * device. The VDISK_* macros define the meaning of each bit > + * location. 
> + *
> + * sector-size
> + *      Values:         <uint32_t>
> + *
> + *      The logical block size, in bytes, of the underlying storage. This
> + *      must be a power of two with a minimum value of 512.
> + *
> + *      NOTE: Because of implementation bugs in some frontends this must be
> + *            set to 512, unless the frontend advertizes a non-zero value
> + *            in its "feature-large-sector-size" xenbus node. (See below).
> + *
> + * physical-sector-size
> + *      Values:         <uint32_t>
> + *      Default Value:  <"sector-size">
> + *
> + *      The physical block size, in bytes, of the backend storage. This
> + *      must be an integer multiple of "sector-size".
> + *
> + * sectors
> + *      Values:         <u64>
> + *
> + *      The size of the backend device, expressed in units of "sector-size".
> + *      The product of "sector-size" and "sectors" must also be an integer
> + *      multiple of "physical-sector-size", if that node is present.
> + *
> + ******************************************************************************
> + *                            Frontend XenBus Nodes
> + ******************************************************************************
> + *
> + *----------------------- Request Transport Parameters -----------------------
> + *
> + * event-channel
> + *      Values:         <uint32_t>
> + *
> + *      The identifier of the Xen event channel used to signal activity
> + *      in the ring buffer.
> + *
> + * ring-ref
> + *      Values:         <uint32_t>
> + *      Notes: 6
> + *
> + *      The Xen grant reference granting permission for the backend to map
> + *      the sole page in a single page sized ring buffer.
> + *
> + * ring-ref%u
> + *      Values:         <uint32_t>
> + *      Notes: 6
> + *
> + *      For a frontend providing a multi-page ring, a "number of ring pages"
> + *      sized list of nodes, each containing a Xen grant reference granting
> + *      permission for the backend to map the page of the ring located
> + *      at page index "%u". Page indexes are zero based.
> + *
> + * protocol
> + *      Values:         string (XEN_IO_PROTO_ABI_*)
> + *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
> + *
> + *      The machine ABI rules governing the format of all ring request and
> + *      response structures.
> + *
> + * ring-page-order
> + *      Values:         <uint32_t>
> + *      Default Value:  0
> + *      Maximum Value:  MAX(ffs(max-ring-pages) - 1, max-ring-page-order)
> + *      Notes: 1, 3
> + *
> + *      The size of the frontend allocated request ring buffer in units
> + *      of lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
> + *      etc.).
> + *
> + * num-ring-pages
> + *      Values:         <uint32_t>
> + *      Default Value:  1
> + *      Maximum Value:  MAX(max-ring-pages,(0x1 << max-ring-page-order))
> + *      Notes: DEPRECATED, 2, 3
> + *
> + *      The size of the frontend allocated request ring buffer in units of
> + *      machine pages. The value must be a power of 2.
> + *
> + *--------------------------------- Features ---------------------------------
> + *
> + * feature-persistent
> + *      Values:         0/1 (boolean)
> + *      Default Value:  0
> + *      Notes: 7, 8, 9
> + *
> + *      A value of "1" indicates that the frontend will reuse the same grants
> + *      for all transactions, allowing the backend to map them with write
> + *      access (even when it should be read-only). If the frontend hits the
> + *      maximum number of allowed persistently mapped grants, it can fall back
> + *      to non-persistent mode. This will cause a performance degradation,
> + *      since the backend driver will still try to map those grants
> + *      persistently.
Since the persistent grants protocol is compatible with > + * the previous protocol, a frontend driver can choose to work in > + * persistent mode even when the backend doesn't support it. > + * > + * It is recommended that the frontend driver stores the persistently > + * mapped grants in a LIFO queue, so a subset of all persistently > mapped > + * grants gets used commonly. This is done in case the backend driver > + * decides to limit the maximum number of persistently mapped > grants > + * to a value less than RING_SIZE * > BLKIF_MAX_SEGMENTS_PER_REQUEST. > + * > + * feature-large-sector-size > + * Values: 0/1 (boolean) > + * Default Value: 0 > + * > + * A value of "1" indicates that the frontend will correctly supply and > + * interpret all sector-based quantities in terms of the "sector-size" > + * value supplied in the backend info, whatever that may be set to. > + * If this node is not present or its value is "0" then it is assumed > + * that the frontend requires that the logical block size is 512 as it > + * is hardcoded (which is the case in some frontend implementations). > + * > + *------------------------- Virtual Device Properties ------------------------- > + * > + * device-type > + * Values: "disk", "cdrom", "floppy", etc. > + * > + * virtual-device > + * Values: <uint32_t> > + * > + * A value indicating the physical device to virtualize within the > + * frontend's domain. (e.g. "The first ATA disk", "The third SCSI > + * disk", etc.) > + * > + * See docs/misc/vbd-interface.txt for details on the format of this > + * value. > + * > + * Notes > + * ----- > + * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer > + * PV drivers. > + * (2) Multi-page ring buffer scheme first used in some RedHat distributions > + * including a distribution deployed on certain nodes of the Amazon > + * EC2 cluster. > + * (3) Support for multi-page ring buffers was implemented independently, > + * in slightly different forms, by both Citrix and RedHat/Amazon. > + * For full interoperability, block front and backends should publish > + * identical ring parameters, adjusted for unit differences, to the > + * XenStore nodes used in both schemes. > + * (4) Devices that support discard functionality may internally allocate space > + * (discardable extents) in units that are larger than the exported > logical > + * block size. If the backing device has such discardable extents the > + * backend should provide both discard-granularity and > discard-alignment. > + * Providing just one of the two may be considered an error by the > frontend. > + * Backends supporting discard should include discard-granularity and > + * discard-alignment even if it supports discarding individual sectors. > + * Frontends should assume discard-alignment == 0 and > discard-granularity > + * == sector size if these keys are missing. > + * (5) The discard-alignment parameter allows a physical device to be > + * partitioned into virtual devices that do not necessarily begin or > + * end on a discardable extent boundary. > + * (6) When there is only a single page allocated to the request ring, > + * 'ring-ref' is used to communicate the grant reference for this > + * page to the backend. When using a multi-page ring, the 'ring-ref' > + * node is not created. Instead 'ring-ref0' - 'ring-refN' are used. > + * (7) When using persistent grants data has to be copied from/to the page > + * where the grant is currently mapped. 
> + *     The overhead of doing this copy, however, does not outweigh the
> + *     speed improvement of not having to unmap the grants.
> + * (8) The frontend driver has to allow the backend driver to map all grants
> + *     with write access, even when they should be mapped read-only, since
> + *     further requests may reuse these grants and require write permissions.
> + * (9) The Linux implementation doesn't have a limit on the maximum number
> + *     of grants that can be persistently mapped in the frontend driver, but
> + *     due to the frontend driver implementation it should never be bigger
> + *     than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
> + *(10) The discard-secure property may be present and will be set to 1 if the
> + *     backing device supports secure discard.
> + *(11) Only used by Linux and NetBSD.
> + */
> +
> +/*
> + * Multiple hardware queues/rings:
> + * If supported, the backend will write the key "multi-queue-max-queues" to
> + * the directory for that vbd, and set its value to the maximum supported
> + * number of queues.
> + * Frontends that are aware of this feature and wish to use it can write the
> + * key "multi-queue-num-queues" with the number they wish to use, which must
> + * be greater than zero, and no more than the value reported by the backend
> + * in "multi-queue-max-queues".
> + *
> + * For frontends requesting just one queue, the usual event-channel and
> + * ring-ref keys are written as before, simplifying the backend processing
> + * to avoid distinguishing between a frontend that doesn't understand the
> + * multi-queue feature, and one that does, but requested only one queue.
> + *
> + * Frontends requesting two or more queues must not write the toplevel
> + * event-channel and ring-ref keys, instead writing those keys under sub-keys
> + * having the name "queue-N" where N is the integer ID of the queue/ring for
> + * which those keys belong. Queues are indexed from zero.
> + * For example, a frontend with two queues must write the following set of
> + * queue-related keys:
> + *
> + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
> + * /local/domain/1/device/vbd/0/queue-0 = ""
> + * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
> + * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
> + * /local/domain/1/device/vbd/0/queue-1 = ""
> + * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
> + * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
> + *
> + * It is also possible to use multiple queues/rings together with
> + * the multi-page ring buffer feature.
> + * For example, a frontend requesting two queues/rings, where each ring
> + * buffer is two pages, must write the following set of related keys:
> + *
> + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
> + * /local/domain/1/device/vbd/0/ring-page-order = "1"
> + * /local/domain/1/device/vbd/0/queue-0 = ""
> + * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
> + * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
> + * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
> + * /local/domain/1/device/vbd/0/queue-1 = ""
> + * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
> + * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
> + * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
> + *
> + */
> +
> +/*
> + * STATE DIAGRAMS
> + *
> + *****************************************************************************
> + *                                   Startup                                 *
> + *****************************************************************************
> + *
> + * Tool stack creates front and back nodes with state XenbusStateInitialising.
> + *
> + * Front                                Back
> + * =================================    =====================================
> + * XenbusStateInitialising              XenbusStateInitialising
> + *  o Query virtual device              o Query backend device identification
> + *    properties.                         data.
> + *  o Setup OS device instance.         o Open and validate backend device.
> + *                                      o Publish backend features and
> + *                                        transport parameters.
> + *                                                      |
> + *                                                      |
> + *                                                      V
> + *                                      XenbusStateInitWait
> + *
> + *  o Query backend features and
> + *    transport parameters.
> + *  o Allocate and initialize the
> + *    request ring.
> + *  o Publish transport parameters
> + *    that will be in effect during
> + *    this connection.
> + *              |
> + *              |
> + *              V
> + * XenbusStateInitialised
> + *
> + *                                      o Query frontend transport parameters.
> + *                                      o Connect to the request ring and
> + *                                        event channel.
> + *                                      o Publish backend device properties.
> + *                                                      |
> + *                                                      |
> + *                                                      V
> + *                                      XenbusStateConnected
> + *
> + *  o Query backend device properties.
> + *  o Finalize OS virtual device
> + *    instance.
> + *              |
> + *              |
> + *              V
> + * XenbusStateConnected
> + *
> + * Note: Drivers that do not support any optional features, or the negotiation
> + *       of transport parameters, can skip certain states in the state
> + *       machine:
> + *
> + *       o A frontend may transition to XenbusStateInitialised without
> + *         waiting for the backend to enter XenbusStateInitWait. In this
> + *         case, default transport parameters are in effect and any
> + *         transport parameters published by the frontend must contain
> + *         their default values.
> + *
> + *       o A backend may transition to XenbusStateInitialised, bypassing
> + *         XenbusStateInitWait, without waiting for the frontend to first
> + *         enter the XenbusStateInitialised state. In this case, default
> + *         transport parameters are in effect and any transport parameters
> + *         published by the backend must contain their default values.
> + *
> + *       Drivers that support optional features and/or transport parameter
> + *       negotiation must tolerate these additional state transition paths.
> + *       In general this means performing the work of any skipped state
> + *       transition, if it has not already been performed, in addition to the
> + *       work associated with entry into the current state.
> + */
> +
> +/*
> + * REQUEST CODES.
> + */
> +#define BLKIF_OP_READ              0
> +#define BLKIF_OP_WRITE             1
> +/*
> + * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER
> + * operation code ("barrier request") must be completed prior to the
> + * execution of the barrier request. All writes issued after the barrier
> + * request must not execute until after the completion of the barrier request.
> + *
> + * Optional. See "feature-barrier" XenBus node documentation above.
> + */
> +#define BLKIF_OP_WRITE_BARRIER     2
> +/*
> + * Commit any uncommitted contents of the backing device's volatile cache
> + * to stable storage.
> + *
> + * Optional. See "feature-flush-cache" XenBus node documentation above.
> + */
> +#define BLKIF_OP_FLUSH_DISKCACHE   3
> +/*
> + * Used in SLES sources for device specific command packet
> + * contained within the request. Reserved for that purpose.
> + */
> +#define BLKIF_OP_RESERVED_1        4
> +/*
> + * Indicate to the backend device that a region of storage is no longer in
> + * use, and may be discarded at any time without impact to the client. If
> + * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the
> + * discarded region on the device must be rendered unrecoverable before the
> + * command returns.
> + *
> + * This operation is analogous to performing a trim (ATA) or unmap (SCSI)
> + * command on a native device.
> + *
> + * More information about trim/unmap operations can be found at:
> + * http://t13.org/Documents/UploadedDocuments/docs2008/
> + *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
> + * http://www.seagate.com/staticfiles/support/disc/manuals/
> + *     Interface%20manuals/100293068c.pdf
> + *
> + * Optional. See "feature-discard", "discard-alignment",
> + * "discard-granularity", and "discard-secure" in the XenBus node
> + * documentation above.
> + */
> +#define BLKIF_OP_DISCARD           5
> +
> +/*
> + * Recognized if "feature-max-indirect-segments" is present in the backend
> + * xenbus info. The "feature-max-indirect-segments" node contains the maximum
> + * number of segments allowed by the backend per request. If the node is
> + * present, the frontend might use blkif_request_indirect structs in order to
> + * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
> + * maximum number of indirect segments is fixed by the backend, but the
> + * frontend can issue requests with any number of indirect segments as long
> + * as it's less than the number provided by the backend. The indirect_grefs
> + * field in blkif_request_indirect should be filled by the frontend with the
> + * grant references of the pages that are holding the indirect segments.
> + * These pages are filled with an array of blkif_request_segment that hold
> + * the information about the segments. The number of indirect pages to use
> + * is determined by the number of segments an indirect request contains.
> Every > + * indirect page can contain a maximum of > + * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to > + * calculate the number of indirect pages to use we have to do > + * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct > blkif_request_segment))). > + * > + * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* > + * create the "feature-max-indirect-segments" node! > + */ > +#define BLKIF_OP_INDIRECT 6 > + > +/* > + * Maximum scatter/gather segments per request. > + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. > + * NB. This could be 12 if the ring indexes weren't stored in the same page. > + */ > +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 > + > +/* > + * Maximum number of indirect pages to use per request. > + */ > +#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 > + > +/* > + * NB. 'first_sect' and 'last_sect' in blkif_request_segment, as well as > + * 'sector_number' in blkif_request, blkif_request_discard and > + * blkif_request_indirect are sector-based quantities. See the description > + * of the "feature-large-sector-size" frontend xenbus node above for > + * more information. > + */ > +struct blkif_request_segment { > + grant_ref_t gref; /* reference to I/O buffer frame */ > + /* @first_sect: first sector in frame to transfer (inclusive). */ > + /* @last_sect: last sector in frame to transfer (inclusive). */ > + u8 first_sect, last_sect; > +}; > + > +/* > + * Starting ring element for any I/O request. > + */ > +struct blkif_request { > + u8 operation; /* BLKIF_OP_??? > */ > + u8 nr_segments; /* number of segments > */ > + blkif_vdev_t handle; /* only for read/write requests > */ > + u64 id; /* private guest value, echoed in resp */ > + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ > + struct blkif_request_segment > seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > +}; > + > +typedef struct blkif_request blkif_request_t; > + > +/* > + * Cast to this structure when blkif_request.operation == > BLKIF_OP_DISCARD > + * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request) > + */ > +struct blkif_request_discard { > + u8 operation; /* BLKIF_OP_DISCARD > */ > + u8 flag; /* BLKIF_DISCARD_SECURE or zero > */ > +#define BLKIF_DISCARD_SECURE (1 << 0) /* ignored if discard-secure=0 > */ > + blkif_vdev_t handle; /* same as for read/write requests > */ > + u64 id; /* private guest value, echoed in resp */ > + blkif_sector_t sector_number;/* start sector idx on disk > */ > + u64 nr_sectors; /* number of contiguous sectors to discard*/ > +}; > + > +typedef struct blkif_request_discard blkif_request_discard_t; > + > +struct blkif_request_indirect { > + u8 operation; /* BLKIF_OP_INDIRECT > */ > + u8 indirect_op; /* BLKIF_OP_{READ/WRITE} > */ > + u16 nr_segments; /* number of segments > */ > + u64 id; /* private guest value, echoed in resp */ > + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ > + blkif_vdev_t handle; /* same as for read/write requests > */ > + grant_ref_t > indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; > +#ifdef __i386__ > + u64 pad; /* Make it 64 byte aligned on i386 > */ > +#endif > +}; > + > +typedef struct blkif_request_indirect blkif_request_indirect_t; > + > +struct blkif_response { > + u64 id; /* copied from request */ > + u8 operation; /* copied from request */ > + s16 status; /* BLKIF_RSP_??? */ > +}; > + > +typedef struct blkif_response blkif_response_t; > + > +/* > + * STATUS RETURN CODES. 
> + */ > + /* Operation not supported (only happens on barrier writes). */ > +#define BLKIF_RSP_EOPNOTSUPP -2 > + /* Operation failed for some unspecified reason (-EIO). */ > +#define BLKIF_RSP_ERROR -1 > + /* Operation completed successfully. */ > +#define BLKIF_RSP_OKAY 0 > + > +/* > + * Generate blkif ring structures and types. > + */ > +DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); > + > +#define VDISK_CDROM 0x1 > +#define VDISK_REMOVABLE 0x2 > +#define VDISK_READONLY 0x4 > + > +#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ > + > +/* > + * Local variables: > + * mode: C > + * c-file-style: "BSD" > + * c-basic-offset: 4 > + * tab-width: 4 > + * indent-tabs-mode: nil > + * End: > + */ > diff --git a/include/xen/interface/io/console.h > b/include/xen/interface/io/console.h > new file mode 100644 > index 0000000000..3489fc7a60 > --- /dev/null > +++ b/include/xen/interface/io/console.h > @@ -0,0 +1,56 @@ > +/************************************************************ > ****************** > + * console.h > + * > + * Console I/O interface for Xen guest OSes. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > + * deal in the Software without restriction, including without limitation the > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or > + * sell copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL THE > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > OR OTHER > + * DEALINGS IN THE SOFTWARE. 
> + * > + * Copyright (c) 2005, Keir Fraser > + */ > + > +#ifndef __XEN_PUBLIC_IO_CONSOLE_H__ > +#define __XEN_PUBLIC_IO_CONSOLE_H__ > + > +typedef u32 XENCONS_RING_IDX; > + > +#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring) - 1)) > + > +struct xencons_interface { > + char in[1024]; > + char out[2048]; > + XENCONS_RING_IDX in_cons, in_prod; > + XENCONS_RING_IDX out_cons, out_prod; > +}; > + > +#ifdef XEN_WANT_FLEX_CONSOLE_RING > +#include "ring.h" > +DEFINE_XEN_FLEX_RING(xencons); > +#endif > + > +#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */ > + > +/* > + * Local variables: > + * mode: C > + * c-file-style: "BSD" > + * c-basic-offset: 4 > + * tab-width: 4 > + * indent-tabs-mode: nil > + * End: > + */ > diff --git a/include/xen/interface/io/protocols.h > b/include/xen/interface/io/protocols.h > new file mode 100644 > index 0000000000..52b4de0f81 > --- /dev/null > +++ b/include/xen/interface/io/protocols.h > @@ -0,0 +1,42 @@ > +/************************************************************ > ****************** > + * protocols.h > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > + * deal in the Software without restriction, including without limitation the > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or > + * sell copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL THE > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > OR OTHER > + * DEALINGS IN THE SOFTWARE. > + * > + * Copyright (c) 2008, Keir Fraser > + */ > + > +#ifndef __XEN_PROTOCOLS_H__ > +#define __XEN_PROTOCOLS_H__ > + > +#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" > +#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" > +#define XEN_IO_PROTO_ABI_ARM "arm-abi" > + > +#if defined(__i386__) > +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 > +#elif defined(__x86_64__) > +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 > +#elif defined(__arm__) || defined(__aarch64__) > +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM > +#else > +# error arch fixup needed here > +#endif > + > +#endif > diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h > new file mode 100644 > index 0000000000..4e02678e3c > --- /dev/null > +++ b/include/xen/interface/io/ring.h > @@ -0,0 +1,479 @@ > +/************************************************************ > ****************** > + * ring.h > + * > + * Shared producer-consumer ring macros. 
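The ring macros in this header are subtle, so a minimal front-end usage
sketch may help review. It assumes a 4 KiB shared page, trivial illustrative
request/response types, and a hypothetical notify_remote() that kicks the
event channel; the xen_wmb()/xen_mb() barriers used by the macros come from
the arch headers:

#include <xen/interface/io/ring.h>

typedef struct { u64 id; } my_req_t;	/* illustrative request type */
typedef struct { u64 id; } my_rsp_t;	/* illustrative response type */

DEFINE_RING_TYPES(mytag, my_req_t, my_rsp_t);

extern void notify_remote(void);	/* hypothetical event-channel kick */

static void mytag_submit_one(void *shared_page)
{
	mytag_sring_t *sring = shared_page;
	mytag_front_ring_t front;
	my_req_t *req;
	int notify;

	/* Only the front end initialises the shared ring. */
	SHARED_RING_INIT(sring);
	FRONT_RING_INIT(&front, sring, 4096);

	req = RING_GET_REQUEST(&front, front.req_prod_pvt);
	req->id = 1;			/* fill in the request */
	front.req_prod_pvt++;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&front, notify);
	if (notify)
		notify_remote();
}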
> + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > + * deal in the Software without restriction, including without limitation the > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or > + * sell copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL THE > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > OR OTHER > + * DEALINGS IN THE SOFTWARE. > + * > + * Tim Deegan and Andrew Warfield November 2004. > + */ > + > +#ifndef __XEN_PUBLIC_IO_RING_H__ > +#define __XEN_PUBLIC_IO_RING_H__ > + > +/* > + * When #include'ing this header, you need to provide the following > + * declaration upfront: > + * - standard integers types (u8, u16, etc) > + * They are provided by stdint.h of the standard headers. > + * > + * In addition, if you intend to use the FLEX macros, you also need to > + * provide the following, before invoking the FLEX macros: > + * - size_t > + * - memcpy > + * - grant_ref_t > + * These declarations are provided by string.h of the standard headers, > + * and grant_table.h from the Xen public headers. > + */ > + > +#include <xen/interface/grant_table.h> > + > +typedef unsigned int RING_IDX; > + > +/* Round a 32-bit unsigned constant down to the nearest power of two. */ > +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) > & 0x1)) > +#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : > __RD2(_x)) > +#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : > __RD4(_x)) > +#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : > __RD8(_x)) > +#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : > __RD16(_x)) > + > +/* > + * Calculate size of a shared ring, given the total available space for the > + * ring and indexes (_sz), and the name tag of the request/response > structure. > + * A ring contains as many entries as will fit, rounded down to the nearest > + * power of two (so we can mask with (size-1) to loop around). > + */ > +#define __CONST_RING_SIZE(_s, _sz) \ > + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ > + sizeof(((struct _s##_sring *)0)->ring[0]))) > +/* > + * The same for passing in an actual pointer instead of a name tag. > + */ > +#define __RING_SIZE(_s, _sz) \ > + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) > + > +/* > + * Macros to make the correct C datatypes for a new kind of ring. > + * > + * To make a new ring datatype, you need to have two message structures, > + * let's say request_t, and response_t already defined. > + * > + * In a header where you want the ring datatype declared, you then do: > + * > + * DEFINE_RING_TYPES(mytag, request_t, response_t); > + * > + * These expand out to give you a set of types, as you can see below. 
> + * The most important of these are: > + * > + * mytag_sring_t - The shared ring. > + * mytag_front_ring_t - The 'front' half of the ring. > + * mytag_back_ring_t - The 'back' half of the ring. > + * > + * To initialize a ring in your code you need to know the location and size > + * of the shared memory area (PAGE_SIZE, for instance). To initialise > + * the front half: > + * > + * mytag_front_ring_t front_ring; > + * SHARED_RING_INIT((mytag_sring_t *)shared_page); > + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, > PAGE_SIZE); > + * > + * Initializing the back follows similarly (note that only the front > + * initializes the shared ring): > + * > + * mytag_back_ring_t back_ring; > + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, > PAGE_SIZE); > + */ > + > +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) > \ > + \ > +/* Shared ring entry */ > \ > +union __name##_sring_entry > { \ > + __req_t req; > \ > + __rsp_t rsp; > \ > +}; > \ > + \ > +/* Shared ring page */ > \ > +struct __name##_sring > { \ > + RING_IDX req_prod, req_event; > \ > + RING_IDX rsp_prod, rsp_event; > \ > + union > { > \ > + struct > { \ > + u8 smartpoll_active; > \ > + } netif; > \ > + struct > { \ > + u8 msg; > \ > + } tapif_user; > \ > + u8 pvt_pad[4]; > \ > + } pvt; > \ > + u8 __pad[44]; > \ > + union __name##_sring_entry ring[1]; /* variable-length */ > \ > +}; > \ > + \ > +/* "Front" end's private variables */ > \ > +struct __name##_front_ring > { \ > + RING_IDX req_prod_pvt; > \ > + RING_IDX rsp_cons; > \ > + unsigned int nr_ents; > \ > + struct __name##_sring *sring; > \ > +}; > \ > + \ > +/* "Back" end's private variables */ > \ > +struct __name##_back_ring > { \ > + RING_IDX rsp_prod_pvt; > \ > + RING_IDX req_cons; > \ > + unsigned int nr_ents; > \ > + struct __name##_sring *sring; > \ > +}; > \ > + \ > +/* Syntactic sugar */ > \ > +typedef struct __name##_sring __name##_sring_t; > \ > +typedef struct __name##_front_ring __name##_front_ring_t; > \ > +typedef struct __name##_back_ring __name##_back_ring_t > + > +/* > + * Macros for manipulating rings. > + * > + * FRONT_RING_whatever works on the "front end" of a ring: here > + * requests are pushed on to the ring and responses taken off it. > + * > + * BACK_RING_whatever works on the "back end" of a ring: here > + * requests are taken off the ring and responses put on. > + * > + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. > + * This is OK in 1-for-1 request-response situations where the > + * requestor (front end) never has more than RING_SIZE()-1 > + * outstanding requests. > + */ > + > +/* Initialising empty rings */ > +#define SHARED_RING_INIT(_s) do > { \ > + (_s)->req_prod = (_s)->rsp_prod = 0; > \ > + (_s)->req_event = (_s)->rsp_event = 1; > \ > + (void)memset((_s)->pvt.pvt_pad, 0, sizeof((_s)->pvt.pvt_pad)); > \ > + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); > \ > +} while (0) > + > +#define FRONT_RING_INIT(_r, _s, __size) do > { \ > + (_r)->req_prod_pvt = 0; > \ > + (_r)->rsp_cons = 0; > \ > + (_r)->nr_ents = __RING_SIZE(_s, __size); > \ > + (_r)->sring = (_s); > \ > +} while (0) > + > +#define BACK_RING_INIT(_r, _s, __size) do > { \ > + (_r)->rsp_prod_pvt = 0; > \ > + (_r)->req_cons = 0; > \ > + (_r)->nr_ents = __RING_SIZE(_s, __size); > \ > + (_r)->sring = (_s); > \ > +} while (0) > + > +/* How big is this ring? */ > +#define RING_SIZE(_r) > \ > + ((_r)->nr_ents) > + > +/* Number of free requests (for use on front side only). 
> + */
> +#define RING_FREE_REQUESTS(_r)                                          \
> +    (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
> +
> +/* Test if there is an empty slot available on the front ring.
> + * (This is only meaningful from the front.)
> + */
> +#define RING_FULL(_r)                                                   \
> +    (RING_FREE_REQUESTS(_r) == 0)
> +
> +/* Test if there are outstanding messages to be processed on a ring. */
> +#define RING_HAS_UNCONSUMED_RESPONSES(_r)                               \
> +    ((_r)->sring->rsp_prod - (_r)->rsp_cons)
> +
> +#ifdef __GNUC__
> +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({                             \
> +    unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;          \
> +    unsigned int rsp = RING_SIZE(_r) -                                  \
> +        ((_r)->req_cons - (_r)->rsp_prod_pvt);                          \
> +    req < rsp ? req : rsp;                                              \
> +})
> +#else
> +/* Same as above, but without the nice GCC ({ ... }) syntax. */
> +#define RING_HAS_UNCONSUMED_REQUESTS(_r)                                \
> +    ((((_r)->sring->req_prod - (_r)->req_cons) <                        \
> +      (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ?        \
> +     ((_r)->sring->req_prod - (_r)->req_cons) :                         \
> +     (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt)))
> +#endif
> +
> +/* Direct access to individual ring elements, by index. */
> +#define RING_GET_REQUEST(_r, _idx)                                      \
> +    (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
> +
> +/*
> + * Get a local copy of a request.
> + *
> + * Use this in preference to RING_GET_REQUEST() so all processing is
> + * done on a local copy that cannot be modified by the other end.
> + *
> + * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause
> + * this to be ineffective where _req is a struct which consists of only
> + * bitfields.
> + */
> +#define RING_COPY_REQUEST(_r, _idx, _req) do {                          \
> +    /* Use volatile to force the copy into _req. */                     \
> +    *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx);       \
> +} while (0)
> +
> +#define RING_GET_RESPONSE(_r, _idx)                                     \
> +    (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
> +
> +/* Loop termination condition: Would the specified index overflow the ring? */
> +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons)                           \
> +    (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
> +
> +/* Ill-behaved frontend determination: Can there be this many requests? */
> +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod)                           \
> +    (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r))
> +
> +#define RING_PUSH_REQUESTS(_r) do {                                     \
> +    xen_wmb(); /* back sees requests /before/ updated producer index */ \
> +    (_r)->sring->req_prod = (_r)->req_prod_pvt;                         \
> +} while (0)
> +
> +#define RING_PUSH_RESPONSES(_r) do {                                    \
> +    xen_wmb(); /* front sees resps /before/ updated producer index */   \
> +    (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt;                         \
> +} while (0)
> +
> +/*
> + * Notification hold-off (req_event and rsp_event):
> + *
> + * When queueing requests or responses on a shared ring, it may not always
> + * be necessary to notify the remote end. For example, if requests are in
> + * flight in a backend, the front may be able to queue further requests
> + * without notifying the back (if the back checks for new requests when it
> + * queues responses).
> + *
> + * When enqueuing requests or responses:
> + *
> + * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY().
> + * The second argument is a boolean return value. True indicates that the
> + * receiver requires an asynchronous notification.
> + *
> + * After dequeuing requests or responses (before sleeping the connection):
> + *
> + * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
> + * The second argument is a boolean return value. True indicates that there
> + * are pending messages on the ring (i.e., the connection should not be put
> + * to sleep).
> + *
> + * These macros will set the req_event/rsp_event field to trigger a
> + * notification on the very next message that is enqueued. If you want to
> + * create batches of work (i.e., only receive a notification after several
> + * messages have been enqueued) then you will need to create a customised
> + * version of the FINAL_CHECK macro in your own code, which sets the event
> + * field appropriately.
> + */
> +
> +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do {           \
> +    RING_IDX __old = (_r)->sring->req_prod;                             \
> +    RING_IDX __new = (_r)->req_prod_pvt;                                \
> +    xen_wmb(); /* back sees requests /before/ updated producer index */ \
> +    (_r)->sring->req_prod = __new;                                      \
> +    xen_mb(); /* back sees new requests /before/ we check req_event */  \
> +    (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) <           \
> +                 (RING_IDX)(__new - __old));                            \
> +} while (0)
> +
> +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do {          \
> +    RING_IDX __old = (_r)->sring->rsp_prod;                             \
> +    RING_IDX __new = (_r)->rsp_prod_pvt;                                \
> +    xen_wmb(); /* front sees resps /before/ updated producer index */   \
> +    (_r)->sring->rsp_prod = __new;                                      \
> +    xen_mb(); /* front sees new resps /before/ we check rsp_event */    \
> +    (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) <           \
> +                 (RING_IDX)(__new - __old));                            \
> +} while (0)
> +
> +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do {             \
> +    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                   \
> +    if (_work_to_do)                                                    \
> +        break;                                                          \
> +    (_r)->sring->req_event = (_r)->req_cons + 1;                        \
> +    xen_mb();                                                           \
> +    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                   \
> +} while (0)
> +
> +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do {            \
> +    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
> +    if (_work_to_do)                                                    \
> +        break;                                                          \
> +    (_r)->sring->rsp_event = (_r)->rsp_cons + 1;                        \
> +    xen_mb();                                                           \
> +    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
> +} while (0)
> +
> +/*
> + * DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings and
> + * functions to check if there is data on the ring, and to read and
> + * write to them.
> + *
> + * DEFINE_XEN_FLEX_RING is similar to DEFINE_XEN_FLEX_RING_AND_INTF, but
> + * does not define the indexes page. As different protocols can have
> + * extensions to the basic format, this macro allows them to define their
> + * own struct.
> + *
> + * XEN_FLEX_RING_SIZE
> + *   Convenience macro to calculate the size of one of the two rings
> + *   from the overall order.
> + *
> + * $NAME_mask
> + *   Function to apply the size mask to an index, to reduce the index
> + *   within the range [0-size].
> + *
> + * $NAME_read_packet
> + *   Function to read data from the ring. The amount of data to read is
> + *   specified by the "size" argument.
> + *
> + * $NAME_write_packet
> + *   Function to write data to the ring. The amount of data to write is
> + *   specified by the "size" argument.
> + * > + * $NAME_get_ring_ptr > + * Convenience function that returns a pointer to read/write to the > + * ring at the right location. > + * > + * $NAME_data_intf > + * Indexes page, shared between frontend and backend. It also > + * contains the array of grant refs. > + * > + * $NAME_queued > + * Function to calculate how many bytes are currently on the ring, > + * ready to be read. It can also be used to calculate how much free > + * space is currently on the ring (XEN_FLEX_RING_SIZE() - > + * $NAME_queued()). > + */ > + > +#ifndef XEN_PAGE_SHIFT > +/* The PAGE_SIZE for ring protocols and hypercall interfaces is always > + * 4K, regardless of the architecture, and page granularity chosen by > + * operating systems. > + */ > +#define XEN_PAGE_SHIFT 12 > +#endif > +#define XEN_FLEX_RING_SIZE(order) > \ > + (1UL << ((order) + XEN_PAGE_SHIFT - 1)) > + > +#define DEFINE_XEN_FLEX_RING(name) > \ > +static inline RING_IDX name##_mask(RING_IDX idx, RING_IDX ring_size) > \ > +{ > \ > + return idx & (ring_size - 1); > \ > +} > \ > + \ > +static inline unsigned char *name##_get_ring_ptr(unsigned char *buf, > \ > + RING_IDX idx, \ > + RING_IDX ring_size) \ > +{ > \ > + return buf + name##_mask(idx, ring_size); > \ > +} > \ > + \ > +static inline void name##_read_packet(void *opaque, > \ > + const unsigned char *buf, \ > + size_t size, > \ > + RING_IDX masked_prod, > \ > + RING_IDX *masked_cons, > \ > + RING_IDX ring_size) > \ > +{ > \ > + if (*masked_cons < masked_prod || > \ > + size <= ring_size - *masked_cons) > { \ > + memcpy(opaque, buf + *masked_cons, size); > \ > + } else > { > \ > + memcpy(opaque, buf + *masked_cons, ring_size - *masked_cons); > \ > + memcpy((unsigned char *)opaque + ring_size - *masked_cons, buf, > \ > + size - (ring_size - *masked_cons)); > \ > + } > \ > + *masked_cons = name##_mask(*masked_cons + size, ring_size); > \ > +} > \ > + \ > +static inline void name##_write_packet(unsigned char *buf, > \ > + const void *opaque, > \ > + size_t size, > \ > + RING_IDX *masked_prod, > \ > + RING_IDX masked_cons, > \ > + RING_IDX ring_size) > \ > +{ > \ > + if (*masked_prod < masked_cons || > \ > + size <= ring_size - *masked_prod) > { \ > + memcpy(buf + *masked_prod, opaque, size); > \ > + } else > { > \ > + memcpy(buf + *masked_prod, opaque, ring_size - *masked_prod); > \ > + memcpy(buf, (unsigned char *)opaque + (ring_size - *masked_prod), > \ > + size - (ring_size - *masked_prod)); > \ > + } > \ > + *masked_prod = name##_mask(*masked_prod + size, ring_size); > \ > +} > \ > + \ > +static inline RING_IDX name##_queued(RING_IDX prod, > \ > + RING_IDX cons, > \ > + RING_IDX ring_size) > \ > +{ > \ > + RING_IDX size; > \ > + \ > + if (prod == cons) > \ > + return 0; > \ > + \ > + prod = name##_mask(prod, ring_size); > \ > + cons = name##_mask(cons, ring_size); > \ > + \ > + if (prod == cons) > \ > + return ring_size; > \ > + \ > + if (prod > cons) > \ > + size = prod - cons; > \ > + else > \ > + size = ring_size - (cons - prod); > \ > + return size; > \ > +} > \ > + \ > +struct name##_data > { > \ > + unsigned char *in; /* half of the allocation */ > \ > + unsigned char *out; /* half of the allocation */ > \ > +} > + > +#define DEFINE_XEN_FLEX_RING_AND_INTF(name) > \ > +struct name##_data_intf > { \ > + RING_IDX in_cons, in_prod; > \ > + \ > + u8 pad1[56]; > \ > + \ > + RING_IDX out_cons, out_prod; > \ > + \ > + u8 pad2[56]; > \ > + \ > + RING_IDX ring_order; > \ > + grant_ref_t ref[]; > \ > +}; > \ > +DEFINE_XEN_FLEX_RING(name) > + > +#endif /* __XEN_PUBLIC_IO_RING_H__ */ > + > +/* > 
+ * Local variables: > + * mode: C > + * c-file-style: "BSD" > + * c-basic-offset: 4 > + * tab-width: 8 > + * indent-tabs-mode: nil > + * End: > + */ > diff --git a/include/xen/interface/io/xenbus.h > b/include/xen/interface/io/xenbus.h > new file mode 100644 > index 0000000000..f452748b03 > --- /dev/null > +++ b/include/xen/interface/io/xenbus.h > @@ -0,0 +1,81 @@ > +/************************************************************ > ***************** > + * xenbus.h > + * > + * Xenbus protocol details. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > + * deal in the Software without restriction, including without limitation the > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or > + * sell copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL THE > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > OR OTHER > + * DEALINGS IN THE SOFTWARE. > + * > + * Copyright (C) 2005 XenSource Ltd. > + */ > + > +#ifndef _XEN_PUBLIC_IO_XENBUS_H > +#define _XEN_PUBLIC_IO_XENBUS_H > + > +/* > + * The state of either end of the Xenbus, i.e. the current communication > + * status of initialisation across the bus. States here imply nothing about > + * the state of the connection between the driver and the kernel's device > + * layers. > + */ > +enum xenbus_state { > + XenbusStateUnknown = 0, > + > + XenbusStateInitialising = 1, > + > + /* > + * InitWait: Finished early initialisation but waiting for information > + * from the peer or hotplug scripts. > + */ > + XenbusStateInitWait = 2, > + > + /* > + * Initialised: Waiting for a connection from the peer. > + */ > + XenbusStateInitialised = 3, > + > + XenbusStateConnected = 4, > + > + /* > + * Closing: The device is being closed due to an error or an unplug event. > + */ > + XenbusStateClosing = 5, > + > + XenbusStateClosed = 6, > + > + /* > + * Reconfiguring: The device is being reconfigured. > + */ > + XenbusStateReconfiguring = 7, > + > + XenbusStateReconfigured = 8 > +}; > + > +typedef enum xenbus_state XenbusState; > + > +#endif /* _XEN_PUBLIC_IO_XENBUS_H */ > + > +/* > + * Local variables: > + * mode: C > + * c-file-style: "BSD" > + * c-basic-offset: 4 > + * tab-width: 4 > + * indent-tabs-mode: nil > + * End: > + */ > diff --git a/include/xen/interface/io/xs_wire.h > b/include/xen/interface/io/xs_wire.h > new file mode 100644 > index 0000000000..87987334bf > --- /dev/null > +++ b/include/xen/interface/io/xs_wire.h > @@ -0,0 +1,151 @@ > +/* > + * Details of the "wire" protocol between Xen Store Daemon and client > + * library or guest kernel. 
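To make the wire format concrete: a rough sketch of framing an XS_READ
request with the xsd_sockmsg header defined below. Error handling is
omitted, and xs_ring_write() is a hypothetical stand-in for copying bytes
into the req[] ring of struct xenstore_domain_interface:

#include <string.h>

extern void xs_ring_write(const void *data, unsigned int len); /* stand-in */

static void xs_send_read(const char *path)
{
	struct xsd_sockmsg hdr;

	hdr.type = XS_READ;
	hdr.req_id = 1;			/* echoed back in the response */
	hdr.tx_id = 0;			/* not part of a transaction */
	hdr.len = strlen(path) + 1;	/* nul-terminated payload */

	xs_ring_write(&hdr, sizeof(hdr));
	xs_ring_write(path, hdr.len);
}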
> + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > + * deal in the Software without restriction, including without limitation the > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or > + * sell copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL THE > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > OR OTHER > + * DEALINGS IN THE SOFTWARE. > + * > + * Copyright (C) 2005 Rusty Russell IBM Corporation > + */ > + > +#ifndef _XS_WIRE_H > +#define _XS_WIRE_H > + > +enum xsd_sockmsg_type { > + XS_CONTROL, > +#define XS_DEBUG XS_CONTROL > + XS_DIRECTORY, > + XS_READ, > + XS_GET_PERMS, > + XS_WATCH, > + XS_UNWATCH, > + XS_TRANSACTION_START, > + XS_TRANSACTION_END, > + XS_INTRODUCE, > + XS_RELEASE, > + XS_GET_DOMAIN_PATH, > + XS_WRITE, > + XS_MKDIR, > + XS_RM, > + XS_SET_PERMS, > + XS_WATCH_EVENT, > + XS_ERROR, > + XS_IS_DOMAIN_INTRODUCED, > + XS_RESUME, > + XS_SET_TARGET, > + /* XS_RESTRICT has been removed */ > + XS_RESET_WATCHES = XS_SET_TARGET + 2, > + XS_DIRECTORY_PART, > + > + XS_TYPE_COUNT, /* Number of valid types. */ > + > + XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */ > +}; > + > +#define XS_WRITE_NONE "NONE" > +#define XS_WRITE_CREATE "CREATE" > +#define XS_WRITE_CREATE_EXCL "CREATE|EXCL" > + > +/* We hand errors as strings, for portability. */ > +struct xsd_errors { > + int errnum; > + const char *errstring; > +}; > + > +#ifdef EINVAL > +#define XSD_ERROR(x) { x, #x } > +/* LINTED: static unused */ > +static struct xsd_errors xsd_errors[] > +#if defined(__GNUC__) > +__attribute__((unused)) > +#endif > + = { > + XSD_ERROR(EINVAL), > + XSD_ERROR(EACCES), > + XSD_ERROR(EEXIST), > + XSD_ERROR(EISDIR), > + XSD_ERROR(ENOENT), > + XSD_ERROR(ENOMEM), > + XSD_ERROR(ENOSPC), > + XSD_ERROR(EIO), > + XSD_ERROR(ENOTEMPTY), > + XSD_ERROR(ENOSYS), > + XSD_ERROR(EROFS), > + XSD_ERROR(EBUSY), > + XSD_ERROR(EAGAIN), > + XSD_ERROR(EISCONN), > + XSD_ERROR(E2BIG) > +}; > +#endif > + > +struct xsd_sockmsg { > + u32 type; /* XS_??? */ > + u32 req_id;/* Request identifier, echoed in daemon's response. */ > + u32 tx_id; /* Transaction id (0 if not related to a transaction). */ > + u32 len; /* Length of data following this. */ > + > + /* Generally followed by nul-terminated string(s). */ > +}; > + > +enum xs_watch_type { > + XS_WATCH_PATH = 0, > + XS_WATCH_TOKEN > +}; > + > +/* > + * `incontents 150 xenstore_struct XenStore wire protocol. > + * > + * Inter-domain shared memory communications. > + */ > +#define XENSTORE_RING_SIZE 1024 > +typedef u32 XENSTORE_RING_IDX; > +#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE - 1)) > +struct xenstore_domain_interface { > + char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */ > + char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. 
*/ > + XENSTORE_RING_IDX req_cons, req_prod; > + XENSTORE_RING_IDX rsp_cons, rsp_prod; > + u32 server_features; /* Bitmap of features supported by the server */ > + u32 connection; > +}; > + > +/* Violating this is very bad. See docs/misc/xenstore.txt. */ > +#define XENSTORE_PAYLOAD_MAX 4096 > + > +/* Violating these just gets you an error back */ > +#define XENSTORE_ABS_PATH_MAX 3072 > +#define XENSTORE_REL_PATH_MAX 2048 > + > +/* The ability to reconnect a ring */ > +#define XENSTORE_SERVER_FEATURE_RECONNECTION 1 > + > +/* Valid values for the connection field */ > +#define XENSTORE_CONNECTED 0 /* the steady-state */ > +#define XENSTORE_RECONNECT 1 /* guest has initiated a reconnect */ > + > +#endif /* _XS_WIRE_H */ > + > +/* > + * Local variables: > + * mode: C > + * c-file-style: "BSD" > + * c-basic-offset: 4 > + * tab-width: 8 > + * indent-tabs-mode: nil > + * End: > + */ > diff --git a/include/xen/interface/memory.h > b/include/xen/interface/memory.h > new file mode 100644 > index 0000000000..19959da8b4 > --- /dev/null > +++ b/include/xen/interface/memory.h > @@ -0,0 +1,332 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/************************************************************ > ****************** > + * memory.h > + * > + * Memory reservation and information. > + * > + * Copyright (c) 2005, Keir Fraser <keir at xensource.com> > + */ > + > +#ifndef __XEN_PUBLIC_MEMORY_H__ > +#define __XEN_PUBLIC_MEMORY_H__ > + > +/* > + * Increase or decrease the specified domain's memory reservation. Returns > a > + * -ve errcode on failure, or the # extents successfully allocated or freed. > + * arg == addr of struct xen_memory_reservation. > + */ > +#define XENMEM_increase_reservation 0 > +#define XENMEM_decrease_reservation 1 > +#define XENMEM_populate_physmap 6 > +struct xen_memory_reservation { > + /* > + * XENMEM_increase_reservation: > + * OUT: MFN (*not* GMFN) bases of extents that were allocated > + * XENMEM_decrease_reservation: > + * IN: GMFN bases of extents to free > + * XENMEM_populate_physmap: > + * IN: GPFN bases of extents to populate with memory > + * OUT: GMFN bases of extents that were allocated > + * (NB. This command also updates the mach_to_phys translation > table) > + */ > + GUEST_HANDLE(xen_pfn_t)extent_start; > + > + /* Number of extents, and size/alignment of each (2^extent_order > pages). */ > + xen_ulong_t nr_extents; > + unsigned int extent_order; > + > + /* > + * Maximum # bits addressable by the user of the allocated region (e.g., > + * I/O devices often have a 32-bit limitation even in 64-bit systems). If > + * zero then the user has no addressing restriction. > + * This field is not used by XENMEM_decrease_reservation. > + */ > + unsigned int address_bits; > + > + /* > + * Domain whose reservation is being changed. > + * Unprivileged domains can specify only DOMID_SELF. > + */ > + domid_t domid; > + > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation); > + > +/* > + * An atomic exchange of memory pages. If return code is zero then > + * @out.extent_list provides GMFNs of the newly-allocated memory. > + * Returns zero on complete success, otherwise a negative error code. > + * On complete success then always @nr_exchanged == @in.nr_extents. > + * On partial success @nr_exchanged indicates how much work was done. > + */ > +#define XENMEM_exchange 11 > +struct xen_memory_exchange { > + /* > + * [IN] Details of memory extents to be exchanged (GMFN bases). > + * Note that @in.address_bits is ignored and unused. 
> + */
> +    struct xen_memory_reservation in;
> +
> +    /*
> +     * [IN/OUT] Details of new memory extents.
> +     * We require that:
> +     *  1. @in.domid == @out.domid
> +     *  2. @in.nr_extents  << @in.extent_order ==
> +     *     @out.nr_extents << @out.extent_order
> +     *  3. @in.extent_start and @out.extent_start lists must not overlap
> +     *  4. @out.extent_start lists GPFN bases to be populated
> +     *  5. @out.extent_start is overwritten with allocated GMFN bases
> +     */
> +    struct xen_memory_reservation out;
> +
> +    /*
> +     * [OUT] Number of input extents that were successfully exchanged:
> +     *  1. The first @nr_exchanged input extents were successfully
> +     *     deallocated.
> +     *  2. The corresponding first entries in the output extent list correctly
> +     *     indicate the GMFNs that were successfully exchanged.
> +     *  3. All other input and output extents are untouched.
> +     *  4. If not all input extents are exchanged then the return code of this
> +     *     command will be non-zero.
> +     *  5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
> +     */
> +    xen_ulong_t nr_exchanged;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange);
> +/*
> + * Returns the maximum machine frame number of mapped RAM in this system.
> + * This command always succeeds (it never returns an error code).
> + * arg == NULL.
> + */
> +#define XENMEM_maximum_ram_page 2
> +
> +/*
> + * Returns the current or maximum memory reservation, in pages, of the
> + * specified domain (may be DOMID_SELF). Returns -ve errcode on failure.
> + * arg == addr of domid_t.
> + */
> +#define XENMEM_current_reservation 3
> +#define XENMEM_maximum_reservation 4
> +
> +/*
> + * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
> + * mapping table. Architectures which do not have a m2p table do not
> + * implement this command.
> + * arg == addr of xen_machphys_mfn_list_t.
> + */
> +#define XENMEM_machphys_mfn_list 5
> +struct xen_machphys_mfn_list {
> +    /*
> +     * Size of the 'extent_start' array. Fewer entries will be filled if the
> +     * machphys table is smaller than max_extents * 2MB.
> +     */
> +    unsigned int max_extents;
> +
> +    /*
> +     * Pointer to buffer to fill with list of extent starts. If there are
> +     * any large discontiguities in the machine address space, 2MB gaps in
> +     * the machphys table will be represented by an MFN base of zero.
> +     */
> +    GUEST_HANDLE(xen_pfn_t)extent_start;
> +
> +    /*
> +     * Number of extents written to the above array. This will be smaller
> +     * than 'max_extents' if the machphys table is smaller than max_e * 2MB.
> +     */
> +    unsigned int nr_extents;
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
> +
> +/*
> + * Returns the location in virtual address space of the machine_to_phys
> + * mapping table. Architectures which do not have a m2p table, or which do
> + * not map it by default into guest address space, do not implement this
> + * command.
> + * arg == addr of xen_machphys_mapping_t.
> + */
> +#define XENMEM_machphys_mapping 12
> +struct xen_machphys_mapping {
> +    xen_ulong_t v_start, v_end; /* Start and end virtual addresses.   */
> +    xen_ulong_t max_mfn;        /* Maximum MFN that can be looked up. */
> +};
> +
> +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t);
> +
> +#define XENMAPSPACE_shared_info  0 /* shared info page */
> +#define XENMAPSPACE_grant_table  1 /* grant table page */
> +#define XENMAPSPACE_gmfn         2 /* GMFN */
> +#define XENMAPSPACE_gmfn_range   3 /* GMFN range, XENMEM_add_to_physmap only.
*/ > +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom, > + * XENMEM_add_to_physmap_range only. > + */ > +#define XENMAPSPACE_dev_mmio 5 /* device mmio region */ > + > +/* > + * Sets the GPFN at which a particular page appears in the specified guest's > + * pseudophysical address space. > + * arg == addr of xen_add_to_physmap_t. > + */ > +#define XENMEM_add_to_physmap 7 > +struct xen_add_to_physmap { > + /* Which domain to change the mapping for. */ > + domid_t domid; > + > + /* Number of pages to go through for gmfn_range */ > + u16 size; > + > + /* Source mapping space. */ > + unsigned int space; > + > + /* Index into source mapping space. */ > + xen_ulong_t idx; > + > + /* GPFN where the source mapping page should appear. */ > + xen_pfn_t gpfn; > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); > + > +/*** REMOVED ***/ > +/*#define XENMEM_translate_gpfn_list 8*/ > + > +#define XENMEM_add_to_physmap_range 23 > +struct xen_add_to_physmap_range { > + /* IN */ > + /* Which domain to change the mapping for. */ > + domid_t domid; > + u16 space; /* => enum phys_map_space */ > + > + /* Number of pages to go through */ > + u16 size; > + domid_t foreign_domid; /* IFF gmfn_foreign */ > + > + /* Indexes into space being mapped. */ > + GUEST_HANDLE(xen_ulong_t)idxs; > + > + /* GPFN in domid where the source mapping page should appear. */ > + GUEST_HANDLE(xen_pfn_t)gpfns; > + > + /* OUT */ > + > + /* Per index error code. */ > + GUEST_HANDLE(int)errs; > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap_range); > + > +/* > + * Returns the pseudo-physical memory map as it was when the domain > + * was started (specified by XENMEM_set_memory_map). > + * arg == addr of struct xen_memory_map. > + */ > +#define XENMEM_memory_map 9 > +struct xen_memory_map { > + /* > + * On call the number of entries which can be stored in buffer. On > + * return the number of entries which have been stored in > + * buffer. > + */ > + unsigned int nr_entries; > + > + /* > + * Entries in the buffer are in the same format as returned by the > + * BIOS INT 0x15 EAX=0xE820 call. > + */ > + GUEST_HANDLE(void)buffer; > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map); > + > +/* > + * Returns the real physical memory map. Passes the same structure as > + * XENMEM_memory_map. > + * arg == addr of struct xen_memory_map. > + */ > +#define XENMEM_machine_memory_map 10 > + > +/* > + * Unmaps the page appearing at a particular GPFN from the specified > guest's > + * pseudophysical address space. > + * arg == addr of xen_remove_from_physmap_t. > + */ > +#define XENMEM_remove_from_physmap 15 > +struct xen_remove_from_physmap { > + /* Which domain to change the mapping for. */ > + domid_t domid; > + > + /* GPFN of the current mapping of the page. */ > + xen_pfn_t gpfn; > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap); > + > +/* > + * Get the pages for a particular guest resource, so that they can be > + * mapped directly by a tools domain. > + */ > +#define XENMEM_acquire_resource 28 > +struct xen_mem_acquire_resource { > + /* IN - The domain whose resource is to be mapped */ > + domid_t domid; > + /* IN - the type of resource */ > + u16 type; > + > +#define XENMEM_resource_ioreq_server 0 > +#define XENMEM_resource_grant_table 1 > + > + /* > + * IN - a type-specific resource identifier, which must be zero > + * unless stated otherwise. 
> + * > + * type == XENMEM_resource_ioreq_server -> id == ioreq server id > + * type == XENMEM_resource_grant_table -> id defined below > + */ > + u32 id; > + > +#define XENMEM_resource_grant_table_id_shared 0 > +#define XENMEM_resource_grant_table_id_status 1 > + > + /* IN/OUT - As an IN parameter number of frames of the resource > + * to be mapped. However, if the specified value is 0 and > + * frame_list is NULL then this field will be set to the > + * maximum value supported by the implementation on > return. > + */ > + u32 nr_frames; > + /* > + * OUT - Must be zero on entry. On return this may contain a bitwise > + * OR of the following values. > + */ > + u32 flags; > + > + /* The resource pages have been assigned to the calling domain */ > +#define _XENMEM_rsrc_acq_caller_owned 0 > +#define XENMEM_rsrc_acq_caller_owned (1u << > _XENMEM_rsrc_acq_caller_owned) > + > + /* > + * IN - the index of the initial frame to be mapped. This parameter > + * is ignored if nr_frames is 0. > + */ > + u64 frame; > + > +#define XENMEM_resource_ioreq_server_frame_bufioreq 0 > +#define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n)) > + > + /* > + * IN/OUT - If the tools domain is PV then, upon return, frame_list > + * will be populated with the MFNs of the resource. > + * If the tools domain is HVM then it is expected that, on > + * entry, frame_list will be populated with a list of GFNs > + * that will be mapped to the MFNs of the resource. > + * If -EIO is returned then the frame_list has only been > + * partially mapped and it is up to the caller to unmap all > + * the GFNs. > + * This parameter may be NULL if nr_frames is 0. > + */ > + GUEST_HANDLE(xen_pfn_t)frame_list; > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(xen_mem_acquire_resource); > + > +#endif /* __XEN_PUBLIC_MEMORY_H__ */ > diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h > new file mode 100644 > index 0000000000..0f12dcf267 > --- /dev/null > +++ b/include/xen/interface/sched.h > @@ -0,0 +1,188 @@ > +/************************************************************ > ****************** > + * sched.h > + * > + * Scheduler state interactions > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > + * deal in the Software without restriction, including without limitation the > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or > + * sell copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL THE > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > OR OTHER > + * DEALINGS IN THE SOFTWARE. 
> + * > + * Copyright (c) 2005, Keir Fraser <keir at xensource.com> > + */ > + > +#ifndef __XEN_PUBLIC_SCHED_H__ > +#define __XEN_PUBLIC_SCHED_H__ > + > +#include <xen/interface/event_channel.h> > + > +/* > + * Guest Scheduler Operations > + * > + * The SCHEDOP interface provides mechanisms for a guest to interact > + * with the scheduler, including yield, blocking and shutting itself > + * down. > + */ > + > +/* > + * The prototype for this hypercall is: > + * long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...) > + * > + * @cmd == SCHEDOP_??? (scheduler operation). > + * @arg == Operation-specific extra argument(s), as described below. > + * ... == Additional Operation-specific extra arguments, described below. > + * > + * Versions of Xen prior to 3.0.2 provided only the following legacy version > + * of this hypercall, supporting only the commands yield, block and > shutdown: > + * long sched_op(int cmd, unsigned long arg) > + * @cmd == SCHEDOP_??? (scheduler operation). > + * @arg == 0 (SCHEDOP_yield and SCHEDOP_block) > + * == SHUTDOWN_* code (SCHEDOP_shutdown) > + * > + * This legacy version is available to new guests as: > + * long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long > arg) > + */ > + > +/* > + * Voluntarily yield the CPU. > + * @arg == NULL. > + */ > +#define SCHEDOP_yield 0 > + > +/* > + * Block execution of this VCPU until an event is received for processing. > + * If called with event upcalls masked, this operation will atomically > + * reenable event delivery and check for pending events before blocking the > + * VCPU. This avoids a "wakeup waiting" race. > + * @arg == NULL. > + */ > +#define SCHEDOP_block 1 > + > +/* > + * Halt execution of this domain (all VCPUs) and notify the system controller. > + * @arg == pointer to sched_shutdown structure. > + * > + * If the sched_shutdown_t reason is SHUTDOWN_suspend then > + * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN > + * of the guest's start info page. RDX/EDX is the third hypercall > + * argument. > + * > + * In addition, when the reason is SHUTDOWN_suspend, this hypercall > + * returns 1 if suspend was cancelled or the domain was merely > + * checkpointed, and 0 if it is resuming in a new domain. > + */ > +#define SCHEDOP_shutdown 2 > + > +/* > + * Poll a set of event-channel ports. Return when one or more are pending. > An > + * optional timeout may be specified. > + * @arg == pointer to sched_poll structure. > + */ > +#define SCHEDOP_poll 3 > + > +/* > + * Declare a shutdown for another domain. The main use of this function is > + * in interpreting shutdown requests and reasons for fully-virtualized > + * domains. A para-virtualized domain may use SCHEDOP_shutdown > directly. > + * @arg == pointer to sched_remote_shutdown structure. > + */ > +#define SCHEDOP_remote_shutdown 4 > + > +/* > + * Latch a shutdown code, so that when the domain later shuts down it > + * reports this code to the control tools. > + * @arg == sched_shutdown, as for SCHEDOP_shutdown. > + */ > +#define SCHEDOP_shutdown_code 5 > + > +/* > + * Set up, poke and destroy a domain watchdog timer. > + * @arg == pointer to sched_watchdog structure. > + * With id == 0, set up a domain watchdog timer to cause domain shutdown > + * after timeout, returns watchdog id. > + * With id != 0 and timeout == 0, destroy domain watchdog timer. > + * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
*/ > +#define SCHEDOP_watchdog 6 > + > +/* > + * Override the current vcpu affinity by pinning it to one physical cpu, or > + * undo this override, restoring the previous affinity. > + * @arg == pointer to sched_pin_override structure. > + * > + * A negative pcpu value will undo a previous pin override and restore the > + * previous cpu affinity. > + * This call is allowed for the hardware domain only and requires the cpu > + * to be part of the domain's cpupool. > + */ > +#define SCHEDOP_pin_override 7 > + > +struct sched_shutdown { > + unsigned int reason; /* SHUTDOWN_* => shutdown reason */ > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown); > + > +struct sched_poll { > + GUEST_HANDLE(evtchn_port_t)ports; > + unsigned int nr_ports; > + u64 timeout; > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(sched_poll); > + > +struct sched_remote_shutdown { > + domid_t domain_id; /* Remote domain ID */ > + unsigned int reason; /* SHUTDOWN_* => shutdown reason */ > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(sched_remote_shutdown); > + > +struct sched_watchdog { > + u32 id; /* watchdog ID */ > + u32 timeout; /* timeout */ > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(sched_watchdog); > + > +struct sched_pin_override { > + s32 pcpu; > +}; > + > +DEFINE_GUEST_HANDLE_STRUCT(sched_pin_override); > + > +/* > + * Reason codes for SCHEDOP_shutdown. These may be interpreted by > control > + * software to determine the appropriate action. For the most part, Xen does > + * not care about the shutdown code. > + */ > +#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up > and kill. */ > +#define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. > */ > +#define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. > */ > +#define SHUTDOWN_crash 3 /* Tell controller we've crashed. > */ > +#define SHUTDOWN_watchdog 4 /* Restart because watchdog time > expired. */ > + > +/* > + * The domain has asked to perform a 'soft reset'. The expected behavior is > + * to reset internal Xen state for the domain, returning it to the point where > + * it was created but leaving the domain's memory contents and vCPU contexts > + * intact. This allows the domain to start over and set up all Xen-specific > + * interfaces again. > + */ > +#define SHUTDOWN_soft_reset 5 > +#define SHUTDOWN_MAX 5 /* Maximum valid shutdown reason. > */ > + > +#endif /* __XEN_PUBLIC_SCHED_H__ */ > diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h > new file mode 100644 > index 0000000000..964daaedfb > --- /dev/null > +++ b/include/xen/interface/xen.h > @@ -0,0 +1,225 @@ > +/************************************************************ > ****************** > + * xen.h > + * > + * Guest OS interface to Xen. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > copy > + * of this software and associated documentation files (the "Software"), to > + * deal in the Software without restriction, including without limitation the > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or > + * sell copies of the Software, and to permit persons to whom the Software is > + * furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software.
> + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > EVENT SHALL THE > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > OR OTHER > + * DEALINGS IN THE SOFTWARE. > + * > + * Copyright (c) 2004, K A Fraser > + */ > + > +#ifndef __XEN_PUBLIC_XEN_H__ > +#define __XEN_PUBLIC_XEN_H__ > + > +#include <xen/arm/interface.h> > + > +/* > + * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS). > + */ > + > +/* > + * x86_32: EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5. > + * EAX = return value > + * (argument registers may be clobbered on return) > + * x86_64: RAX = vector; RDI, RSI, RDX, R10, R8, R9 = args 1, 2, 3, 4, 5, 6. > + * RAX = return value > + * (argument registers not clobbered on return; RCX, R11 are) > + */ > +#define __HYPERVISOR_set_trap_table 0 > +#define __HYPERVISOR_mmu_update 1 > +#define __HYPERVISOR_set_gdt 2 > +#define __HYPERVISOR_stack_switch 3 > +#define __HYPERVISOR_set_callbacks 4 > +#define __HYPERVISOR_fpu_taskswitch 5 > +#define __HYPERVISOR_sched_op_compat 6 > +#define __HYPERVISOR_platform_op 7 > +#define __HYPERVISOR_set_debugreg 8 > +#define __HYPERVISOR_get_debugreg 9 > +#define __HYPERVISOR_update_descriptor 10 > +#define __HYPERVISOR_memory_op 12 > +#define __HYPERVISOR_multicall 13 > +#define __HYPERVISOR_update_va_mapping 14 > +#define __HYPERVISOR_set_timer_op 15 > +#define __HYPERVISOR_event_channel_op_compat 16 > +#define __HYPERVISOR_xen_version 17 > +#define __HYPERVISOR_console_io 18 > +#define __HYPERVISOR_physdev_op_compat 19 > +#define __HYPERVISOR_grant_table_op 20 > +#define __HYPERVISOR_vm_assist 21 > +#define __HYPERVISOR_update_va_mapping_otherdomain 22 > +#define __HYPERVISOR_iret 23 /* x86 only */ > +#define __HYPERVISOR_vcpu_op 24 > +#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ > +#define __HYPERVISOR_mmuext_op 26 > +#define __HYPERVISOR_xsm_op 27 > +#define __HYPERVISOR_nmi_op 28 > +#define __HYPERVISOR_sched_op 29 > +#define __HYPERVISOR_callback_op 30 > +#define __HYPERVISOR_xenoprof_op 31 > +#define __HYPERVISOR_event_channel_op 32 > +#define __HYPERVISOR_physdev_op 33 > +#define __HYPERVISOR_hvm_op 34 > +#define __HYPERVISOR_sysctl 35 > +#define __HYPERVISOR_domctl 36 > +#define __HYPERVISOR_kexec_op 37 > +#define __HYPERVISOR_tmem_op 38 > +#define __HYPERVISOR_xc_reserved_op 39 /* reserved for > XenClient */ > +#define __HYPERVISOR_xenpmu_op 40 > +#define __HYPERVISOR_dm_op 41 > + > +/* Architecture-specific hypercall definitions. */ > +#define __HYPERVISOR_arch_0 48 > +#define __HYPERVISOR_arch_1 49 > +#define __HYPERVISOR_arch_2 50 > +#define __HYPERVISOR_arch_3 51 > +#define __HYPERVISOR_arch_4 52 > +#define __HYPERVISOR_arch_5 53 > +#define __HYPERVISOR_arch_6 54 > +#define __HYPERVISOR_arch_7 55 > + > +#ifndef __ASSEMBLY__ > + > +typedef u16 domid_t; > + > +/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary > domains. */ > +#define DOMID_FIRST_RESERVED (0x7FF0U) > + > +/* DOMID_SELF is used in certain contexts to refer to oneself. */ > +#define DOMID_SELF (0x7FF0U) > + > +/* > + * DOMID_IO is used to restrict page-table updates to mapping I/O memory. 
> + * Although no Foreign Domain need be specified to map I/O pages, > DOMID_IO > + * is useful to ensure that no mappings to the OS's own heap are accidentally > + * installed. (e.g., in Linux this could cause havoc as reference counts > + * aren't adjusted on the I/O-mapping code path). > + * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that > context can > + * be specified by any calling domain. > + */ > +#define DOMID_IO (0x7FF1U) > + > +/* > + * DOMID_XEN is used to allow privileged domains to map restricted parts of > + * Xen's heap space (e.g., the machine_to_phys table). > + * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only > permitted if > + * the caller is privileged. > + */ > +#define DOMID_XEN (0x7FF2U) > + > +/* DOMID_COW is used as the owner of sharable pages */ > +#define DOMID_COW (0x7FF3U) > + > +/* DOMID_INVALID is used to identify pages with unknown owner. */ > +#define DOMID_INVALID (0x7FF4U) > + > +/* Idle domain. */ > +#define DOMID_IDLE (0x7FFFU) > + > +struct vcpu_info { > + /* > + * 'evtchn_upcall_pending' is written non-zero by Xen to indicate > + * a pending notification for a particular VCPU. It is then cleared > + * by the guest OS /before/ checking for pending work, thus avoiding > + * a set-and-check race. Note that the mask is only accessed by Xen > + * on the CPU that is currently hosting the VCPU. This means that the > + * pending and mask flags can be updated by the guest without special > + * synchronisation (i.e., no need for the x86 LOCK prefix). > + * This may seem suboptimal because if the pending flag is set by > + * a different CPU then an IPI may be scheduled even when the mask > + * is set. However, note: > + * 1. The task of 'interrupt holdoff' is covered by the per-event- > + * channel mask bits. A 'noisy' event that is continually being > + * triggered can be masked at source at this very precise > + * granularity. > + * 2. The main purpose of the per-VCPU mask is therefore to restrict > + * reentrant execution: whether for concurrency control, or to > + * prevent unbounded stack usage. Whatever the purpose, we > expect > + * that the mask will be asserted only for short periods at a time, > + * and so the likelihood of a 'spurious' IPI is suitably small. > + * The mask is read before making an event upcall to the guest: a > + * non-zero mask therefore guarantees that the VCPU will not receive > + * an upcall activation. The mask is cleared when the VCPU requests > + * to block: this avoids wakeup-waiting races. > + */ > + u8 evtchn_upcall_pending; > + u8 evtchn_upcall_mask; > + xen_ulong_t evtchn_pending_sel; > + struct arch_vcpu_info arch; > + struct pvclock_vcpu_time_info time; > +}; /* 64 bytes (x86) */ > + > +/* > + * Xen/kernel shared data -- pointer provided in start_info. > + * NB. We expect that this struct is smaller than a page. > + */ > +struct shared_info { > + struct vcpu_info vcpu_info[MAX_VIRT_CPUS]; > + > + /* > + * A domain can create "event channels" on which it can send and > receive > + * asynchronous event notifications. There are three classes of event > that > + * are delivered by this mechanism: > + * 1. Bi-directional inter- and intra-domain connections. Domains must > + * arrange out-of-band to set up a connection (usually by allocating > + * an unbound 'listener' port and advertising that via a storage > service > + * such as xenstore). > + * 2. Physical interrupts. A domain with suitable hardware-access > + * privileges can bind an event-channel port to a physical interrupt > + * source.
> + * 3. Virtual interrupts ('events'). A domain can bind an event-channel > + * port to a virtual interrupt source, such as the virtual-timer > + * device or the emergency console. > + * > + * Event channels are addressed by a "port index". Each channel is > + * associated with two bits of information: > + * 1. PENDING -- notifies the domain that there is a pending > notification > + * to be processed. This bit is cleared by the guest. > + * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING > + * will cause an asynchronous upcall to be scheduled. This bit is > only > + * updated by the guest. It is read-only within Xen. If a channel > + * becomes pending while the channel is masked then the 'edge' is > lost > + * (i.e., when the channel is unmasked, the guest must manually > handle > + * pending notifications as no upcall will be scheduled by Xen). > + * > + * To expedite scanning of pending notifications, any 0->1 pending > + * transition on an unmasked channel causes a corresponding bit in a > + * per-vcpu selector word to be set. Each bit in the selector covers a > + * 'C long' in the PENDING bitfield array. > + */ > + xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8]; > + xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8]; > + > + /* > + * Wallclock time: updated only by control software. Guests should base > + * their gettimeofday() syscall on this wallclock-base value. > + */ > + struct pvclock_wall_clock wc; > + > + struct arch_shared_info arch; > + > +}; > + > +#else /* __ASSEMBLY__ */ > + > +/* In assembly code we cannot use C numeric constant suffixes. */ > +#define mk_unsigned_long(x) x > + > +#endif /* !__ASSEMBLY__ */ > + > +#endif /* __XEN_PUBLIC_XEN_H__ */ > -- > 2.17.1
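A few usage sketches may help readers new to these interfaces; none of this is part of the patch itself. First, the XENMEM_add_to_physmap flow documented in memory.h: a guest asks Xen to place a Xen-provided frame (here its shared info page) at a guest pfn of its own choosing. The HYPERVISOR_memory_op() wrapper is assumed to be provided elsewhere (this patch only adds headers), XENMAPSPACE_shared_info comes from the earlier part of memory.h, and the 4 KiB page shift is purely illustrative:

    #include <xen/interface/memory.h>
    #include <xen/interface/xen.h>

    /* Sketch only: map this domain's shared_info frame at @gpfn and
     * return a pointer to it, or NULL if the hypercall fails.
     */
    static struct shared_info *map_shared_info(xen_pfn_t gpfn)
    {
            struct xen_add_to_physmap xatp = {
                    .domid = DOMID_SELF,              /* modify our own physmap */
                    .space = XENMAPSPACE_shared_info, /* source mapping space */
                    .idx   = 0,                       /* index into that space */
                    .gpfn  = gpfn,                    /* where it should appear */
            };

            if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp))
                    return NULL;

            /* 4 KiB pages assumed for illustration only. */
            return (struct shared_info *)(unsigned long)(gpfn << 12);
    }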
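The SCHEDOP_* operations in sched.h all follow the calling pattern spelled out in its comment block: fill in the operation's argument structure and pass both to HYPERVISOR_sched_op(). A minimal sketch of a clean power-off, again assuming the hypercall wrapper exists:

    #include <xen/interface/sched.h>

    /* Sketch: halt all VCPUs of this domain and tell the toolstack we
     * exited normally.
     */
    static void xen_poweroff(void)
    {
            struct sched_shutdown shutdown = {
                    .reason = SHUTDOWN_poweroff,
            };

            HYPERVISOR_sched_op(SCHEDOP_shutdown, &shutdown);
    }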
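SCHEDOP_watchdog encodes three operations in one hypercall (create, poke, destroy), selected by the id/timeout combination described in its comment. A sketch of the create-then-poke pair; the assumptions here are that the setup call returns the new watchdog id as its (positive) return value, per the comment, and that the timeout is in seconds:

    #include <xen/interface/sched.h>

    static u32 watchdog_id; /* id returned by the initial id == 0 call */

    /* Sketch: create a watchdog that shuts the domain down unless poked. */
    static int xen_watchdog_start(u32 timeout)
    {
            struct sched_watchdog wd = { .id = 0, .timeout = timeout };
            int ret = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);

            if (ret > 0)
                    watchdog_id = ret; /* remember the id for later pokes */
            return ret;
    }

    /* Sketch: poke the watchdog and arm a fresh timeout (id != 0,
     * timeout != 0 per the comment above).
     */
    static int xen_watchdog_kick(u32 timeout)
    {
            struct sched_watchdog wd = { .id = watchdog_id, .timeout = timeout };

            return HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);
    }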
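SCHEDOP_poll ties sched.h to the event-channel types: sched_poll.ports is a guest handle, so it is filled in with the set_xen_guest_handle() helper from interface.h. A sketch that blocks on a single port; treat the timeout semantics (absolute time in nanoseconds, with 0 meaning no timeout) as an assumption of this sketch:

    #include <xen/interface/sched.h>

    /* Sketch: block until @port is pending or @timeout_ns expires. */
    static int xen_poll_port(evtchn_port_t port, u64 timeout_ns)
    {
            struct sched_poll poll = {
                    .nr_ports = 1,
                    .timeout  = timeout_ns, /* 0 == wait indefinitely */
            };

            set_xen_guest_handle(poll.ports, &port);
            return HYPERVISOR_sched_op(SCHEDOP_poll, &poll);
    }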
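Finally, the shared_info comment in xen.h describes the 2-level pending/mask scheme precisely enough to scan it: the per-vcpu selector says which words of evtchn_pending[] to inspect, and a channel fires when its pending bit is set while its mask bit is clear. The sketch below follows that description; handle_event() is a hypothetical consumer, and the selector and pending-bit updates are shown non-atomically for brevity, whereas real code would use atomic exchange and test-and-clear operations:

    #include <xen/interface/xen.h>

    void handle_event(unsigned int port); /* hypothetical consumer */

    /* Sketch: deliver every channel that is pending and unmasked. */
    static void scan_pending_events(struct shared_info *s, struct vcpu_info *v)
    {
            xen_ulong_t sel, ready;
            unsigned int w, b;

            v->evtchn_upcall_pending = 0; /* clear /before/ scanning, see above */
            sel = v->evtchn_pending_sel;  /* real code: atomic xchg with 0 */
            v->evtchn_pending_sel = 0;

            while (sel) {
                    w = __builtin_ctzll(sel); /* selector bit == word index */
                    sel &= sel - 1;           /* clear lowest set selector bit */

                    ready = s->evtchn_pending[w] & ~s->evtchn_mask[w];
                    while (ready) {
                            b = __builtin_ctzll(ready);
                            ready &= ready - 1;
                            /* Real code: atomic test-and-clear of this bit. */
                            s->evtchn_pending[w] &= ~((xen_ulong_t)1 << b);
                            handle_event(w * sizeof(xen_ulong_t) * 8 + b);
                    }
            }
    }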
Hi Peng, On Thu, 2020-07-02 at 01:30 +0000, Peng Fan wrote: > > Subject: [PATCH 04/17] xen: Add essential and required interface > > headers > > > > From: Oleksandr Andrushchenko <oleksandr_andrushchenko at epam.com> > > > > Add essential and required Xen interface headers only taken from > > the stable Linux kernel stable/linux-5.7.y at commit > > 66dfe45221605e11f38a0bf5eb2ee808cea7cfe7. > > Please use commit <12+> ("commit header") Ok, will fix it in the next version. > > > > > These are better suited for U-boot than the original headers > > from Xen as they are the stripped versions of the same. > > > > At the same time use public protocols from Xen RELEASE-4.13.1, at > > commit 6278553325a9f76d37811923221b21db3882e017 > > Please use commit <12+> ("commit header") Ok, will fix it in the next version. > > Then: > > Acked-by: Peng Fan <peng.fan at nxp.com> Regards, Anastasiia
> > + * > > + * Stefano Stabellini <stefano.stabellini at eu.citrix.com>, Citrix, > > 2012 > > + */ > > + > > +#ifndef _ASM_ARM_XEN_INTERFACE_H > > +#define _ASM_ARM_XEN_INTERFACE_H > > + > > +#ifndef __ASSEMBLY__ > > +#include <linux/types.h> > > +#endif > > + > > +#define uint64_aligned_t u64 __attribute__((aligned(8))) > > + > > +#define __DEFINE_GUEST_HANDLE(name, type) \ > > + typedef struct { union { type *p; uint64_aligned_t q; }; } \ > > + __guest_handle_ ## name > > + > > +#define DEFINE_GUEST_HANDLE_STRUCT(name) \ > > + __DEFINE_GUEST_HANDLE(name, struct name) > > +#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, > > name) > > +#define GUEST_HANDLE(name) __guest_handle_ ## name > > + > > +#define set_xen_guest_handle(hnd, val) \ > > + do { \ > > + if (sizeof(hnd) == 8) \ > > + *(u64 *)&(hnd) = 0; \ > > + (hnd).p = val; \ > > + } while (0) > > + > > +#define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op > > + > > +#ifndef __ASSEMBLY__ > > +/* Explicitly size integers that represent pfns in the interface > > with > > + * Xen so that we can have one ABI that works for 32 and 64 bit > > guests. > > + * Note that this means that the xen_pfn_t type may be capable of > > + * representing pfn's which the guest cannot represent in its own > > pfn > > + * type. However since pfn space is controlled by the guest this > > is > > + * fine since it simply wouldn't be able to create any sure pfns > > in > > + * the first place. > > + */ > > +typedef u64 xen_pfn_t; > > +#define PRI_xen_pfn "llx" > > +typedef u64 xen_ulong_t; > > +#define PRI_xen_ulong "llx" > > +typedef s64 xen_long_t; > > +#define PRI_xen_long "llx" > > +/* Guest handles for primitive C types. */ > > +__DEFINE_GUEST_HANDLE(uchar, unsigned char); > > +__DEFINE_GUEST_HANDLE(uint, unsigned int); > > +DEFINE_GUEST_HANDLE(char); > > +DEFINE_GUEST_HANDLE(int); > > +DEFINE_GUEST_HANDLE(void); > > +DEFINE_GUEST_HANDLE(u64); > > +DEFINE_GUEST_HANDLE(u32); > > +DEFINE_GUEST_HANDLE(xen_pfn_t); > > +DEFINE_GUEST_HANDLE(xen_ulong_t); > > + > > +/* Maximum number of virtual CPUs in multi-processor guests. */ > > +#define MAX_VIRT_CPUS 1 > > + > > +struct arch_vcpu_info { }; > > +struct arch_shared_info { }; > > + > > +/* TODO: Move pvclock definitions some place arch independent */ > > +struct pvclock_vcpu_time_info { > > + u32 version; > > + u32 pad0; > > + u64 tsc_timestamp; > > + u64 system_time; > > + u32 tsc_to_system_mul; > > + s8 tsc_shift; > > + u8 flags; > > + u8 pad[2]; > > +} __attribute__((__packed__)); /* 32 bytes */ > > + > > +/* It is OK to have a 12 bytes struct with no padding because it > > is packed */ > > +struct pvclock_wall_clock { > > + u32 version; > > + u32 sec; > > + u32 nsec; > > + u32 sec_hi; > > +} __attribute__((__packed__)); > > +#endif > > + > > +#endif /* _ASM_ARM_XEN_INTERFACE_H */ > > diff --git a/include/xen/interface/event_channel.h > > b/include/xen/interface/event_channel.h > > new file mode 100644 > > index 0000000000..8174999c2f > > --- /dev/null > > +++ b/include/xen/interface/event_channel.h > > @@ -0,0 +1,281 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > +/************************************************************ > > ****************** > > + * event_channel.h > > + * > > + * Event channels between domains. > > + * > > + * Copyright (c) 2003-2004, K A Fraser. 
> > + */ > > + > > +#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ > > +#define __XEN_PUBLIC_EVENT_CHANNEL_H__ > > + > > +#include <xen/interface/xen.h> > > + > > +typedef u32 evtchn_port_t; > > +DEFINE_GUEST_HANDLE(evtchn_port_t); > > + > > +/* > > + * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and > > mark as > > + * accepting interdomain bindings from domain <remote_dom>. A > > fresh port > > + * is allocated in <dom> and returned as <port>. > > + * NOTES: > > + * 1. If the caller is unprivileged then <dom> must be > > DOMID_SELF. > > + * 2. <rdom> may be DOMID_SELF, allowing loopback connections. > > + */ > > +#define EVTCHNOP_alloc_unbound 6 > > +struct evtchn_alloc_unbound { > > + /* IN parameters */ > > + domid_t dom, remote_dom; > > + /* OUT parameters */ > > + evtchn_port_t port; > > +}; > > + > > +/* > > + * EVTCHNOP_bind_interdomain: Construct an interdomain event > > channel > > between > > + * the calling domain and <remote_dom>. <remote_dom,remote_port> > > must > > identify > > + * a port that is unbound and marked as accepting bindings from > > the calling > > + * domain. A fresh port is allocated in the calling domain and > > returned as > > + * <local_port>. > > + * NOTES: > > + * 2. <remote_dom> may be DOMID_SELF, allowing loopback > > connections. > > + */ > > +#define EVTCHNOP_bind_interdomain 0 > > +struct evtchn_bind_interdomain { > > + /* IN parameters. */ > > + domid_t remote_dom; > > + evtchn_port_t remote_port; > > + /* OUT parameters. */ > > + evtchn_port_t local_port; > > +}; > > + > > +/* > > + * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on > > specified > > + * vcpu. > > + * NOTES: > > + * 1. A virtual IRQ may be bound to at most one event channel per > > vcpu. > > + * 2. The allocated event channel is bound to the specified vcpu. > > The > > binding > > + * may not be changed. > > + */ > > +#define EVTCHNOP_bind_virq 1 > > +struct evtchn_bind_virq { > > + /* IN parameters. */ > > + u32 virq; > > + u32 vcpu; > > + /* OUT parameters. */ > > + evtchn_port_t port; > > +}; > > + > > +/* > > + * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>. > > + * NOTES: > > + * 1. A physical IRQ may be bound to at most one event channel > > per > > domain. > > + * 2. Only a sufficiently-privileged domain may bind to a > > physical IRQ. > > + */ > > +#define EVTCHNOP_bind_pirq 2 > > +struct evtchn_bind_pirq { > > + /* IN parameters. */ > > + u32 pirq; > > +#define BIND_PIRQ__WILL_SHARE 1 > > + u32 flags; /* BIND_PIRQ__* */ > > + /* OUT parameters. */ > > + evtchn_port_t port; > > +}; > > + > > +/* > > + * EVTCHNOP_bind_ipi: Bind a local event channel to receive > > events. > > + * NOTES: > > + * 1. The allocated event channel is bound to the specified vcpu. > > The > > binding > > + * may not be changed. > > + */ > > +#define EVTCHNOP_bind_ipi 7 > > +struct evtchn_bind_ipi { > > + u32 vcpu; > > + /* OUT parameters. */ > > + evtchn_port_t port; > > +}; > > + > > +/* > > + * EVTCHNOP_close: Close a local event channel <port>. If the > > channel is > > + * interdomain then the remote end is placed in the unbound state > > + * (EVTCHNSTAT_unbound), awaiting a new connection. > > + */ > > +#define EVTCHNOP_close 3 > > +struct evtchn_close { > > + /* IN parameters. */ > > + evtchn_port_t port; > > +}; > > + > > +/* > > + * EVTCHNOP_send: Send an event to the remote end of the channel > > whose > > local > > + * endpoint is <port>. > > + */ > > +#define EVTCHNOP_send 4 > > +struct evtchn_send { > > + /* IN parameters. 
*/ > > + evtchn_port_t port; > > +}; > > + > > +/* > > + * EVTCHNOP_status: Get the current status of the communication > > channel > > which > > + * has an endpoint at <dom, port>. > > + * NOTES: > > + * 1. <dom> may be specified as DOMID_SELF. > > + * 2. Only a sufficiently-privileged domain may obtain the status > > of an > > event > > + * channel for which <dom> is not DOMID_SELF. > > + */ > > +#define EVTCHNOP_status 5 > > +struct evtchn_status { > > + /* IN parameters */ > > + domid_t dom; > > + evtchn_port_t port; > > + /* OUT parameters */ > > +#define EVTCHNSTAT_closed 0 /* Channel is not in use. > > */ > > +#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom > > connection.*/ > > +#define EVTCHNSTAT_interdomain 2 /* Channel is connected to > > remote > > domain. */ > > +#define EVTCHNSTAT_pirq 3 /* Channel is bound to a > > phys IRQ line. > > */ > > +#define EVTCHNSTAT_virq 4 /* Channel is bound to a > > virtual IRQ line > > */ > > +#define EVTCHNSTAT_ipi 5 /* Channel is bound to a > > virtual IPI line > > */ > > + u32 status; > > + u32 vcpu; /* VCPU to which this channel is > > bound. */ > > + union { > > + struct { > > + domid_t dom; > > + } unbound; /* EVTCHNSTAT_unbound */ > > + struct { > > + domid_t dom; > > + evtchn_port_t port; > > + } interdomain; /* EVTCHNSTAT_interdomain */ > > + u32 pirq; /* EVTCHNSTAT_pirq */ > > + u32 virq; /* EVTCHNSTAT_virq */ > > + } u; > > +}; > > + > > +/* > > + * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify > > when > > an > > + * event is pending. > > + * NOTES: > > + * 1. IPI- and VIRQ-bound channels always notify the vcpu that > > initialised > > + * the binding. This binding cannot be changed. > > + * 2. All other channels notify vcpu0 by default. This default is > > set when > > + * the channel is allocated (a port that is freed and > > subsequently reused > > + * has its binding reset to vcpu0). > > + */ > > +#define EVTCHNOP_bind_vcpu 8 > > +struct evtchn_bind_vcpu { > > + /* IN parameters. */ > > + evtchn_port_t port; > > + u32 vcpu; > > +}; > > + > > +/* > > + * EVTCHNOP_unmask: Unmask the specified local event-channel port > > and > > deliver > > + * a notification to the appropriate VCPU if an event is pending. > > + */ > > +#define EVTCHNOP_unmask 9 > > +struct evtchn_unmask { > > + /* IN parameters. */ > > + evtchn_port_t port; > > +}; > > + > > +/* > > + * EVTCHNOP_reset: Close all event channels associated with > > specified > > domain. > > + * NOTES: > > + * 1. <dom> may be specified as DOMID_SELF. > > + * 2. Only a sufficiently-privileged domain may specify other > > than > > DOMID_SELF. > > + */ > > +#define EVTCHNOP_reset 10 > > +struct evtchn_reset { > > + /* IN parameters. */ > > + domid_t dom; > > +}; > > + > > +typedef struct evtchn_reset evtchn_reset_t; > > + > > +/* > > + * EVTCHNOP_init_control: initialize the control block for the > > FIFO ABI. > > + */ > > +#define EVTCHNOP_init_control 11 > > +struct evtchn_init_control { > > + /* IN parameters. */ > > + u64 control_gfn; > > + u32 offset; > > + u32 vcpu; > > + /* OUT parameters. */ > > + u8 link_bits; > > + u8 _pad[7]; > > +}; > > + > > +/* > > + * EVTCHNOP_expand_array: add an additional page to the event > > array. > > + */ > > +#define EVTCHNOP_expand_array 12 > > +struct evtchn_expand_array { > > + /* IN parameters. */ > > + u64 array_gfn; > > +}; > > + > > +/* > > + * EVTCHNOP_set_priority: set the priority for an event channel. > > + */ > > +#define EVTCHNOP_set_priority 13 > > +struct evtchn_set_priority { > > + /* IN parameters. 
*/ > > + evtchn_port_t port; > > + u32 priority; > > +}; > > + > > +struct evtchn_op { > > + u32 cmd; /* EVTCHNOP_* */ > > + union { > > + struct evtchn_alloc_unbound alloc_unbound; > > + struct evtchn_bind_interdomain bind_interdomain; > > + struct evtchn_bind_virq bind_virq; > > + struct evtchn_bind_pirq bind_pirq; > > + struct evtchn_bind_ipi bind_ipi; > > + struct evtchn_close close; > > + struct evtchn_send send; > > + struct evtchn_status status; > > + struct evtchn_bind_vcpu bind_vcpu; > > + struct evtchn_unmask unmask; > > + } u; > > +}; > > + > > +DEFINE_GUEST_HANDLE_STRUCT(evtchn_op); > > + > > +/* > > + * 2-level ABI > > + */ > > + > > +#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * > > sizeof(xen_ulong_t) * 64) > > + > > +/* > > + * FIFO ABI > > + */ > > + > > +/* Events may have priorities from 0 (highest) to 15 (lowest). */ > > +#define EVTCHN_FIFO_PRIORITY_MAX 0 > > +#define EVTCHN_FIFO_PRIORITY_DEFAULT 7 > > +#define EVTCHN_FIFO_PRIORITY_MIN 15 > > + > > +#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1) > > + > > +typedef u32 event_word_t; > > + > > +#define EVTCHN_FIFO_PENDING 31 > > +#define EVTCHN_FIFO_MASKED 30 > > +#define EVTCHN_FIFO_LINKED 29 > > +#define EVTCHN_FIFO_BUSY 28 > > + > > +#define EVTCHN_FIFO_LINK_BITS 17 > > +#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1) > > + > > +#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS) > > + > > +struct evtchn_fifo_control_block { > > + u32 ready; > > + u32 _rsvd; > > + event_word_t head[EVTCHN_FIFO_MAX_QUEUES]; > > +}; > > + > > +#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ > > diff --git a/include/xen/interface/grant_table.h > > b/include/xen/interface/grant_table.h > > new file mode 100644 > > index 0000000000..197a0d0d58 > > --- /dev/null > > +++ b/include/xen/interface/grant_table.h > > @@ -0,0 +1,582 @@ > > +/************************************************************ > > ****************** > > + * grant_table.h > > + * > > + * Interface for granting foreign access to page frames, and > > receiving > > + * page-ownership transfers. > > + * > > + * Permission is hereby granted, free of charge, to any person > > obtaining a > > copy > > + * of this software and associated documentation files (the > > "Software"), to > > + * deal in the Software without restriction, including without > > limitation the > > + * rights to use, copy, modify, merge, publish, distribute, > > sublicense, and/or > > + * sell copies of the Software, and to permit persons to whom the > > Software is > > + * furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice shall be > > included in > > + * all copies or substantial portions of the Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > > KIND, EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > > EVENT SHALL THE > > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > > DAMAGES OR OTHER > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > > ARISING > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > > OR OTHER > > + * DEALINGS IN THE SOFTWARE. 
> > + * > > + * Copyright (c) 2004, K A Fraser > > + */ > > + > > +#ifndef __XEN_PUBLIC_GRANT_TABLE_H__ > > +#define __XEN_PUBLIC_GRANT_TABLE_H__ > > + > > +#include <xen/interface/xen.h> > > + > > +/*********************************** > > + * GRANT TABLE REPRESENTATION > > + */ > > + > > +/* Some rough guidelines on accessing and updating grant-table > > entries > > + * in a concurrency-safe manner. For more information, Linux > > contains a > > + * reference implementation for guest OSes > > (arch/xen/kernel/grant_table.c). > > + * > > + * NB. WMB is a no-op on current-generation x86 processors. > > However, a > > + * compiler barrier will still be required. > > + * > > + * Introducing a valid entry into the grant table: > > + * 1. Write ent->domid. > > + * 2. Write ent->frame: > > + * GTF_permit_access: Frame to which access is permitted. > > + * GTF_accept_transfer: Pseudo-phys frame slot being filled > > by new > > + * frame, or zero if none. > > + * 3. Write memory barrier (WMB). > > + * 4. Write ent->flags, inc. valid type. > > + * > > + * Invalidating an unused GTF_permit_access entry: > > + * 1. flags = ent->flags. > > + * 2. Observe that !(flags & (GTF_reading|GTF_writing)). > > + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). > > + * NB. No need for WMB as reuse of entry is control-dependent on > > success > > of > > + * step 3, and all architectures guarantee ordering of ctrl- > > dep writes. > > + * > > + * Invalidating an in-use GTF_permit_access entry: > > + * This cannot be done directly. Request assistance from the > > domain > > controller > > + * which can set a timeout on the use of a grant entry and take > > necessary > > + * action. (NB. This is not yet implemented!). > > + * > > + * Invalidating an unused GTF_accept_transfer entry: > > + * 1. flags = ent->flags. > > + * 2. Observe that !(flags & GTF_transfer_committed). [*] > > + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). > > + * NB. No need for WMB as reuse of entry is control-dependent on > > success > > of > > + * step 3, and all architectures guarantee ordering of ctrl- > > dep writes. > > + * [*] If GTF_transfer_committed is set then the grant entry is > > 'committed'. > > + * The guest must /not/ modify the grant entry until the > > address of > > the > > + * transferred frame is written. It is safe for the guest to > > spin waiting > > + * for this to occur (detect by observing > > GTF_transfer_completed in > > + * ent->flags). > > + * > > + * Invalidating a committed GTF_accept_transfer entry: > > + * 1. Wait for (ent->flags & GTF_transfer_completed). > > + * > > + * Changing a GTF_permit_access from writable to read-only: > > + * Use SMP-safe CMPXCHG to set GTF_readonly, while > > checking !GTF_writing. > > + * > > + * Changing a GTF_permit_access from read-only to writable: > > + * Use SMP-safe bit-setting instruction. > > + */ > > + > > +/* > > + * Reference to a grant entry in a specified domain's grant table. > > + */ > > +typedef u32 grant_ref_t; > > + > > +/* > > + * A grant table comprises a packed array of grant entries in one > > or more > > + * page frames shared between Xen and a guest. > > + * [XEN]: This field is written by Xen and read by the sharing > > guest. > > + * [GST]: This field is written by the guest and read by Xen. > > + */ > > + > > +/* > > + * Version 1 of the grant table entry structure is maintained > > purely > > + * for backwards compatibility. New guests should use version 2. 
> > + */ > > +struct grant_entry_v1 { > > + /* GTF_xxx: various type and flag information. [XEN,GST] */ > > + u16 flags; > > + /* The domain being granted foreign privileges. [GST] */ > > + domid_t domid; > > + /* > > + * GTF_permit_access: Frame that @domid is allowed to map and > > access. [GST] > > + * GTF_accept_transfer: Frame whose ownership transferred by > > @domid. [XEN] > > + */ > > + u32 frame; > > +}; > > + > > +/* > > + * Type of grant entry. > > + * GTF_invalid: This grant entry grants no privileges. > > + * GTF_permit_access: Allow @domid to map/access @frame. > > + * GTF_accept_transfer: Allow @domid to transfer ownership of one > > page > > frame > > + * to this guest. Xen writes the page number > > to > > @frame. > > + * GTF_transitive: Allow @domid to transitively access a subrange > > of > > + * @trans_grant in @trans_domid. No mappings are > > allowed. > > + */ > > +#define GTF_invalid (0U << 0) > > +#define GTF_permit_access (1U << 0) > > +#define GTF_accept_transfer (2U << 0) > > +#define GTF_transitive (3U << 0) > > +#define GTF_type_mask (3U << 0) > > + > > +/* > > + * Subflags for GTF_permit_access. > > + * GTF_readonly: Restrict @domid to read-only mappings and > > accesses. > > [GST] > > + * GTF_reading: Grant entry is currently mapped for reading by > > @domid. > > [XEN] > > + * GTF_writing: Grant entry is currently mapped for writing by > > @domid. > > [XEN] > > + * GTF_sub_page: Grant access to only a subrange of the > > page. @domid > > + * will only be allowed to copy from the grant, and > > not > > + * map it. [GST] > > + */ > > +#define _GTF_readonly (2) > > +#define GTF_readonly (1U << _GTF_readonly) > > +#define _GTF_reading (3) > > +#define GTF_reading (1U << _GTF_reading) > > +#define _GTF_writing (4) > > +#define GTF_writing (1U << _GTF_writing) > > +#define _GTF_sub_page (8) > > +#define GTF_sub_page (1U << _GTF_sub_page) > > + > > +/* > > + * Subflags for GTF_accept_transfer: > > + * GTF_transfer_committed: Xen sets this flag to indicate that it > > is > > committed > > + * to transferring ownership of a page frame. When a guest > > sees this > > flag > > + * it must /not/ modify the grant entry until > > GTF_transfer_completed > > is > > + * set by Xen. > > + * GTF_transfer_completed: It is safe for the guest to spin-wait > > on this flag > > + * after reading GTF_transfer_committed. Xen will always > > write the > > frame > > + * address, followed by ORing this flag, in a timely manner. > > + */ > > +#define _GTF_transfer_committed (2) > > +#define GTF_transfer_committed (1U << _GTF_transfer_committed) > > +#define _GTF_transfer_completed (3) > > +#define GTF_transfer_completed (1U << _GTF_transfer_completed) > > + > > +/* > > + * Version 2 grant table entries. These fulfil the same role as > > + * version 1 entries, but can represent more complicated > > operations. > > + * Any given domain will have either a version 1 or a version 2 > > table, > > + * and every entry in the table will be the same version. > > + * > > + * The interface by which domains use grant references does not > > depend > > + * on the grant table version in use by the other domain. > > + */ > > + > > +/* > > + * Version 1 and version 2 grant entries share a common > > prefix. The > > + * fields of the prefix are documented as part of struct > > + * grant_entry_v1. 
> > + */ > > +struct grant_entry_header { > > + u16 flags; > > + domid_t domid; > > +}; > > + > > +/* > > + * Version 2 of the grant entry structure, here is a union because > > three > > + * different types are suppotted: full_page, sub_page and > > transitive. > > + */ > > +union grant_entry_v2 { > > + struct grant_entry_header hdr; > > + > > + /* > > + * This member is used for V1-style full page grants, where > > either: > > + * > > + * -- hdr.type is GTF_accept_transfer, or > > + * -- hdr.type is GTF_permit_access and GTF_sub_page is not > > set. > > + * > > + * In that case, the frame field has the same semantics as the > > + * field of the same name in the V1 entry structure. > > + */ > > + struct { > > + struct grant_entry_header hdr; > > + u32 pad0; > > + u64 frame; > > + } full_page; > > + > > + /* > > + * If the grant type is GTF_grant_access and GTF_sub_page is > > set, > > + * @domid is allowed to access bytes [@page_off,@ > > page_off+ at length) > > + * in frame @frame. > > + */ > > + struct { > > + struct grant_entry_header hdr; > > + u16 page_off; > > + u16 length; > > + u64 frame; > > + } sub_page; > > + > > + /* > > + * If the grant is GTF_transitive, @domid is allowed to use the > > + * grant @gref in domain @trans_domid, as if it was the local > > + * domain. Obviously, the transitive access must be compatible > > + * with the original grant. > > + */ > > + struct { > > + struct grant_entry_header hdr; > > + domid_t trans_domid; > > + u16 pad0; > > + grant_ref_t gref; > > + } transitive; > > + > > + u32 __spacer[4]; /* Pad to a power of two */ > > +}; > > + > > +typedef u16 grant_status_t; > > + > > +/*********************************** > > + * GRANT TABLE QUERIES AND USES > > + */ > > + > > +/* > > + * Handle to track a mapping created via a grant reference. > > + */ > > +typedef u32 grant_handle_t; > > + > > +/* > > + * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for > > access > > + * by devices and/or host CPUs. If successful, <handle> is a > > tracking number > > + * that must be presented later to destroy the mapping(s). On > > error, > > <handle> > > + * is a negative status code. > > + * NOTES: > > + * 1. If GNTMAP_device_map is specified then <dev_bus_addr> is > > the > > address > > + * via which I/O devices may access the granted frame. > > + * 2. If GNTMAP_host_map is specified then a mapping will be > > added at > > + * either a host virtual address in the current address space, > > or at > > + * a PTE at the specified machine address. The type of > > mapping to > > + * perform is selected through the GNTMAP_contains_pte flag, > > and the > > + * address is specified in <host_addr>. > > + * 3. Mappings should only be destroyed via > > GNTTABOP_unmap_grant_ref. > > If a > > + * host mapping is destroyed by other means then it is *NOT* > > guaranteed > > + * to be accounted to the correct grant reference! > > + */ > > +#define GNTTABOP_map_grant_ref 0 > > +struct gnttab_map_grant_ref { > > + /* IN parameters. */ > > + u64 host_addr; > > + u32 flags; /* GNTMAP_* */ > > + grant_ref_t ref; > > + domid_t dom; > > + /* OUT parameters. */ > > + s16 status; /* GNTST_* */ > > + grant_handle_t handle; > > + u64 dev_bus_addr; > > +}; > > + > > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref); > > + > > +/* > > + * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference > > mappings > > + * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, > > that > > + * field is ignored. 
If non-zero, they must refer to a device/host > > mapping > > + * that is tracked by <handle> > > + * NOTES: > > + * 1. The call may fail in an undefined manner if either mapping > > is not > > + * tracked by <handle>. > > + * 3. After executing a batch of unmaps, it is guaranteed that no > > stale > > + * mappings will remain in the device or host TLBs. > > + */ > > +#define GNTTABOP_unmap_grant_ref 1 > > +struct gnttab_unmap_grant_ref { > > + /* IN parameters. */ > > + u64 host_addr; > > + u64 dev_bus_addr; > > + grant_handle_t handle; > > + /* OUT parameters. */ > > + s16 status; /* GNTST_* */ > > +}; > > + > > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref); > > + > > +/* > > + * GNTTABOP_setup_table: Set up a grant table for <dom> comprising > > at > > least > > + * <nr_frames> pages. The frame addresses are written to the > > <frame_list>. > > + * Only <nr_frames> addresses are written, even if the table is > > larger. > > + * NOTES: > > + * 1. <dom> may be specified as DOMID_SELF. > > + * 2. Only a sufficiently-privileged domain may specify <dom> != > > DOMID_SELF. > > + * 3. Xen may not support more than a single grant-table page per > > domain. > > + */ > > +#define GNTTABOP_setup_table 2 > > +struct gnttab_setup_table { > > + /* IN parameters. */ > > + domid_t dom; > > + u32 nr_frames; > > + /* OUT parameters. */ > > + s16 status; /* GNTST_* */ > > + > > + GUEST_HANDLE(xen_pfn_t)frame_list; > > +}; > > + > > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table); > > + > > +/* > > + * GNTTABOP_dump_table: Dump the contents of the grant table to > > the > > + * xen console. Debugging use only. > > + */ > > +#define GNTTABOP_dump_table 3 > > +struct gnttab_dump_table { > > + /* IN parameters. */ > > + domid_t dom; > > + /* OUT parameters. */ > > + s16 status; /* GNTST_* */ > > +}; > > + > > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table); > > + > > +/* > > + * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign > > domain. The > > + * foreign domain has previously registered its interest in the > > transfer via > > + * <domid, ref>. > > + * > > + * Note that, even if the transfer fails, the specified page no > > longer belongs > > + * to the calling domain *unless* the error is GNTST_bad_page. > > + */ > > +#define GNTTABOP_transfer 4 > > +struct gnttab_transfer { > > + /* IN parameters. */ > > + xen_pfn_t mfn; > > + domid_t domid; > > + grant_ref_t ref; > > + /* OUT parameters. */ > > + s16 status; > > +}; > > + > > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer); > > + > > +/* > > + * GNTTABOP_copy: Hypervisor based copy > > + * source and destinations can be eithers MFNs or, for foreign > > domains, > > + * grant references. the foreign domain has to grant read/write > > access > > + * in its grant table. > > + * > > + * The flags specify what type source and destinations are (either > > MFN > > + * or grant reference). > > + * > > + * Note that this can also be used to copy data between two > > domains > > + * via a third party if the source and destination domains had > > previously > > + * grant appropriate access to their pages to the third party. > > + * > > + * source_offset specifies an offset in the source frame, > > dest_offset > > + * the offset in the target frame and len specifies the number of > > + * bytes to be copied. 
> > + */ > > + > > +#define _GNTCOPY_source_gref (0) > > +#define GNTCOPY_source_gref (1 << _GNTCOPY_source_gref) > > +#define _GNTCOPY_dest_gref (1) > > +#define GNTCOPY_dest_gref (1 << _GNTCOPY_dest_gref) > > + > > +#define GNTTABOP_copy 5 > > +struct gnttab_copy { > > + /* IN parameters. */ > > + struct { > > + union { > > + grant_ref_t ref; > > + xen_pfn_t gmfn; > > + } u; > > + domid_t domid; > > + u16 offset; > > + } source, dest; > > + u16 len; > > + u16 flags; /* GNTCOPY_* */ > > + /* OUT parameters. */ > > + s16 status; > > +}; > > + > > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy); > > + > > +/* > > + * GNTTABOP_query_size: Query the current and maximum sizes of the > > shared > > + * grant table. > > + * NOTES: > > + * 1. <dom> may be specified as DOMID_SELF. > > + * 2. Only a sufficiently-privileged domain may specify <dom> != > > DOMID_SELF. > > + */ > > +#define GNTTABOP_query_size 6 > > +struct gnttab_query_size { > > + /* IN parameters. */ > > + domid_t dom; > > + /* OUT parameters. */ > > + u32 nr_frames; > > + u32 max_nr_frames; > > + s16 status; /* GNTST_* */ > > +}; > > + > > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size); > > + > > +/* > > + * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference > > mappings > > + * tracked by <handle> but atomically replace the page table entry > > with one > > + * pointing to the machine address under <new_addr>. <new_addr> > > will > > be > > + * redirected to the null entry. > > + * NOTES: > > + * 1. The call may fail in an undefined manner if either mapping > > is not > > + * tracked by <handle>. > > + * 2. After executing a batch of unmaps, it is guaranteed that no > > stale > > + * mappings will remain in the device or host TLBs. > > + */ > > +#define GNTTABOP_unmap_and_replace 7 > > +struct gnttab_unmap_and_replace { > > + /* IN parameters. */ > > + u64 host_addr; > > + u64 new_addr; > > + grant_handle_t handle; > > + /* OUT parameters. */ > > + s16 status; /* GNTST_* */ > > +}; > > + > > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace); > > + > > +/* > > + * GNTTABOP_set_version: Request a particular version of the grant > > + * table shared table structure. This operation can only be > > performed > > + * once in any given domain. It must be performed before any > > grants > > + * are activated; otherwise, the domain will be stuck with version > > 1. > > + * The only defined versions are 1 and 2. > > + */ > > +#define GNTTABOP_set_version 8 > > +struct gnttab_set_version { > > + /* IN parameters */ > > + u32 version; > > +}; > > + > > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version); > > + > > +/* > > + * GNTTABOP_get_status_frames: Get the list of frames used to > > store grant > > + * status for <dom>. In grant format version 2, the status is > > separated > > + * from the other shared grant fields to allow more efficient > > synchronization > > + * using barriers instead of atomic cmpexch operations. > > + * <nr_frames> specify the size of vector <frame_list>. > > + * The frame addresses are returned in the <frame_list>. > > + * Only <nr_frames> addresses are returned, even if the table is > > larger. > > + * NOTES: > > + * 1. <dom> may be specified as DOMID_SELF. > > + * 2. Only a sufficiently-privileged domain may specify <dom> != > > DOMID_SELF. > > + */ > > +#define GNTTABOP_get_status_frames 9 > > +struct gnttab_get_status_frames { > > + /* IN parameters. */ > > + u32 nr_frames; > > + domid_t dom; > > + /* OUT parameters. 
*/ > > + s16 status; /* GNTST_* */ > > + > > + GUEST_HANDLE(u64)frame_list; > > +}; > > + > > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames); > > + > > +/* > > + * GNTTABOP_get_version: Get the grant table version which is in > > + * effect for domain <dom>. > > + */ > > +#define GNTTABOP_get_version 10 > > +struct gnttab_get_version { > > + /* IN parameters */ > > + domid_t dom; > > + u16 pad; > > + /* OUT parameters */ > > + u32 version; > > +}; > > + > > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version); > > + > > +/* > > + * Issue one or more cache maintenance operations on a portion of > > a > > + * page granted to the calling domain by a foreign domain. > > + */ > > +#define GNTTABOP_cache_flush 12 > > +struct gnttab_cache_flush { > > + union { > > + u64 dev_bus_addr; > > + grant_ref_t ref; > > + } a; > > + u16 offset; /* offset from start of grant */ > > + u16 length; /* size within the grant */ > > +#define GNTTAB_CACHE_CLEAN (1 << 0) > > +#define GNTTAB_CACHE_INVAL (1 << 1) > > +#define GNTTAB_CACHE_SOURCE_GREF (1 << 31) > > + u32 op; > > +}; > > + > > +DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush); > > + > > +/* > > + * Bitfield values for update_pin_status.flags. > > + */ > > + /* Map the grant entry for access by I/O devices. */ > > +#define _GNTMAP_device_map (0) > > +#define GNTMAP_device_map (1 << _GNTMAP_device_map) > > +/* Map the grant entry for access by host CPUs. */ > > +#define _GNTMAP_host_map (1) > > +#define GNTMAP_host_map (1 << _GNTMAP_host_map) > > +/* Accesses to the granted frame will be restricted to read-only > > access. */ > > +#define _GNTMAP_readonly (2) > > +#define GNTMAP_readonly (1 << _GNTMAP_readonly) > > +/* > > + * GNTMAP_host_map subflag: > > + * 0 => The host mapping is usable only by the guest OS. > > + * 1 => The host mapping is usable by guest OS + current > > application. > > + */ > > +#define _GNTMAP_application_map (3) > > +#define GNTMAP_application_map (1 << _GNTMAP_application_map) > > + > > +/* > > + * GNTMAP_contains_pte subflag: > > + * 0 => This map request contains a host virtual address. > > + * 1 => This map request contains the machine addess of the PTE > > to > > update. > > + */ > > +#define _GNTMAP_contains_pte (4) > > +#define GNTMAP_contains_pte (1 << _GNTMAP_contains_pte) > > + > > +/* > > + * Bits to be placed in guest kernel available PTE bits > > (architecture > > + * dependent; only supported when XENFEAT_gnttab_map_avail_bits is > > set). > > + */ > > +#define _GNTMAP_guest_avail0 (16) > > +#define GNTMAP_guest_avail_mask ((u32)~0 << _GNTMAP_guest_avail0) > > + > > +/* > > + * Values for error status returns. All errors are -ve. > > + */ > > +#define GNTST_okay (0) /* Normal return. > > */ > > +#define GNTST_general_error (-1) /* General undefined error. > > */ > > +#define GNTST_bad_domain (-2) /* Unrecognsed domain id. > > */ > > +#define GNTST_bad_gntref (-3) /* Unrecognised or > > inappropriate > > gntref. */ > > +#define GNTST_bad_handle (-4) /* Unrecognised or > > inappropriate > > handle. */ > > +#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual > > address to > > map. */ > > +#define GNTST_bad_dev_addr (-6) /* Inappropriate device > > address to > > unmap.*/ > > +#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. > > */ > > +#define GNTST_permission_denied (-8) /* Not enough privilege for > > operation. > > */ > > +#define GNTST_bad_page (-9) /* Specified page was invalid > > for op. > > */ > > +#define GNTST_bad_copy_arg (-10) /* copy arguments cross page > > boundary. 
*/ > > +#define GNTST_address_too_big (-11) /* transfer page address too > > large. > > */ > > +#define GNTST_eagain (-12) /* Operation not done; try > > again. > > */ > > + > > +#define GNTTABOP_error_msgs { \ > > + "okay", \ > > + "undefined error", \ > > + "unrecognised domain id", \ > > + "invalid grant reference", \ > > + "invalid mapping handle", \ > > + "invalid virtual address", \ > > + "invalid device address", \ > > + "no spare translation slot in the I/O MMU", \ > > + "permission denied", \ > > + "bad page", \ > > + "copy arguments cross page boundary", \ > > + "page address size too large", \ > > + "operation not done; try again" \ > > +} > > + > > +#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ > > diff --git a/include/xen/interface/hvm/hvm_op.h > > b/include/xen/interface/hvm/hvm_op.h > > new file mode 100644 > > index 0000000000..1c53cad729 > > --- /dev/null > > +++ b/include/xen/interface/hvm/hvm_op.h > > @@ -0,0 +1,69 @@ > > +/* > > + * Permission is hereby granted, free of charge, to any person > > obtaining a > > copy > > + * of this software and associated documentation files (the > > "Software"), to > > + * deal in the Software without restriction, including without > > limitation the > > + * rights to use, copy, modify, merge, publish, distribute, > > sublicense, and/or > > + * sell copies of the Software, and to permit persons to whom the > > Software is > > + * furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice shall be > > included in > > + * all copies or substantial portions of the Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > > KIND, EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > > EVENT SHALL THE > > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > > DAMAGES OR OTHER > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > > ARISING > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > > OR OTHER > > + * DEALINGS IN THE SOFTWARE. > > + */ > > + > > +#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ > > +#define __XEN_PUBLIC_HVM_HVM_OP_H__ > > + > > +/* Get/set subcommands: the second argument of the hypercall is a > > + * pointer to a xen_hvm_param struct. > > + */ > > +#define HVMOP_set_param 0 > > +#define HVMOP_get_param 1 > > +struct xen_hvm_param { > > + domid_t domid; /* IN */ > > + u32 index; /* IN */ > > + u64 value; /* IN/OUT */ > > +}; > > + > > +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param); > > + > > +/* Hint from PV drivers for pagetable destruction. */ > > +#define HVMOP_pagetable_dying 9 > > +struct xen_hvm_pagetable_dying { > > + /* Domain with a pagetable about to be destroyed. */ > > + domid_t domid; > > + /* guest physical address of the toplevel pagetable dying */ > > + aligned_u64 gpa; > > +}; > > + > > +typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t; > > +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t); > > + > > +enum hvmmem_type_t { > > + HVMMEM_ram_rw, /* Normal read/write guest RAM */ > > + HVMMEM_ram_ro, /* Read-only; writes are discarded > > */ > > + HVMMEM_mmio_dm, /* Reads and write go to the device > > model */ > > +}; > > + > > +#define HVMOP_get_mem_type 15 > > +/* Return hvmmem_type_t for the specified pfn. */ > > +struct xen_hvm_get_mem_type { > > + /* Domain to be queried. */ > > + domid_t domid; > > + /* OUT variable. 
*/ > > + u16 mem_type; > > + u16 pad[2]; /* align next field on 8-byte boundary */ > > + /* IN variable. */ > > + u64 pfn; > > +}; > > + > > +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_mem_type); > > + > > +#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ > > diff --git a/include/xen/interface/hvm/params.h > > b/include/xen/interface/hvm/params.h > > new file mode 100644 > > index 0000000000..4d61fc58d9 > > --- /dev/null > > +++ b/include/xen/interface/hvm/params.h > > @@ -0,0 +1,127 @@ > > +/* > > + * Permission is hereby granted, free of charge, to any person > > obtaining a > > copy > > + * of this software and associated documentation files (the > > "Software"), to > > + * deal in the Software without restriction, including without > > limitation the > > + * rights to use, copy, modify, merge, publish, distribute, > > sublicense, and/or > > + * sell copies of the Software, and to permit persons to whom the > > Software is > > + * furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice shall be > > included in > > + * all copies or substantial portions of the Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > > KIND, EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > > EVENT SHALL THE > > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > > DAMAGES OR OTHER > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > > ARISING > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > > OR OTHER > > + * DEALINGS IN THE SOFTWARE. > > + */ > > + > > +#ifndef __XEN_PUBLIC_HVM_PARAMS_H__ > > +#define __XEN_PUBLIC_HVM_PARAMS_H__ > > + > > +#include <xen/interface/hvm/hvm_op.h> > > + > > +/* > > + * Parameter space for HVMOP_{set,get}_param. > > + */ > > + > > +#define HVM_PARAM_CALLBACK_IRQ 0 > > +/* > > + * How should CPU0 event-channel notifications be delivered? > > + * > > + * If val == 0 then CPU0 event-channel notifications are not > > delivered. > > + * If val != 0, val[63:56] encodes the type, as follows: > > + */ > > + > > +#define HVM_PARAM_CALLBACK_TYPE_GSI 0 > > +/* > > + * val[55:0] is a delivery GSI. GSI 0 cannot be used, as it > > aliases val == 0, > > + * and disables all notifications. > > + */ > > + > > +#define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1 > > +/* > > + * val[55:0] is a delivery PCI INTx line: > > + * Domain = val[47:32], Bus = val[31:16] DevFn = val[15:8], IntX = > > val[1:0] > > + */ > > + > > +#if defined(__i386__) || defined(__x86_64__) > > +#define HVM_PARAM_CALLBACK_TYPE_VECTOR 2 > > +/* > > + * val[7:0] is a vector number. Check for > > XENFEAT_hvm_callback_vector to > > know > > + * if this delivery method is available. > > + */ > > +#elif defined(__arm__) || defined(__aarch64__) > > +#define HVM_PARAM_CALLBACK_TYPE_PPI 2 > > +/* > > + * val[55:16] needs to be zero. > > + * val[15:8] is interrupt flag of the PPI used by event-channel: > > + * bit 8: the PPI is edge(1) or level(0) triggered > > + * bit 9: the PPI is active low(1) or high(0) > > + * val[7:0] is a PPI number used by event-channel. > > + * This is only used by ARM/ARM64 and masking/eoi the interrupt > > associated > > to > > + * the notification is handled by the interrupt controller. 
> > + */ > > +#endif > > + > > +#define HVM_PARAM_STORE_PFN 1 > > +#define HVM_PARAM_STORE_EVTCHN 2 > > + > > +#define HVM_PARAM_PAE_ENABLED 4 > > + > > +#define HVM_PARAM_IOREQ_PFN 5 > > + > > +#define HVM_PARAM_BUFIOREQ_PFN 6 > > + > > +/* > > + * Set mode for virtual timers (currently x86 only): > > + * delay_for_missed_ticks (default): > > + * Do not advance a vcpu's time beyond the correct delivery time > > for > > + * interrupts that have been missed due to preemption. Deliver > > missed > > + * interrupts when the vcpu is rescheduled and advance the > > vcpu's virtual > > + * time stepwise for each one. > > + * no_delay_for_missed_ticks: > > + * As above, missed interrupts are delivered, but guest time > > always tracks > > + * wallclock (i.e., real) time while doing so. > > + * no_missed_ticks_pending: > > + * No missed interrupts are held pending. Instead, to ensure > > ticks are > > + * delivered at some non-zero rate, if we detect missed ticks > > then the > > + * internal tick alarm is not disabled if the VCPU is preempted > > during the > > + * next tick period. > > + * one_missed_tick_pending: > > + * Missed interrupts are collapsed together and delivered as one > > 'late > > tick'. > > + * Guest time always tracks wallclock (i.e., real) time. > > + */ > > +#define HVM_PARAM_TIMER_MODE 10 > > +#define HVMPTM_delay_for_missed_ticks 0 > > +#define HVMPTM_no_delay_for_missed_ticks 1 > > +#define HVMPTM_no_missed_ticks_pending 2 > > +#define HVMPTM_one_missed_tick_pending 3 > > + > > +/* Boolean: Enable virtual HPET (high-precision event timer)? > > (x86-only) */ > > +#define HVM_PARAM_HPET_ENABLED 11 > > + > > +/* Identity-map page directory used by Intel EPT when CR0.PG=0. */ > > +#define HVM_PARAM_IDENT_PT 12 > > + > > +/* Device Model domain, defaults to 0. */ > > +#define HVM_PARAM_DM_DOMAIN 13 > > + > > +/* ACPI S state: currently support S0 and S3 on x86. */ > > +#define HVM_PARAM_ACPI_S_STATE 14 > > + > > +/* TSS used on Intel when CR0.PE=0. */ > > +#define HVM_PARAM_VM86_TSS 15 > > + > > +/* Boolean: Enable aligning all periodic vpts to reduce interrupts > > */ > > +#define HVM_PARAM_VPT_ALIGN 16 > > + > > +/* Console debug shared memory ring and event channel */ > > +#define HVM_PARAM_CONSOLE_PFN 17 > > +#define HVM_PARAM_CONSOLE_EVTCHN 18 > > + > > +#define HVM_NR_PARAMS 19 > > + > > +#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ > > diff --git a/include/xen/interface/io/blkif.h > > b/include/xen/interface/io/blkif.h > > new file mode 100644 > > index 0000000000..7d74c99226 > > --- /dev/null > > +++ b/include/xen/interface/io/blkif.h > > @@ -0,0 +1,726 @@ > > +/************************************************************ > > ****************** > > + * blkif.h > > + * > > + * Unified block-device I/O interface for Xen guest OSes. > > + * > > + * Permission is hereby granted, free of charge, to any person > > obtaining a > > copy > > + * of this software and associated documentation files (the > > "Software"), to > > + * deal in the Software without restriction, including without > > limitation the > > + * rights to use, copy, modify, merge, publish, distribute, > > sublicense, and/or > > + * sell copies of the Software, and to permit persons to whom the > > Software is > > + * furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice shall be > > included in > > + * all copies or substantial portions of the Software. 
> > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > > KIND, EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > > EVENT SHALL THE > > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > > DAMAGES OR OTHER > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > > ARISING > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > > OR OTHER > > + * DEALINGS IN THE SOFTWARE. > > + * > > + * Copyright (c) 2003-2004, Keir Fraser > > + * Copyright (c) 2012, Spectra Logic Corporation > > + */ > > + > > +#ifndef __XEN_PUBLIC_IO_BLKIF_H__ > > +#define __XEN_PUBLIC_IO_BLKIF_H__ > > + > > +#include "ring.h" > > +#include "../grant_table.h" > > + > > +/* > > + * Front->back notifications: When enqueuing a new request, > > sending a > > + * notification can be made conditional on req_event (i.e., the > > generic > > + * hold-off mechanism provided by the ring macros). Backends must > > set > > + * req_event appropriately (e.g., using > > RING_FINAL_CHECK_FOR_REQUESTS()). > > + * > > + * Back->front notifications: When enqueuing a new response, > > sending a > > + * notification can be made conditional on rsp_event (i.e., the > > generic > > + * hold-off mechanism provided by the ring macros). Frontends must > > set > > + * rsp_event appropriately (e.g., using > > RING_FINAL_CHECK_FOR_RESPONSES()). > > + */ > > + > > +#ifndef blkif_vdev_t > > +#define blkif_vdev_t u16 > > +#endif > > +#define blkif_sector_t u64 > > + > > +/* > > + * Feature and Parameter Negotiation > > + * ================================= > > + * The two halves of a Xen block driver utilize nodes within the > > XenStore to > > + * communicate capabilities and to negotiate operating > > parameters. This > > + * section enumerates these nodes which reside in the respective > > front and > > + * backend portions of the XenStore, following the XenBus > > convention. > > + * > > + * All data in the XenStore is stored as strings. Nodes > > specifying numeric > > + * values are encoded in decimal. Integer value ranges listed > > below are > > + * expressed as fixed sized integer types capable of storing the > > conversion > > + * of a properly formatted node string, without loss of > > information. > > + * > > + * Any specified default value is in effect if the corresponding > > XenBus node > > + * is not present in the XenStore. > > + * > > + * XenStore nodes in sections marked "PRIVATE" are solely for use > > by the > > + * driver side whose XenBus tree contains them. > > + * > > + * XenStore nodes marked "DEPRECATED" in their notes section > > should only > > be > > + * used to provide interoperability with legacy implementations. > > + * > > + * See the XenBus state transition diagram below for details on > > when XenBus > > + * nodes must be published and when they can be queried. > > + * > > + > > ************************************************************** > > *************** > > + * Backend XenBus Nodes > > + > > ************************************************************** > > *************** > > + * > > + *------------------ Backend Device Identification (PRIVATE) --- > > --------------- > > + * > > + * mode > > + * Values: "r" (read only), "w" (writable) > > + * > > + * The read or write access permissions to the backing store > > to be > > + * granted to the frontend.
> > + * > > + * params > > + * Values: string > > + * > > + * A free formatted string providing sufficient information > > for the > > + * hotplug script to attach the device and provide a suitable > > + * handler (ie: a block device) for blkback to use. > > + * > > + * physical-device > > + * Values: "MAJOR:MINOR" > > + * Notes: 11 > > + * > > + * MAJOR and MINOR are the major number and minor number of > > the > > + * backing device respectively. > > + * > > + * physical-device-path > > + * Values: path string > > + * > > + * A string that contains the absolute path to the disk > > image. On > > + * NetBSD and Linux this is always a block device, while on > > FreeBSD > > + * it can be either a block device or a regular file. > > + * > > + * type > > + * Values: "file", "phy", "tap" > > + * > > + * The type of the backing device/object. > > + * > > + * > > + * direct-io-safe > > + * Values: 0/1 (boolean) > > + * Default Value: 0 > > + * > > + * The underlying storage is not affected by the direct IO > > memory > > + * lifetime bug. See: > > + * http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html > > + * > > + * Therefore this option gives the backend permission to use > > + * O_DIRECT, notwithstanding that bug. > > + * > > + * That is, if this option is enabled, use of O_DIRECT is > > safe, > > + * in circumstances where we would normally have avoided it > > as a > > + * workaround for that bug. This option is not relevant for > > all > > + * backends, and even not necessarily supported for those for > > + * which it is relevant. A backend which knows that it is > > not > > + * affected by the bug can ignore this option. > > + * > > + * This option doesn't require a backend to use O_DIRECT, so > > it > > + * should not be used to try to control the caching > > behaviour. > > + * > > + *--------------------------------- Features ------------------- > > -------------- > > + * > > + * feature-barrier > > + * Values: 0/1 (boolean) > > + * Default Value: 0 > > + * > > + * A value of "1" indicates that the backend can process > > requests > > + * containing the BLKIF_OP_WRITE_BARRIER request opcode. > > Requests > > + * of this type may still be returned at any time with the > > + * BLKIF_RSP_EOPNOTSUPP result code. > > + * > > + * feature-flush-cache > > + * Values: 0/1 (boolean) > > + * Default Value: 0 > > + * > > + * A value of "1" indicates that the backend can process > > requests > > + * containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. > > Requests > > + * of this type may still be returned at any time with the > > + * BLKIF_RSP_EOPNOTSUPP result code. > > + * > > + * feature-discard > > + * Values: 0/1 (boolean) > > + * Default Value: 0 > > + * > > + * A value of "1" indicates that the backend can process > > requests > > + * containing the BLKIF_OP_DISCARD request opcode. Requests > > + * of this type may still be returned at any time with the > > + * BLKIF_RSP_EOPNOTSUPP result code.
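Worth noting for frontend authors: since all of these feature nodes are strings, detection is just a read-and-parse against the backend's XenBus directory. A minimal sketch, assuming a hypothetical xenstore_read_int() helper (not something this patch defines):

    /* Returns 1 if the backend advertises a boolean feature node (sketch). */
    static int backend_has_feature(const char *backend, const char *node)
    {
            char path[128];
            int val;

            snprintf(path, sizeof(path), "%s/%s", backend, node);
            if (xenstore_read_int(path, &val))      /* hypothetical helper */
                    return 0;       /* missing node: default value 0 applies */
            return val == 1;
    }

so e.g. backend_has_feature(be_path, "feature-flush-cache") gates whether the frontend may issue BLKIF_OP_FLUSH_DISKCACHE at all.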
> > + * > > + * feature-persistent > > + * Values: 0/1 (boolean) > > + * Default Value: 0 > > + * Notes: 7 > > + * > > + * A value of "1" indicates that the backend can keep the > > grants used > > + * by the frontend driver mapped, so the same set of grants > > should be > > + * used in all transactions. The maximum number of grants the > > backend > > + * can map persistently depends on the implementation, but > > ideally it > > + * should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. > > Using this > > + * feature the backend doesn't need to unmap each grant, > > preventing > > + * costly TLB flushes. The backend driver should only map > > grants > > + * persistently if the frontend supports it. If a backend > > driver chooses > > + * to use the persistent protocol when the frontend doesn't > > support it, > > + * it will probably hit the maximum number of persistently > > mapped > > grants > > + * (due to the fact that the frontend won't be reusing the > > same > > grants), > > + * and fall back to non-persistent mode. Backend > > implementations > > may > > + * shrink or expand the number of persistently mapped grants > > without > > + * notifying the frontend depending on memory constraints > > (this might > > + * cause a performance degradation). > > + * > > + * If a backend driver wants to limit the maximum number of > > persistently > > + * mapped grants to a value less than RING_SIZE * > > + * BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be > > used to > > + * discard the grants that are less commonly used. Using a > > LRU in the > > + * backend driver paired with a LIFO queue in the frontend > > will > > + * allow us to have better performance in this scenario. > > + * > > + *----------------------- Request Transport Parameters --------- > > --------------- > > + * > > + * max-ring-page-order > > + * Values: <uint32_t> > > + * Default Value: 0 > > + * Notes: 1, 3 > > + * > > + * The maximum supported size of the request ring buffer in > > units of > > + * lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 > > pages, > > + * etc.). > > + * > > + * max-ring-pages > > + * Values: <uint32_t> > > + * Default Value: 1 > > + * Notes: DEPRECATED, 2, 3 > > + * > > + * The maximum supported size of the request ring buffer in > > units of > > + * machine pages. The value must be a power of 2. > > + * > > + *------------------------- Backend Device Properties ------------ > > ------------- > > + * > > + * discard-enable > > + * Values: 0/1 (boolean) > > + * Default Value: 1 > > + * > > + * This optional property, set by the toolstack, instructs > > the backend > > + * to offer (or not to offer) discard to the frontend. If the > > property > > + * is missing the backend should offer discard if the backing > > storage > > + * actually supports it. > > + * > > + * discard-alignment > > + * Values: <uint32_t> > > + * Default Value: 0 > > + * Notes: 4, 5 > > + * > > + * The offset, in bytes from the beginning of the virtual > > block device, > > + * to the first, addressable, discard extent on the > > underlying device. > > + * > > + * discard-granularity > > + * Values: <uint32_t> > > + * Default Value: <"sector-size"> > > + * Notes: 4 > > + * > > + * The size, in bytes, of the individually addressable > > discard extents > > + * of the underlying device. 
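A quick worked example of the ring-order encoding above, since the lb() notation trips people up: the node stores log2 of the page count, so the number of pages is 1 << order. Assuming the fixed 4 KiB Xen page size:

    unsigned int order = 2;                         /* from max-ring-page-order */
    unsigned int ring_pages = 1U << order;          /* 4 pages */
    unsigned long ring_bytes = ring_pages * 4096UL; /* 16 KiB for indexes + entries */

The usable entry count then comes from __RING_SIZE()/__CONST_RING_SIZE() in ring.h further down in this patch, which round down to a power of two.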
> > + * > > + * discard-secure > > + * Values: 0/1 (boolean) > > + * Default Value: 0 > > + * Notes: 10 > > + * > > + * A value of "1" indicates that the backend can process > > BLKIF_OP_DISCARD > > + * requests with the BLKIF_DISCARD_SECURE flag set. > > + * > > + * info > > + * Values: <uint32_t> (bitmap) > > + * > > + * A collection of bit flags describing attributes of the > > backing > > + * device. The VDISK_* macros define the meaning of each bit > > + * location. > > + * > > + * sector-size > > + * Values: <uint32_t> > > + * > > + * The logical block size, in bytes, of the underlying > > storage. This > > + * must be a power of two with a minimum value of 512. > > + * > > + * NOTE: Because of implementation bugs in some frontends > > this > > must be > > + * set to 512, unless the frontend advertizes a non- > > zero value > > + * in its "feature-large-sector-size" xenbus node. (See > > below). > > + * > > + * physical-sector-size > > + * Values: <uint32_t> > > + * Default Value: <"sector-size"> > > + * > > + * The physical block size, in bytes, of the backend storage. > > This > > + * must be an integer multiple of "sector-size". > > + * > > + * sectors > > + * Values: <u64> > > + * > > + * The size of the backend device, expressed in units of > > "sector-size". > > + * The product of "sector-size" and "sectors" must also be an > > integer > > + * multiple of "physical-sector-size", if that node is > > present. > > + * > > + > > ************************************************************** > > *************** > > + * Frontend XenBus Nodes > > + > > ************************************************************** > > *************** > > + * > > + *----------------------- Request Transport Parameters --------- > > -------------- > > + * > > + * event-channel > > + * Values: <uint32_t> > > + * > > + * The identifier of the Xen event channel used to signal > > activity > > + * in the ring buffer. > > + * > > + * ring-ref > > + * Values: <uint32_t> > > + * Notes: 6 > > + * > > + * The Xen grant reference granting permission for the > > backend to > > map > > + * the sole page in a single page sized ring buffer. > > + * > > + * ring-ref%u > > + * Values: <uint32_t> > > + * Notes: 6 > > + * > > + * For a frontend providing a multi-page ring, a "number of > > ring pages" > > + * sized list of nodes, each containing a Xen grant reference > > granting > > + * permission for the backend to map the page of the ring > > located > > + * at page index "%u". Page indexes are zero based. > > + * > > + * protocol > > + * Values: string (XEN_IO_PROTO_ABI_*) > > + * Default Value: XEN_IO_PROTO_ABI_NATIVE > > + * > > + * The machine ABI rules governing the format of all ring > > request and > > + * response structures. > > + * > > + * ring-page-order > > + * Values: <uint32_t> > > + * Default Value: 0 > > + * Maximum Value: MAX(ffs(max-ring-pages) - 1, > > max-ring-page-order) > > + * Notes: 1, 3 > > + * > > + * The size of the frontend allocated request ring buffer in > > units > > + * of lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == > > 4 pages, > > + * etc.). > > + * > > + * num-ring-pages > > + * Values: <uint32_t> > > + * Default Value: 1 > > + * Maximum Value: MAX(max-ring-pages,(0x1 << > > max-ring-page-order)) > > + * Notes: DEPRECATED, 2, 3 > > + * > > + * The size of the frontend allocated request ring buffer in > > units of > > + * machine pages. The value must be a power of 2. 
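For the common single-page case the frontend publish step reduces to two writes plus the state change. A sketch, with xenstore_write() standing in for whatever xenbus helper the series provides, and "device/vbd/768" as an illustrative device path:

    char val[16];

    /* ring_gref is the grant_ref_t of the shared ring page,
     * port the evtchn_port_t bound for this device (sketch) */
    snprintf(val, sizeof(val), "%u", ring_gref);
    xenstore_write("device/vbd/768/ring-ref", val);
    snprintf(val, sizeof(val), "%u", port);
    xenstore_write("device/vbd/768/event-channel", val);

after which the frontend moves to XenbusStateInitialised, per the state diagram below.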
> > + * > > + *--------------------------------- Features ------------------- > > -------------- > > + * > > + * feature-persistent > > + * Values: 0/1 (boolean) > > + * Default Value: 0 > > + * Notes: 7, 8, 9 > > + * > > + * A value of "1" indicates that the frontend will reuse the > > same grants > > + * for all transactions, allowing the backend to map them > > with write > > + * access (even when it should be read-only). If the frontend > > hits the > > + * maximum number of allowed persistently mapped grants, it > > can > > fallback > > + * to non persistent mode. This will cause a performance > > degradation, > > + * since the backend driver will still try to map those > > grants > > + * persistently. Since the persistent grants protocol is > > compatible with > > + * the previous protocol, a frontend driver can choose to > > work in > > + * persistent mode even when the backend doesn't support it. > > + * > > + * It is recommended that the frontend driver stores the > > persistently > > + * mapped grants in a LIFO queue, so a subset of all > > persistently > > mapped > > + * grants gets used commonly. This is done in case the > > backend driver > > + * decides to limit the maximum number of persistently mapped > > grants > > + * to a value less than RING_SIZE * > > BLKIF_MAX_SEGMENTS_PER_REQUEST. > > + * > > + * feature-large-sector-size > > + * Values: 0/1 (boolean) > > + * Default Value: 0 > > + * > > + * A value of "1" indicates that the frontend will correctly > > supply and > > + * interpret all sector-based quantities in terms of the > > "sector-size" > > + * value supplied in the backend info, whatever that may be > > set to. > > + * If this node is not present or its value is "0" then it is > > assumed > > + * that the frontend requires that the logical block size is > > 512 as it > > + * is hardcoded (which is the case in some frontend > > implementations). > > + * > > + *------------------------- Virtual Device Properties ------------ > > ------------- > > + * > > + * device-type > > + * Values: "disk", "cdrom", "floppy", etc. > > + * > > + * virtual-device > > + * Values: <uint32_t> > > + * > > + * A value indicating the physical device to virtualize > > within the > > + * frontend's domain. (e.g. "The first ATA disk", "The third > > SCSI > > + * disk", etc.) > > + * > > + * See docs/misc/vbd-interface.txt for details on the format > > of this > > + * value. > > + * > > + * Notes > > + * ----- > > + * (1) Multi-page ring buffer scheme first developed in the Citrix > > XenServer > > + * PV drivers. > > + * (2) Multi-page ring buffer scheme first used in some RedHat > > distributions > > + * including a distribution deployed on certain nodes of the > > Amazon > > + * EC2 cluster. > > + * (3) Support for multi-page ring buffers was implemented > > independently, > > + * in slightly different forms, by both Citrix and > > RedHat/Amazon. > > + * For full interoperability, block front and backends should > > publish > > + * identical ring parameters, adjusted for unit differences, > > to the > > + * XenStore nodes used in both schemes. > > + * (4) Devices that support discard functionality may internally > > allocate space > > + * (discardable extents) in units that are larger than the > > exported > > logical > > + * block size. If the backing device has such discardable > > extents the > > + * backend should provide both discard-granularity and > > discard-alignment. > > + * Providing just one of the two may be considered an error by > > the > > frontend.
> > + * Backends supporting discard should include discard- > > granularity and > > + * discard-alignment even if they support discarding individual > > sectors. > > + * Frontends should assume discard-alignment == 0 and > > discard-granularity > > + * == sector size if these keys are missing. > > + * (5) The discard-alignment parameter allows a physical device to > > be > > + * partitioned into virtual devices that do not necessarily > > begin or > > + * end on a discardable extent boundary. > > + * (6) When there is only a single page allocated to the request > > ring, > > + * 'ring-ref' is used to communicate the grant reference for > > this > > + * page to the backend. When using a multi-page ring, the > > 'ring-ref' > > + * node is not created. Instead 'ring-ref0' - 'ring-refN' are > > used. > > + * (7) When using persistent grants data has to be copied from/to > > the page > > + * where the grant is currently mapped. The overhead of doing > > this > > copy > > + * however doesn't suppress the speed improvement of not > > having to > > unmap > > + * the grants. > > + * (8) The frontend driver has to allow the backend driver to map > > all grants > > + * with write access, even when they should be mapped read- > > only, > > since > > + * further requests may reuse these grants and require write > > permissions. > > + * (9) Linux implementation doesn't have a limit on the maximum > > number of > > + * grants that can be persistently mapped in the frontend > > driver, but > > + * due to the frontend driver implementation it should never > > be bigger > > + * than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. > > + *(10) The discard-secure property may be present and will be set > > to 1 if the > > + * backing device supports secure discard. > > + *(11) Only used by Linux and NetBSD. > > + */ > > + > > +/* > > + * Multiple hardware queues/rings: > > + * If supported, the backend will write the key "multi-queue-max- > > queues" to > > + * the directory for that vbd, and set its value to the maximum > > supported > > + * number of queues. > > + * Frontends that are aware of this feature and wish to use it can > > write the > > + * key "multi-queue-num-queues" with the number they wish to use, > > which > > must be > > + * greater than zero, and no more than the value reported by the > > backend in > > + * "multi-queue-max-queues". > > + * > > + * For frontends requesting just one queue, the usual event- > > channel and > > + * ring-ref keys are written as before, simplifying the backend > > processing > > + * to avoid distinguishing between a frontend that doesn't > > understand the > > + * multi-queue feature, and one that does, but requested only one > > queue. > > + * > > + * Frontends requesting two or more queues must not write the > > toplevel > > + * event-channel and ring-ref keys, instead writing those keys > > under > > sub-keys > > + * having the name "queue-N" where N is the integer ID of the > > queue/ring > > for > > + * which those keys belong. Queues are indexed from zero.
> > + * For example, a frontend with two queues must write the > > following set of > > + * queue-related keys: > > + * > > + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" > > + * /local/domain/1/device/vbd/0/queue-0 = "" > > + * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>" > > + * /local/domain/1/device/vbd/0/queue-0/event-channel = > > "<evtchn#0>" > > + * /local/domain/1/device/vbd/0/queue-1 = "" > > + * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>" > > + * /local/domain/1/device/vbd/0/queue-1/event-channel = > > "<evtchn#1>" > > + * > > + * It is also possible to use multiple queues/rings together with > > + * the multi-page ring buffer feature. > > + * For example, a frontend requesting two queues/rings, with each > > ring > > + * buffer two pages in size, must write the following set of related > > keys: > > + * > > + * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2" > > + * /local/domain/1/device/vbd/0/ring-page-order = "1" > > + * /local/domain/1/device/vbd/0/queue-0 = "" > > + * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>" > > + * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>" > > + * /local/domain/1/device/vbd/0/queue-0/event-channel = > > "<evtchn#0>" > > + * /local/domain/1/device/vbd/0/queue-1 = "" > > + * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>" > > + * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>" > > + * /local/domain/1/device/vbd/0/queue-1/event-channel = > > "<evtchn#1>" > > + * > > + */ > > + > > +/* > > + * STATE DIAGRAMS > > + * > > + > > ************************************************************** > > *************** > > + * Startup > > * > > + > > ************************************************************** > > *************** > > + * > > + * Tool stack creates front and back nodes with state > > XenbusStateInitialising. > > + * > > + * Front Back > > + * ================================= > > ===================================== > > + * XenbusStateInitialising XenbusStateInitialising > > + * o Query virtual device o Query backend device > > identification > > + * properties. data. > > + * o Setup OS device instance. o Open and validate > > backend > > device. > > + * o Publish backend > > features and > > + * transport parameters. > > + * | > > + * | > > + * V > > + * XenbusStateInitWait > > + * > > + * o Query backend features and > > + * transport parameters. > > + * o Allocate and initialize the > > + * request ring. > > + * o Publish transport parameters > > + * that will be in effect during > > + * this connection. > > + * | > > + * | > > + * V > > + * XenbusStateInitialised > > + * > > + * o Query frontend > > transport parameters. > > + * o Connect to the request > > ring and > > + * event channel. > > + * o Publish backend device > > properties. > > + * | > > + * | > > + * V > > + * XenbusStateConnected > > + * > > + * o Query backend device properties. > > + * o Finalize OS virtual device > > + * instance. > > + * | > > + * | > > + * V > > + * XenbusStateConnected > > + * > > + * Note: Drivers that do not support any optional features, or the > > negotiation > > + * of transport parameters, can skip certain states in the > > state > > machine: > > + * > > + * o A frontend may transition to XenbusStateInitialised > > without > > + * waiting for the backend to enter > > XenbusStateInitWait.
In this > > + * case, default transport parameters are in effect and > > any > > + * transport parameters published by the frontend must > > contain > > + * their default values. > > + * > > + * o A backend may transition to XenbusStateInitialised, > > bypassing > > + * XenbusStateInitWait, without waiting for the frontend > > to first > > + * enter the XenbusStateInitialised state. In this case, > > default > > + * transport parameters are in effect and any transport > > parameters > > + * published by the backend must contain their default > > values. > > + * > > + * Drivers that support optional features and/or transport > > parameter > > + * negotiation must tolerate these additional state > > transition paths. > > + * In general this means performing the work of any skipped > > state > > + * transition, if it has not already been performed, in > > addition to the > > + * work associated with entry into the current state. > > + */ > > + > > +/* > > + * REQUEST CODES. > > + */ > > +#define BLKIF_OP_READ 0 > > +#define BLKIF_OP_WRITE 1 > > +/* > > + * All writes issued prior to a request with the > > BLKIF_OP_WRITE_BARRIER > > + * operation code ("barrier request") must be completed prior to > > the > > + * execution of the barrier request. All writes issued after the > > barrier > > + * request must not execute until after the completion of the > > barrier request. > > + * > > + * Optional. See "feature-barrier" XenBus node documentation > > above. > > + */ > > +#define BLKIF_OP_WRITE_BARRIER 2 > > +/* > > + * Commit any uncommitted contents of the backing device's > > volatile cache > > + * to stable storage. > > + * > > + * Optional. See "feature-flush-cache" XenBus node documentation > > above. > > + */ > > +#define BLKIF_OP_FLUSH_DISKCACHE 3 > > +/* > > + * Used in SLES sources for device specific command packet > > + * contained within the request. Reserved for that purpose. > > + */ > > +#define BLKIF_OP_RESERVED_1 4 > > +/* > > + * Indicate to the backend device that a region of storage is no > > longer in > > + * use, and may be discarded at any time without impact to the > > client. If > > + * the BLKIF_DISCARD_SECURE flag is set on the request, all copies > > of the > > + * discarded region on the device must be rendered unrecoverable > > before > > the > > + * command returns. > > + * > > + * This operation is analogous to performing a trim (ATA) or unmap > > (SCSI) > > + * command on a native device.
> > + * > > + * More information about trim/unmap operations can be found at: > > + * http://t13.org/Documents/UploadedDocuments/docs2008/ > > + * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc > > + * http://www.seagate.com/staticfiles/support/disc/manuals/ > > + * Interface%20manuals/100293068c.pdf > > + * > > + * Optional. See "feature-discard", "discard-alignment", > > + * "discard-granularity", and "discard-secure" in the XenBus node > > + * documentation above. > > + */ > > +#define BLKIF_OP_DISCARD 5 > > + > > +/* > > + * Recognized if "feature-max-indirect-segments" is present in the > > backend > > + * xenbus info. The "feature-max-indirect-segments" node contains > > the > > maximum > > + * number of segments allowed by the backend per request. If the > > node is > > + * present, the frontend might use blkif_request_indirect structs > > in order to > > + * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST > > (11). The > > + * maximum number of indirect segments is fixed by the backend, > > but the > > + * frontend can issue requests with any number of indirect > > segments as long > > as > > + * it's less than the number provided by the backend. The > > indirect_grefs field > > + * in blkif_request_indirect should be filled by the frontend with > > the > > + * grant references of the pages that are holding the indirect > > segments. > > + * These pages are filled with an array of blkif_request_segment > > that hold > > the > > + * information about the segments. The number of indirect pages to > > use is > > + * determined by the number of segments an indirect request > > contains. > > Every > > + * indirect page can contain a maximum of > > + * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so > > to > > + * calculate the number of indirect pages to use we have to do > > + * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct > > blkif_request_segment))). > > + * > > + * If a backend does not recognize BLKIF_OP_INDIRECT, it should > > *not* > > + * create the "feature-max-indirect-segments" node! > > + */ > > +#define BLKIF_OP_INDIRECT 6 > > + > > +/* > > + * Maximum scatter/gather segments per request. > > + * This is carefully chosen so that sizeof(blkif_ring_t) <= > > PAGE_SIZE. > > + * NB. This could be 12 if the ring indexes weren't stored in the > > same page. > > + */ > > +#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 > > + > > +/* > > + * Maximum number of indirect pages to use per request. > > + */ > > +#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 > > + > > +/* > > + * NB.
'first_sect' and 'last_sect' in blkif_request_segment, as > > well as > > + * 'sector_number' in blkif_request, blkif_request_discard and > > + * blkif_request_indirect are sector-based quantities. See the > > description > > + * of the "feature-large-sector-size" frontend xenbus node above > > for > > + * more information. > > + */ > > +struct blkif_request_segment { > > + grant_ref_t gref; /* reference to I/O buffer > > frame */ > > + /* @first_sect: first sector in frame to transfer > > (inclusive). */ > > + /* @last_sect: last sector in frame to transfer > > (inclusive). */ > > + u8 first_sect, last_sect; > > +}; > > + > > +/* > > + * Starting ring element for any I/O request. > > + */ > > +struct blkif_request { > > + u8 operation; /* BLKIF_OP_??? > > */ > > + u8 nr_segments; /* number of segments > > */ > > + blkif_vdev_t handle; /* only for read/write requests > > */ > > + u64 id; /* private guest value, echoed in > > resp */ > > + blkif_sector_t sector_number;/* start sector idx on disk (r/w > > only) */ > > + struct blkif_request_segment > > seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > > +}; > > + > > +typedef struct blkif_request blkif_request_t; > > + > > +/* > > + * Cast to this structure when blkif_request.operation == > > BLKIF_OP_DISCARD > > + * sizeof(struct blkif_request_discard) <= sizeof(struct > > blkif_request) > > + */ > > +struct blkif_request_discard { > > + u8 operation; /* BLKIF_OP_DISCARD > > */ > > + u8 flag; /* BLKIF_DISCARD_SECURE or zero > > */ > > +#define BLKIF_DISCARD_SECURE (1 << 0) /* ignored if discard- > > secure=0 > > */ > > + blkif_vdev_t handle; /* same as for read/write requests > > */ > > + u64 id; /* private guest value, echoed in > > resp */ > > + blkif_sector_t sector_number;/* start sector idx on disk > > */ > > + u64 nr_sectors; /* number of contiguous sectors to > > discard*/ > > +}; > > + > > +typedef struct blkif_request_discard blkif_request_discard_t; > > + > > +struct blkif_request_indirect { > > + u8 operation; /* BLKIF_OP_INDIRECT > > */ > > + u8 indirect_op; /* BLKIF_OP_{READ/WRITE} > > */ > > + u16 nr_segments; /* number of segments > > */ > > + u64 id; /* private guest value, echoed in > > resp */ > > + blkif_sector_t sector_number;/* start sector idx on disk (r/w > > only) */ > > + blkif_vdev_t handle; /* same as for read/write requests > > */ > > + grant_ref_t > > indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; > > +#ifdef __i386__ > > + u64 pad; /* Make it 64 byte aligned on i386 > > */ > > +#endif > > +}; > > + > > +typedef struct blkif_request_indirect blkif_request_indirect_t; > > + > > +struct blkif_response { > > + u64 id; /* copied from request */ > > + u8 operation; /* copied from request */ > > + s16 status; /* BLKIF_RSP_??? */ > > +}; > > + > > +typedef struct blkif_response blkif_response_t; > > + > > +/* > > + * STATUS RETURN CODES. > > + */ > > + /* Operation not supported (only happens on barrier writes). */ > > +#define BLKIF_RSP_EOPNOTSUPP -2 > > + /* Operation failed for some unspecified reason (-EIO). */ > > +#define BLKIF_RSP_ERROR -1 > > + /* Operation completed successfully. */ > > +#define BLKIF_RSP_OKAY 0 > > + > > +/* > > + * Generate blkif ring structures and types. 
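To make the request layout above concrete: once DEFINE_RING_TYPES(blkif, ...) just below has generated blkif_front_ring_t, a one-segment read is queued roughly like this (sketch; grant and ring setup omitted, and identifiers other than the struct fields are placeholders):

    blkif_request_t *req =
            RING_GET_REQUEST(&front_ring, front_ring.req_prod_pvt);

    req->operation     = BLKIF_OP_READ;
    req->nr_segments   = 1;
    req->handle        = vdev;           /* blkif_vdev_t from negotiation */
    req->id            = cookie;         /* echoed back in the response */
    req->sector_number = start_sector;   /* in "sector-size" units */
    req->seg[0].gref       = data_gref;  /* grant for the data page */
    req->seg[0].first_sect = 0;
    req->seg[0].last_sect  = 7;          /* 8 * 512 bytes = one 4 KiB page */
    front_ring.req_prod_pvt++;

The push/notify step then uses the ring.h macros quoted later in this patch.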
> > + */ > > +DEFINE_RING_TYPES(blkif, struct blkif_request, struct > > blkif_response); > > + > > +#define VDISK_CDROM 0x1 > > +#define VDISK_REMOVABLE 0x2 > > +#define VDISK_READONLY 0x4 > > + > > +#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ > > + > > +/* > > + * Local variables: > > + * mode: C > > + * c-file-style: "BSD" > > + * c-basic-offset: 4 > > + * tab-width: 4 > > + * indent-tabs-mode: nil > > + * End: > > + */ > > diff --git a/include/xen/interface/io/console.h > > b/include/xen/interface/io/console.h > > new file mode 100644 > > index 0000000000..3489fc7a60 > > --- /dev/null > > +++ b/include/xen/interface/io/console.h > > @@ -0,0 +1,56 @@ > > +/************************************************************ > > ****************** > > + * console.h > > + * > > + * Console I/O interface for Xen guest OSes. > > + * > > + * Permission is hereby granted, free of charge, to any person > > obtaining a > > copy > > + * of this software and associated documentation files (the > > "Software"), to > > + * deal in the Software without restriction, including without > > limitation the > > + * rights to use, copy, modify, merge, publish, distribute, > > sublicense, and/or > > + * sell copies of the Software, and to permit persons to whom the > > Software is > > + * furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice shall be > > included in > > + * all copies or substantial portions of the Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > > KIND, EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > > EVENT SHALL THE > > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > > DAMAGES OR OTHER > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > > ARISING > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > > OR OTHER > > + * DEALINGS IN THE SOFTWARE. 
> > + * > > + * Copyright (c) 2005, Keir Fraser > > + */ > > + > > +#ifndef __XEN_PUBLIC_IO_CONSOLE_H__ > > +#define __XEN_PUBLIC_IO_CONSOLE_H__ > > + > > +typedef u32 XENCONS_RING_IDX; > > + > > +#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring) - 1)) > > + > > +struct xencons_interface { > > + char in[1024]; > > + char out[2048]; > > + XENCONS_RING_IDX in_cons, in_prod; > > + XENCONS_RING_IDX out_cons, out_prod; > > +}; > > + > > +#ifdef XEN_WANT_FLEX_CONSOLE_RING > > +#include "ring.h" > > +DEFINE_XEN_FLEX_RING(xencons); > > +#endif > > + > > +#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */ > > + > > +/* > > + * Local variables: > > + * mode: C > > + * c-file-style: "BSD" > > + * c-basic-offset: 4 > > + * tab-width: 4 > > + * indent-tabs-mode: nil > > + * End: > > + */ > > diff --git a/include/xen/interface/io/protocols.h > > b/include/xen/interface/io/protocols.h > > new file mode 100644 > > index 0000000000..52b4de0f81 > > --- /dev/null > > +++ b/include/xen/interface/io/protocols.h > > @@ -0,0 +1,42 @@ > > +/************************************************************ > > ****************** > > + * protocols.h > > + * > > + * Permission is hereby granted, free of charge, to any person > > obtaining a > > copy > > + * of this software and associated documentation files (the > > "Software"), to > > + * deal in the Software without restriction, including without > > limitation the > > + * rights to use, copy, modify, merge, publish, distribute, > > sublicense, and/or > > + * sell copies of the Software, and to permit persons to whom the > > Software is > > + * furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice shall be > > included in > > + * all copies or substantial portions of the Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > > KIND, EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > > EVENT SHALL THE > > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > > DAMAGES OR OTHER > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > > ARISING > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > > OR OTHER > > + * DEALINGS IN THE SOFTWARE. > > + * > > + * Copyright (c) 2008, Keir Fraser > > + */ > > + > > +#ifndef __XEN_PROTOCOLS_H__ > > +#define __XEN_PROTOCOLS_H__ > > + > > +#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" > > +#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" > > +#define XEN_IO_PROTO_ABI_ARM "arm-abi" > > + > > +#if defined(__i386__) > > +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 > > +#elif defined(__x86_64__) > > +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 > > +#elif defined(__arm__) || defined(__aarch64__) > > +# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM > > +#else > > +# error arch fixup needed here > > +#endif > > + > > +#endif > > diff --git a/include/xen/interface/io/ring.h > > b/include/xen/interface/io/ring.h > > new file mode 100644 > > index 0000000000..4e02678e3c > > --- /dev/null > > +++ b/include/xen/interface/io/ring.h > > @@ -0,0 +1,479 @@ > > +/************************************************************ > > ****************** > > + * ring.h > > + * > > + * Shared producer-consumer ring macros. 
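One remark on the console interface quoted just above: unlike blkif it is a plain byte ring, indexed with MASK_XENCONS_IDX(). A minimal transmit path, as a sketch (barriers noted but elided):

    /* Copy as much of buf as currently fits into the out ring (sketch). */
    static int xencons_put(struct xencons_interface *intf,
                           const char *buf, int len)
    {
            XENCONS_RING_IDX cons = intf->out_cons;
            XENCONS_RING_IDX prod = intf->out_prod;
            int sent = 0;

            /* real code needs a read barrier between the index reads
             * and the data writes */
            while (sent < len && (prod - cons) < sizeof(intf->out))
                    intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = buf[sent++];

            /* ... and a write barrier before publishing prod */
            intf->out_prod = prod;
            return sent;    /* caller notifies the console event channel */
    }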
> > + * > > + * Permission is hereby granted, free of charge, to any person > > obtaining a > > copy > > + * of this software and associated documentation files (the > > "Software"), to > > + * deal in the Software without restriction, including without > > limitation the > > + * rights to use, copy, modify, merge, publish, distribute, > > sublicense, and/or > > + * sell copies of the Software, and to permit persons to whom the > > Software is > > + * furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice shall be > > included in > > + * all copies or substantial portions of the Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > > KIND, EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > > EVENT SHALL THE > > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > > DAMAGES OR OTHER > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > > ARISING > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > > OR OTHER > > + * DEALINGS IN THE SOFTWARE. > > + * > > + * Tim Deegan and Andrew Warfield November 2004. > > + */ > > + > > +#ifndef __XEN_PUBLIC_IO_RING_H__ > > +#define __XEN_PUBLIC_IO_RING_H__ > > + > > +/* > > + * When #include'ing this header, you need to provide the > > following > > + * declarations upfront: > > + * - standard integer types (u8, u16, etc) > > + * They are provided by stdint.h of the standard headers. > > + * > > + * In addition, if you intend to use the FLEX macros, you also > > need to > > + * provide the following, before invoking the FLEX macros: > > + * - size_t > > + * - memcpy > > + * - grant_ref_t > > + * These declarations are provided by string.h of the standard > > headers, > > + * and grant_table.h from the Xen public headers. > > + */ > > + > > +#include <xen/interface/grant_table.h> > > + > > +typedef unsigned int RING_IDX; > > + > > +/* Round a 32-bit unsigned constant down to the nearest power of > > two. */ > > +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : > > ((_x) > > & 0x1)) > > +#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : > > __RD2(_x)) > > +#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : > > __RD4(_x)) > > +#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : > > __RD8(_x)) > > +#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : > > __RD16(_x)) > > + > > +/* > > + * Calculate size of a shared ring, given the total available > > space for the > > + * ring and indexes (_sz), and the name tag of the > > request/response > > structure. > > + * A ring contains as many entries as will fit, rounded down to > > the nearest > > + * power of two (so we can mask with (size-1) to loop around). > > + */ > > +#define __CONST_RING_SIZE(_s, _sz) \ > > + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ > > + sizeof(((struct _s##_sring *)0)->ring[0]))) > > +/* > > + * The same for passing in an actual pointer instead of a name > > tag. > > + */ > > +#define __RING_SIZE(_s, _sz) \ > > + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)- > > >ring[0]))) > > + > > +/* > > + * Macros to make the correct C datatypes for a new kind of ring. > > + * > > + * To make a new ring datatype, you need to have two message > > structures, > > + * let's say request_t, and response_t already defined.
> > + * > > + * In a header where you want the ring datatype declared, you then > > do: > > + * > > + * DEFINE_RING_TYPES(mytag, request_t, response_t); > > + * > > + * These expand out to give you a set of types, as you can see > > below. > > + * The most important of these are: > > + * > > + * mytag_sring_t - The shared ring. > > + * mytag_front_ring_t - The 'front' half of the ring. > > + * mytag_back_ring_t - The 'back' half of the ring. > > + * > > + * To initialize a ring in your code you need to know the location > > and size > > + * of the shared memory area (PAGE_SIZE, for instance). To > > initialise > > + * the front half: > > + * > > + * mytag_front_ring_t front_ring; > > + * SHARED_RING_INIT((mytag_sring_t *)shared_page); > > + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, > > PAGE_SIZE); > > + * > > + * Initializing the back follows similarly (note that only the > > front > > + * initializes the shared ring): > > + * > > + * mytag_back_ring_t back_ring; > > + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, > > PAGE_SIZE); > > + */ > > + > > +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) > > \ > > + > > \ > > +/* Shared ring entry */ > > \ > > +union __name##_sring_entry > > { \ > > + __req_t req; > > \ > > + __rsp_t rsp; > > \ > > +}; > > \ > > + > > \ > > +/* Shared ring page */ > > \ > > +struct __name##_sring > > { \ > > + RING_IDX req_prod, req_event; > > \ > > + RING_IDX rsp_prod, rsp_event; > > \ > > + union > > { > > \ > > + struct > > { \ > > + u8 smartpoll_active; > > \ > > + } netif; > > \ > > + struct > > { \ > > + u8 msg; > > \ > > + } tapif_user; > > \ > > + u8 pvt_pad[4]; > > \ > > + } pvt; > > \ > > + u8 __pad[44]; > > \ > > + union __name##_sring_entry ring[1]; /* variable-length */ > > \ > > +}; > > \ > > + > > \ > > +/* "Front" end's private variables */ > > \ > > +struct __name##_front_ring > > { \ > > + RING_IDX req_prod_pvt; > > \ > > + RING_IDX rsp_cons; > > \ > > + unsigned int nr_ents; > > \ > > + struct __name##_sring *sring; > > \ > > +}; > > \ > > + > > \ > > +/* "Back" end's private variables */ > > \ > > +struct __name##_back_ring > > { \ > > + RING_IDX rsp_prod_pvt; > > \ > > + RING_IDX req_cons; > > \ > > + unsigned int nr_ents; > > \ > > + struct __name##_sring *sring; > > \ > > +}; > > \ > > + > > \ > > +/* Syntactic sugar */ > > \ > > +typedef struct __name##_sring __name##_sring_t; > > \ > > +typedef struct __name##_front_ring __name##_front_ring_t; > > \ > > +typedef struct __name##_back_ring __name##_back_ring_t > > + > > +/* > > + * Macros for manipulating rings. > > + * > > + * FRONT_RING_whatever works on the "front end" of a ring: here > > + * requests are pushed on to the ring and responses taken off it. > > + * > > + * BACK_RING_whatever works on the "back end" of a ring: here > > + * requests are taken off the ring and responses put on. > > + * > > + * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. > > + * This is OK in 1-for-1 request-response situations where the > > + * requestor (front end) never has more than RING_SIZE()-1 > > + * outstanding requests. 
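Tying the init and access macros defined below together, the complete front-side submit path ends up as follows (sketch; notify_remote_via_evtchn() stands in for the event-channel send, which is not part of this header):

    mytag_front_ring_t ring;   /* initialised as shown above */
    int notify;

    if (!RING_FULL(&ring)) {
            request_t *req = RING_GET_REQUEST(&ring, ring.req_prod_pvt);
            /* ... fill in *req ... */
            ring.req_prod_pvt++;
            RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&ring, notify);
            if (notify)
                    notify_remote_via_evtchn(port);
    }

which is exactly the hold-off protocol described in the notification comment further down.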
> > + */ > > + > > +/* Initialising empty rings */ > > +#define SHARED_RING_INIT(_s) do > > { \ > > + (_s)->req_prod = (_s)->rsp_prod = 0; > > \ > > + (_s)->req_event = (_s)->rsp_event = 1; > > \ > > + (void)memset((_s)->pvt.pvt_pad, 0, sizeof((_s)->pvt.pvt_pad)); > > \ > > + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); > > \ > > +} while (0) > > + > > +#define FRONT_RING_INIT(_r, _s, __size) do > > { \ > > + (_r)->req_prod_pvt = 0; > > \ > > + (_r)->rsp_cons = 0; > > \ > > + (_r)->nr_ents = __RING_SIZE(_s, __size); > > \ > > + (_r)->sring = (_s); > > \ > > +} while (0) > > + > > +#define BACK_RING_INIT(_r, _s, __size) do > > { \ > > + (_r)->rsp_prod_pvt = 0; > > \ > > + (_r)->req_cons = 0; > > \ > > + (_r)->nr_ents = __RING_SIZE(_s, __size); > > \ > > + (_r)->sring = (_s); > > \ > > +} while (0) > > + > > +/* How big is this ring? */ > > +#define RING_SIZE(_r) > > \ > > + ((_r)->nr_ents) > > + > > +/* Number of free requests (for use on front side only). */ > > +#define RING_FREE_REQUESTS(_r) > > \ > > + (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) > > + > > +/* Test if there is an empty slot available on the front ring. > > + * (This is only meaningful from the front.) > > + */ > > +#define RING_FULL(_r) > > \ > > + (RING_FREE_REQUESTS(_r) == 0) > > + > > +/* Test if there are outstanding messages to be processed on a > > ring. */ > > +#define RING_HAS_UNCONSUMED_RESPONSES(_r) > > \ > > + ((_r)->sring->rsp_prod - (_r)->rsp_cons) > > + > > +#ifdef __GNUC__ > > +#define RING_HAS_UNCONSUMED_REQUESTS(_r) > > ({ \ > > + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; > > \ > > + unsigned int rsp = RING_SIZE(_r) - > > \ > > + ((_r)->req_cons - (_r)->rsp_prod_pvt); > > \ > > + req < rsp ? req : rsp; > > \ > > +}) > > +#else > > +/* Same as above, but without the nice GCC ({ ... }) syntax. */ > > +#define RING_HAS_UNCONSUMED_REQUESTS(_r) > > \ > > + ((((_r)->sring->req_prod - (_r)->req_cons) < > > \ > > + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? > > \ > > + ((_r)->sring->req_prod - (_r)->req_cons) : > > \ > > + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) > > +#endif > > + > > +/* Direct access to individual ring elements, by index. */ > > +#define RING_GET_REQUEST(_r, _idx) > > \ > > + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) > > + > > +/* > > + * Get a local copy of a request. > > + * > > + * Use this in preference to RING_GET_REQUEST() so all processing > > is > > + * done on a local copy that cannot be modified by the other end. > > + * > > + * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this > > + * to be ineffective where _req is a struct which consists of only > > bitfields. > > + */ > > +#define RING_COPY_REQUEST(_r, _idx, _req) do { > > \ > > + /* Use volatile to force the copy into _req.
*/ > > \ > > + *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); > > \ > > +} while (0) > > + > > +#define RING_GET_RESPONSE(_r, _idx) > > \ > > + (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) > > + > > +/* Loop termination condition: Would the specified index overflow > > the ring? > > */ > > +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) > > \ > > + (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) > > + > > +/* Ill-behaved frontend determination: Can there be this many > > requests? */ > > +#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) > > \ > > + (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r)) > > + > > +#define RING_PUSH_REQUESTS(_r) do > > { \ > > + xen_wmb(); /* back sees requests /before/ updated producer > > index */ > > \ > > + (_r)->sring->req_prod = (_r)->req_prod_pvt; > > \ > > +} while (0) > > + > > +#define RING_PUSH_RESPONSES(_r) do > > { \ > > + xen_wmb(); /* front sees resps /before/ updated producer index > > */ > > \ > > + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; > > \ > > +} while (0) > > + > > +/* > > + * Notification hold-off (req_event and rsp_event): > > + * > > + * When queueing requests or responses on a shared ring, it may > > not always > > be > > + * necessary to notify the remote end. For example, if requests > > are in flight > > + * in a backend, the front may be able to queue further requests > > without > > + * notifying the back (if the back checks for new requests when it > > queues > > + * responses). > > + * > > + * When enqueuing requests or responses: > > + * > > + * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The > > second argument > > + * is a boolean return value. True indicates that the receiver > > requires an > > + * asynchronous notification. > > + * > > + * After dequeuing requests or responses (before sleeping the > > connection): > > + * > > + * Use RING_FINAL_CHECK_FOR_REQUESTS() or > > RING_FINAL_CHECK_FOR_RESPONSES(). > > + * The second argument is a boolean return value. True indicates > > that there > > + * are pending messages on the ring (i.e., the connection should > > not be put > > + * to sleep). > > + * > > + * These macros will set the req_event/rsp_event field to trigger > > a > > + * notification on the very next message that is enqueued. If you > > want to > > + * create batches of work (i.e., only receive a notification > > after several > > + * messages have been enqueued) then you will need to create a > > customised > > + * version of the FINAL_CHECK macro in your own code, which sets > > the > > event > > + * field appropriately. 
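And the consumer-side counterpart of the paragraph above, for reference, built on RING_GET_RESPONSE() and the RING_FINAL_CHECK_FOR_RESPONSES() macro just below (sketch; barriers elided):

    int work;

    do {
            RING_IDX prod = ring.sring->rsp_prod;   /* read barrier after this */

            while (ring.rsp_cons != prod) {
                    response_t *rsp = RING_GET_RESPONSE(&ring, ring.rsp_cons);
                    /* ... handle *rsp ... */
                    ring.rsp_cons++;
            }
            RING_FINAL_CHECK_FOR_RESPONSES(&ring, work);
    } while (work);

RING_FINAL_CHECK_FOR_RESPONSES() re-arms rsp_event, so the backend raises a notification for the first response enqueued after this loop goes idle.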
> > + */ > > + > > +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do > > { \ > > + RING_IDX __old = (_r)->sring->req_prod; > > \ > > + RING_IDX __new = (_r)->req_prod_pvt; > > \ > > + xen_wmb(); /* back sees requests /before/ updated producer > > index */ > > \ > > + (_r)->sring->req_prod = __new; > > \ > > + xen_mb(); /* back sees new requests /before/ we check req_event > > */ > > \ > > + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < > > \ > > + (RING_IDX)(__new - > > __old)); \ > > +} while (0) > > + > > +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do > > { \ > > + RING_IDX __old = (_r)->sring->rsp_prod; > > \ > > + RING_IDX __new = (_r)->rsp_prod_pvt; > > \ > > + xen_wmb(); /* front sees resps /before/ updated producer index > > */ > > \ > > + (_r)->sring->rsp_prod = __new; > > \ > > + xen_mb(); /* front sees new resps /before/ we check rsp_event > > */ > > \ > > + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < > > \ > > + (RING_IDX)(__new - > > __old)); \ > > +} while (0) > > + > > +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do > > { \ > > + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); > > \ > > + if (_work_to_do) > > \ > > + break; > > \ > > + (_r)->sring->req_event = (_r)->req_cons + 1; > > \ > > + xen_mb(); > > \ > > + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); > > \ > > +} while (0) > > + > > +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do > > { \ > > + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); > > \ > > + if (_work_to_do) > > \ > > + break; > > \ > > + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; > > \ > > + xen_mb(); > > \ > > + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); > > \ > > +} while (0) > > + > > +/* > > + * DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings > > and > > + * functions to check if there is data on the ring, and to read > > and > > + * write to them. > > + * > > + * DEFINE_XEN_FLEX_RING is similar to > > DEFINE_XEN_FLEX_RING_AND_INTF, but > > + * does not define the indexes page. As different protocols can > > have > > + * extensions to the basic format, this macro allows them to define > > their > > + * own struct. > > + * > > + * XEN_FLEX_RING_SIZE > > + * Convenience macro to calculate the size of one of the two > > rings > > + * from the overall order. > > + * > > + * $NAME_mask > > + * Function to apply the size mask to an index, to reduce the > > index > > + * within the range [0-size]. > > + * > > + * $NAME_read_packet > > + * Function to read data from the ring. The amount of data to > > read is > > + * specified by the "size" argument. > > + * > > + * $NAME_write_packet > > + * Function to write data to the ring. The amount of data to > > write is > > + * specified by the "size" argument. > > + * > > + * $NAME_get_ring_ptr > > + * Convenience function that returns a pointer to read/write to > > the > > + * ring at the right location. > > + * > > + * $NAME_data_intf > > + * Indexes page, shared between frontend and backend. It also > > + * contains the array of grant refs. > > + * > > + * $NAME_queued > > + * Function to calculate how many bytes are currently on the > > ring, > > + * ready to be read. It can also be used to calculate how much > > free > > + * space is currently on the ring (XEN_FLEX_RING_SIZE() - > > + * $NAME_queued()).
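Since the generated flex-ring helpers are easy to misuse, one receive-side sketch for illustration. Note that read_packet() takes already-masked indexes and advances only the masked consumer copy, so the caller keeps the real (unmasked) index itself:

    DEFINE_XEN_FLEX_RING(demo);   /* emits demo_mask(), demo_read_packet(), ... */

    /* Drain up to len bytes from the ring into dst (sketch). */
    static size_t demo_recv(unsigned char *ring_buf, RING_IDX *cons,
                            RING_IDX prod, RING_IDX ring_size,
                            void *dst, size_t len)
    {
            RING_IDX avail = demo_queued(prod, *cons, ring_size);
            RING_IDX masked_prod = demo_mask(prod, ring_size);
            RING_IDX masked_cons = demo_mask(*cons, ring_size);
            size_t n = len < avail ? len : avail;

            if (n)
                    demo_read_packet(dst, ring_buf, n, masked_prod,
                                     &masked_cons, ring_size);
            *cons += n;   /* publish with a barrier in real code */
            return n;
    }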
> > +/*
> > + * DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings and
> > + * functions to check if there is data on the ring, and to read and
> > + * write to them.
> > + *
> > + * DEFINE_XEN_FLEX_RING is similar to DEFINE_XEN_FLEX_RING_AND_INTF, but
> > + * does not define the indexes page. As different protocols can have
> > + * extensions to the basic format, this macro allows them to define their
> > + * own struct.
> > + *
> > + * XEN_FLEX_RING_SIZE
> > + *   Convenience macro to calculate the size of one of the two rings
> > + *   from the overall order.
> > + *
> > + * $NAME_mask
> > + *   Function to apply the size mask to an index, to reduce the index
> > + *   within the range [0-size].
> > + *
> > + * $NAME_read_packet
> > + *   Function to read data from the ring. The amount of data to read is
> > + *   specified by the "size" argument.
> > + *
> > + * $NAME_write_packet
> > + *   Function to write data to the ring. The amount of data to write is
> > + *   specified by the "size" argument.
> > + *
> > + * $NAME_get_ring_ptr
> > + *   Convenience function that returns a pointer to read/write to the
> > + *   ring at the right location.
> > + *
> > + * $NAME_data_intf
> > + *   Indexes page, shared between frontend and backend. It also
> > + *   contains the array of grant refs.
> > + *
> > + * $NAME_queued
> > + *   Function to calculate how many bytes are currently on the ring,
> > + *   ready to be read. It can also be used to calculate how much free
> > + *   space is currently on the ring (XEN_FLEX_RING_SIZE() -
> > + *   $NAME_queued()).
> > + */
> > +
> > +#ifndef XEN_PAGE_SHIFT
> > +/* The PAGE_SIZE for ring protocols and hypercall interfaces is always
> > + * 4K, regardless of the architecture, and page granularity chosen by
> > + * operating systems.
> > + */
> > +#define XEN_PAGE_SHIFT 12
> > +#endif
> > +#define XEN_FLEX_RING_SIZE(order) \
> > + (1UL << ((order) + XEN_PAGE_SHIFT - 1))
> > +
> > +#define DEFINE_XEN_FLEX_RING(name) \
> > +static inline RING_IDX name##_mask(RING_IDX idx, RING_IDX ring_size) \
> > +{ \
> > + return idx & (ring_size - 1); \
> > +} \
> > + \
> > +static inline unsigned char *name##_get_ring_ptr(unsigned char *buf, \
> > +                                                 RING_IDX idx, \
> > +                                                 RING_IDX ring_size) \
> > +{ \
> > + return buf + name##_mask(idx, ring_size); \
> > +} \
> > + \
> > +static inline void name##_read_packet(void *opaque, \
> > +                                      const unsigned char *buf, \
> > +                                      size_t size, \
> > +                                      RING_IDX masked_prod, \
> > +                                      RING_IDX *masked_cons, \
> > +                                      RING_IDX ring_size) \
> > +{ \
> > + if (*masked_cons < masked_prod || \
> > +     size <= ring_size - *masked_cons) { \
> > +  memcpy(opaque, buf + *masked_cons, size); \
> > + } else { \
> > +  memcpy(opaque, buf + *masked_cons, ring_size - *masked_cons); \
> > +  memcpy((unsigned char *)opaque + ring_size - *masked_cons, buf, \
> > +         size - (ring_size - *masked_cons)); \
> > + } \
> > + *masked_cons = name##_mask(*masked_cons + size, ring_size); \
> > +} \
> > + \
> > +static inline void name##_write_packet(unsigned char *buf, \
> > +                                       const void *opaque, \
> > +                                       size_t size, \
> > +                                       RING_IDX *masked_prod, \
> > +                                       RING_IDX masked_cons, \
> > +                                       RING_IDX ring_size) \
> > +{ \
> > + if (*masked_prod < masked_cons || \
> > +     size <= ring_size - *masked_prod) { \
> > +  memcpy(buf + *masked_prod, opaque, size); \
> > + } else { \
> > +  memcpy(buf + *masked_prod, opaque, ring_size - *masked_prod); \
> > +  memcpy(buf, (unsigned char *)opaque + (ring_size - *masked_prod), \
> > +         size - (ring_size - *masked_prod)); \
> > + } \
> > + *masked_prod = name##_mask(*masked_prod + size, ring_size); \
> > +} \
> > + \
> > +static inline RING_IDX name##_queued(RING_IDX prod, \
> > +                                     RING_IDX cons, \
> > +                                     RING_IDX ring_size) \
> > +{ \
> > + RING_IDX size; \
> > + \
> > + if (prod == cons) \
> > +  return 0; \
> > + \
> > + prod = name##_mask(prod, ring_size); \
> > + cons = name##_mask(cons, ring_size); \
> > + \
> > + if (prod == cons) \
> > +  return ring_size; \
> > + \
> > + if (prod > cons) \
> > +  size = prod - cons; \
> > + else \
> > +  size = ring_size - (cons - prod); \
> > + return size; \
> > +} \
> > + \
> > +struct name##_data { \
> > + unsigned char *in;  /* half of the allocation */ \
> > + unsigned char *out; /* half of the allocation */ \
> > +}
> > +
> > +#define DEFINE_XEN_FLEX_RING_AND_INTF(name) \
> > +struct name##_data_intf { \
> > + RING_IDX in_cons, in_prod; \
> > + \
> > + u8 pad1[56]; \
> > + \
> > + RING_IDX out_cons, out_prod; \
> > + \
> > + u8 pad2[56]; \
> > + \
> > + RING_IDX ring_order; \
> > + grant_ref_t ref[]; \
> > +}; \
> > +DEFINE_XEN_FLEX_RING(name)
> > +
> > +#endif /* __XEN_PUBLIC_IO_RING_H__ */
> > +
> > +/*
> > + * Local variables:
> > + * mode: C
> > + * c-file-style: "BSD"
> > + * c-basic-offset: 4
> > + * tab-width: 8
> > + * indent-tabs-mode: nil
> > + * End:
> > + */
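A sketch of how the flex-ring helpers are instantiated and used, for a
hypothetical protocol named "demo" (the xen_rmb()/xen_mb() barriers are
the same ones this header already relies on):

    DEFINE_XEN_FLEX_RING_AND_INTF(demo);

    /* Read 'len' bytes from the 'in' ring, if that much is queued. */
    static void demo_recv(struct demo_data *ring,
                          struct demo_data_intf *intf,
                          void *buf, size_t len, RING_IDX ring_size)
    {
            RING_IDX prod = intf->in_prod;
            RING_IDX cons = intf->in_cons;
            RING_IDX masked_cons;

            xen_rmb(); /* see the producer's data before reading it */
            if (demo_queued(prod, cons, ring_size) < len)
                    return; /* not enough data queued yet */

            masked_cons = demo_mask(cons, ring_size);
            demo_read_packet(buf, ring->in, len,
                             demo_mask(prod, ring_size), &masked_cons,
                             ring_size);

            xen_mb(); /* finish reading before releasing the slots */
            intf->in_cons = cons + len;
    }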
"BSD" > > + * c-basic-offset: 4 > > + * tab-width: 8 > > + * indent-tabs-mode: nil > > + * End: > > + */ > > diff --git a/include/xen/interface/io/xenbus.h > > b/include/xen/interface/io/xenbus.h > > new file mode 100644 > > index 0000000000..f452748b03 > > --- /dev/null > > +++ b/include/xen/interface/io/xenbus.h > > @@ -0,0 +1,81 @@ > > +/************************************************************ > > ***************** > > + * xenbus.h > > + * > > + * Xenbus protocol details. > > + * > > + * Permission is hereby granted, free of charge, to any person > > obtaining a > > copy > > + * of this software and associated documentation files (the > > "Software"), to > > + * deal in the Software without restriction, including without > > limitation the > > + * rights to use, copy, modify, merge, publish, distribute, > > sublicense, and/or > > + * sell copies of the Software, and to permit persons to whom the > > Software is > > + * furnished to do so, subject to the following conditions: > > + * > > + * The above copyright notice and this permission notice shall be > > included in > > + * all copies or substantial portions of the Software. > > + * > > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY > > KIND, EXPRESS OR > > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF > > MERCHANTABILITY, > > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO > > EVENT SHALL THE > > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, > > DAMAGES OR OTHER > > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > > ARISING > > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE > > OR OTHER > > + * DEALINGS IN THE SOFTWARE. > > + * > > + * Copyright (C) 2005 XenSource Ltd. > > + */ > > + > > +#ifndef _XEN_PUBLIC_IO_XENBUS_H > > +#define _XEN_PUBLIC_IO_XENBUS_H > > + > > +/* > > + * The state of either end of the Xenbus, i.e. the current > > communication > > + * status of initialisation across the bus. States here imply > > nothing about > > + * the state of the connection between the driver and the kernel's > > device > > + * layers. > > + */ > > +enum xenbus_state { > > + XenbusStateUnknown = 0, > > + > > + XenbusStateInitialising = 1, > > + > > + /* > > + * InitWait: Finished early initialisation but waiting for > > information > > + * from the peer or hotplug scripts. > > + */ > > + XenbusStateInitWait = 2, > > + > > + /* > > + * Initialised: Waiting for a connection from the peer. > > + */ > > + XenbusStateInitialised = 3, > > + > > + XenbusStateConnected = 4, > > + > > + /* > > + * Closing: The device is being closed due to an error or an > > unplug event. > > + */ > > + XenbusStateClosing = 5, > > + > > + XenbusStateClosed = 6, > > + > > + /* > > + * Reconfiguring: The device is being reconfigured. > > + */ > > + XenbusStateReconfiguring = 7, > > + > > + XenbusStateReconfigured = 8 > > +}; > > + > > +typedef enum xenbus_state XenbusState; > > + > > +#endif /* _XEN_PUBLIC_IO_XENBUS_H */ > > + > > +/* > > + * Local variables: > > + * mode: C > > + * c-file-style: "BSD" > > + * c-basic-offset: 4 > > + * tab-width: 4 > > + * indent-tabs-mode: nil > > + * End: > > + */ > > diff --git a/include/xen/interface/io/xs_wire.h > > b/include/xen/interface/io/xs_wire.h > > new file mode 100644 > > index 0000000000..87987334bf > > --- /dev/null > > +++ b/include/xen/interface/io/xs_wire.h > > @@ -0,0 +1,151 @@ > > +/* > > + * Details of the "wire" protocol between Xen Store Daemon and > > client > > + * library or guest kernel. 
> > diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h
> > new file mode 100644
> > index 0000000000..87987334bf
> > --- /dev/null
> > +++ b/include/xen/interface/io/xs_wire.h
> > @@ -0,0 +1,151 @@
> > +/*
> > + * Details of the "wire" protocol between Xen Store Daemon and client
> > + * library or guest kernel.
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a copy
> > + * of this software and associated documentation files (the "Software"), to
> > + * deal in the Software without restriction, including without limitation the
> > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> > + * sell copies of the Software, and to permit persons to whom the Software is
> > + * furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice shall be included in
> > + * all copies or substantial portions of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> > + * DEALINGS IN THE SOFTWARE.
> > + *
> > + * Copyright (C) 2005 Rusty Russell IBM Corporation
> > + */
> > +
> > +#ifndef _XS_WIRE_H
> > +#define _XS_WIRE_H
> > +
> > +enum xsd_sockmsg_type {
> > + XS_CONTROL,
> > +#define XS_DEBUG XS_CONTROL
> > + XS_DIRECTORY,
> > + XS_READ,
> > + XS_GET_PERMS,
> > + XS_WATCH,
> > + XS_UNWATCH,
> > + XS_TRANSACTION_START,
> > + XS_TRANSACTION_END,
> > + XS_INTRODUCE,
> > + XS_RELEASE,
> > + XS_GET_DOMAIN_PATH,
> > + XS_WRITE,
> > + XS_MKDIR,
> > + XS_RM,
> > + XS_SET_PERMS,
> > + XS_WATCH_EVENT,
> > + XS_ERROR,
> > + XS_IS_DOMAIN_INTRODUCED,
> > + XS_RESUME,
> > + XS_SET_TARGET,
> > + /* XS_RESTRICT has been removed */
> > + XS_RESET_WATCHES = XS_SET_TARGET + 2,
> > + XS_DIRECTORY_PART,
> > +
> > + XS_TYPE_COUNT, /* Number of valid types. */
> > +
> > + XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */
> > +};
> > +
> > +#define XS_WRITE_NONE "NONE"
> > +#define XS_WRITE_CREATE "CREATE"
> > +#define XS_WRITE_CREATE_EXCL "CREATE|EXCL"
> > +
> > +/* We hand errors as strings, for portability. */
> > +struct xsd_errors {
> > + int errnum;
> > + const char *errstring;
> > +};
> > +
> > +#ifdef EINVAL
> > +#define XSD_ERROR(x) { x, #x }
> > +/* LINTED: static unused */
> > +static struct xsd_errors xsd_errors[]
> > +#if defined(__GNUC__)
> > +__attribute__((unused))
> > +#endif
> > + = {
> > + XSD_ERROR(EINVAL),
> > + XSD_ERROR(EACCES),
> > + XSD_ERROR(EEXIST),
> > + XSD_ERROR(EISDIR),
> > + XSD_ERROR(ENOENT),
> > + XSD_ERROR(ENOMEM),
> > + XSD_ERROR(ENOSPC),
> > + XSD_ERROR(EIO),
> > + XSD_ERROR(ENOTEMPTY),
> > + XSD_ERROR(ENOSYS),
> > + XSD_ERROR(EROFS),
> > + XSD_ERROR(EBUSY),
> > + XSD_ERROR(EAGAIN),
> > + XSD_ERROR(EISCONN),
> > + XSD_ERROR(E2BIG)
> > +};
> > +#endif
> > +
> > +struct xsd_sockmsg {
> > + u32 type;   /* XS_??? */
> > + u32 req_id; /* Request identifier, echoed in daemon's response. */
> > + u32 tx_id;  /* Transaction id (0 if not related to a transaction). */
> > + u32 len;    /* Length of data following this. */
> > +
> > + /* Generally followed by nul-terminated string(s). */
> > +};
> > +
> > +enum xs_watch_type {
> > + XS_WATCH_PATH = 0,
> > + XS_WATCH_TOKEN
> > +};
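Every xenstore exchange is a struct xsd_sockmsg header followed by the
payload. A sketch of framing an XS_READ request for a path, where the
payload is just the nul-terminated path and req_id is an arbitrary value
the daemon echoes back (tx_id 0 means "no transaction"):

    #include <string.h>

    static size_t build_xs_read(char *out, const char *path, u32 req_id)
    {
            struct xsd_sockmsg hdr;
            size_t plen = strlen(path) + 1; /* include the trailing NUL */

            hdr.type = XS_READ;
            hdr.req_id = req_id;
            hdr.tx_id = 0;
            hdr.len = (u32)plen;

            memcpy(out, &hdr, sizeof(hdr));
            memcpy(out + sizeof(hdr), path, plen);
            return sizeof(hdr) + plen;
    }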
> > +
> > +/*
> > + * `incontents 150 xenstore_struct XenStore wire protocol.
> > + *
> > + * Inter-domain shared memory communications.
> > + */
> > +#define XENSTORE_RING_SIZE 1024
> > +typedef u32 XENSTORE_RING_IDX;
> > +#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE - 1))
> > +struct xenstore_domain_interface {
> > + char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */
> > + char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */
> > + XENSTORE_RING_IDX req_cons, req_prod;
> > + XENSTORE_RING_IDX rsp_cons, rsp_prod;
> > + u32 server_features; /* Bitmap of features supported by the server */
> > + u32 connection;
> > +};
> > +
> > +/* Violating this is very bad. See docs/misc/xenstore.txt. */
> > +#define XENSTORE_PAYLOAD_MAX 4096
> > +
> > +/* Violating these just gets you an error back */
> > +#define XENSTORE_ABS_PATH_MAX 3072
> > +#define XENSTORE_REL_PATH_MAX 2048
> > +
> > +/* The ability to reconnect a ring */
> > +#define XENSTORE_SERVER_FEATURE_RECONNECTION 1
> > +
> > +/* Valid values for the connection field */
> > +#define XENSTORE_CONNECTED 0 /* the steady-state */
> > +#define XENSTORE_RECONNECT 1 /* guest has initiated a reconnect */
> > +
> > +#endif /* _XS_WIRE_H */
> > +
> > +/*
> > + * Local variables:
> > + * mode: C
> > + * c-file-style: "BSD"
> > + * c-basic-offset: 4
> > + * tab-width: 8
> > + * indent-tabs-mode: nil
> > + * End:
> > + */
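A sketch of the producer side of this ring, copying 'len' bytes into the
request channel. It assumes the caller has already verified that
XENSTORE_RING_SIZE - (req_prod - req_cons) >= len, and uses the same
xen_wmb() barrier convention as the ring macros above:

    static void xs_ring_write(struct xenstore_domain_interface *intf,
                              const char *data, u32 len)
    {
            XENSTORE_RING_IDX prod = intf->req_prod;
            u32 i;

            for (i = 0; i < len; i++)
                    intf->req[MASK_XENSTORE_IDX(prod++)] = data[i];

            xen_wmb(); /* data must be visible before the producer update */
            intf->req_prod = prod;
            /* A real client would now notify the daemon's event channel. */
    }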
> > diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
> > new file mode 100644
> > index 0000000000..19959da8b4
> > --- /dev/null
> > +++ b/include/xen/interface/memory.h
> > @@ -0,0 +1,332 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +/*****************************************************************************
> > + * memory.h
> > + *
> > + * Memory reservation and information.
> > + *
> > + * Copyright (c) 2005, Keir Fraser <keir at xensource.com>
> > + */
> > +
> > +#ifndef __XEN_PUBLIC_MEMORY_H__
> > +#define __XEN_PUBLIC_MEMORY_H__
> > +
> > +/*
> > + * Increase or decrease the specified domain's memory reservation. Returns a
> > + * -ve errcode on failure, or the # extents successfully allocated or freed.
> > + * arg == addr of struct xen_memory_reservation.
> > + */
> > +#define XENMEM_increase_reservation 0
> > +#define XENMEM_decrease_reservation 1
> > +#define XENMEM_populate_physmap 6
> > +struct xen_memory_reservation {
> > + /*
> > +  * XENMEM_increase_reservation:
> > +  *   OUT: MFN (*not* GMFN) bases of extents that were allocated
> > +  * XENMEM_decrease_reservation:
> > +  *   IN:  GMFN bases of extents to free
> > +  * XENMEM_populate_physmap:
> > +  *   IN:  GPFN bases of extents to populate with memory
> > +  *   OUT: GMFN bases of extents that were allocated
> > +  *   (NB. This command also updates the mach_to_phys translation table)
> > +  */
> > + GUEST_HANDLE(xen_pfn_t)extent_start;
> > +
> > + /* Number of extents, and size/alignment of each (2^extent_order pages). */
> > + xen_ulong_t nr_extents;
> > + unsigned int extent_order;
> > +
> > + /*
> > +  * Maximum # bits addressable by the user of the allocated region (e.g.,
> > +  * I/O devices often have a 32-bit limitation even in 64-bit systems). If
> > +  * zero then the user has no addressing restriction.
> > +  * This field is not used by XENMEM_decrease_reservation.
> > +  */
> > + unsigned int address_bits;
> > +
> > + /*
> > +  * Domain whose reservation is being changed.
> > +  * Unprivileged domains can specify only DOMID_SELF.
> > +  */
> > + domid_t domid;
> > +
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);
> > +
> > +/*
> > + * An atomic exchange of memory pages. If return code is zero then
> > + * @out.extent_list provides GMFNs of the newly-allocated memory.
> > + * Returns zero on complete success, otherwise a negative error code.
> > + * On complete success then always @nr_exchanged == @in.nr_extents.
> > + * On partial success @nr_exchanged indicates how much work was done.
> > + */
> > +#define XENMEM_exchange 11
> > +struct xen_memory_exchange {
> > + /*
> > +  * [IN] Details of memory extents to be exchanged (GMFN bases).
> > +  * Note that @in.address_bits is ignored and unused.
> > +  */
> > + struct xen_memory_reservation in;
> > +
> > + /*
> > +  * [IN/OUT] Details of new memory extents.
> > +  * We require that:
> > +  *  1. @in.domid == @out.domid
> > +  *  2. @in.nr_extents << @in.extent_order ==
> > +  *     @out.nr_extents << @out.extent_order
> > +  *  3. @in.extent_start and @out.extent_start lists must not overlap
> > +  *  4. @out.extent_start lists GPFN bases to be populated
> > +  *  5. @out.extent_start is overwritten with allocated GMFN bases
> > +  */
> > + struct xen_memory_reservation out;
> > +
> > + /*
> > +  * [OUT] Number of input extents that were successfully exchanged:
> > +  *  1. The first @nr_exchanged input extents were successfully
> > +  *     deallocated.
> > +  *  2. The corresponding first entries in the output extent list correctly
> > +  *     indicate the GMFNs that were successfully exchanged.
> > +  *  3. All other input and output extents are untouched.
> > +  *  4. If not all input extents are exchanged then the return code of this
> > +  *     command will be non-zero.
> > +  *  5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
> > +  */
> > + xen_ulong_t nr_exchanged;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange);
> > +/*
> > + * Returns the maximum machine frame number of mapped RAM in this system.
> > + * This command always succeeds (it never returns an error code).
> > + * arg == NULL.
> > + */
> > +#define XENMEM_maximum_ram_page 2
> > +
> > +/*
> > + * Returns the current or maximum memory reservation, in pages, of the
> > + * specified domain (may be DOMID_SELF). Returns -ve errcode on failure.
> > + * arg == addr of domid_t.
> > + */
> > +#define XENMEM_current_reservation 3
> > +#define XENMEM_maximum_reservation 4
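A sketch of asking Xen to populate one page at a given guest pfn with
the reservation structure defined above. HYPERVISOR_memory_op() is used
here as a hypothetical hypercall wrapper, not something this header
provides; the return value is the number of extents populated (1 on
success):

    static int populate_one_page(xen_pfn_t gpfn)
    {
            xen_pfn_t pfn = gpfn;
            struct xen_memory_reservation res = {
                    .nr_extents   = 1,
                    .extent_order = 0, /* one 4K page per extent */
                    .address_bits = 0, /* no addressing restriction */
                    .domid        = DOMID_SELF,
            };

            set_xen_guest_handle(res.extent_start, &pfn);
            return HYPERVISOR_memory_op(XENMEM_populate_physmap, &res);
    }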
> > +
> > +/*
> > + * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
> > + * mapping table. Architectures which do not have a m2p table do not implement
> > + * this command.
> > + * arg == addr of xen_machphys_mfn_list_t.
> > + */
> > +#define XENMEM_machphys_mfn_list 5
> > +struct xen_machphys_mfn_list {
> > + /*
> > +  * Size of the 'extent_start' array. Fewer entries will be filled if the
> > +  * machphys table is smaller than max_extents * 2MB.
> > +  */
> > + unsigned int max_extents;
> > +
> > + /*
> > +  * Pointer to buffer to fill with list of extent starts. If there are
> > +  * any large discontiguities in the machine address space, 2MB gaps in
> > +  * the machphys table will be represented by an MFN base of zero.
> > +  */
> > + GUEST_HANDLE(xen_pfn_t)extent_start;
> > +
> > + /*
> > +  * Number of extents written to the above array. This will be smaller
> > +  * than 'max_extents' if the machphys table is smaller than max_e * 2MB.
> > +  */
> > + unsigned int nr_extents;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
> > +
> > +/*
> > + * Returns the location in virtual address space of the machine_to_phys
> > + * mapping table. Architectures which do not have a m2p table, or which do not
> > + * map it by default into guest address space, do not implement this command.
> > + * arg == addr of xen_machphys_mapping_t.
> > + */
> > +#define XENMEM_machphys_mapping 12
> > +struct xen_machphys_mapping {
> > + xen_ulong_t v_start, v_end; /* Start and end virtual addresses. */
> > + xen_ulong_t max_mfn;        /* Maximum MFN that can be looked up. */
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t);
> > +
> > +#define XENMAPSPACE_shared_info  0 /* shared info page */
> > +#define XENMAPSPACE_grant_table  1 /* grant table page */
> > +#define XENMAPSPACE_gmfn         2 /* GMFN */
> > +#define XENMAPSPACE_gmfn_range   3 /* GMFN range, XENMEM_add_to_physmap only. */
> > +#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom,
> > +                                    * XENMEM_add_to_physmap_range only.
> > +                                    */
> > +#define XENMAPSPACE_dev_mmio     5 /* device mmio region */
> > +
> > +/*
> > + * Sets the GPFN at which a particular page appears in the specified guest's
> > + * pseudophysical address space.
> > + * arg == addr of xen_add_to_physmap_t.
> > + */
> > +#define XENMEM_add_to_physmap 7
> > +struct xen_add_to_physmap {
> > + /* Which domain to change the mapping for. */
> > + domid_t domid;
> > +
> > + /* Number of pages to go through for gmfn_range */
> > + u16 size;
> > +
> > + /* Source mapping space. */
> > + unsigned int space;
> > +
> > + /* Index into source mapping space. */
> > + xen_ulong_t idx;
> > +
> > + /* GPFN where the source mapping page should appear. */
> > + xen_pfn_t gpfn;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
> > +
> > +/*** REMOVED ***/
> > +/*#define XENMEM_translate_gpfn_list 8*/
> > +
> > +#define XENMEM_add_to_physmap_range 23
> > +struct xen_add_to_physmap_range {
> > + /* IN */
> > + /* Which domain to change the mapping for. */
> > + domid_t domid;
> > + u16 space; /* => enum phys_map_space */
> > +
> > + /* Number of pages to go through */
> > + u16 size;
> > + domid_t foreign_domid; /* IFF gmfn_foreign */
> > +
> > + /* Indexes into space being mapped. */
> > + GUEST_HANDLE(xen_ulong_t)idxs;
> > +
> > + /* GPFN in domid where the source mapping page should appear. */
> > + GUEST_HANDLE(xen_pfn_t)gpfns;
> > +
> > + /* OUT */
> > +
> > + /* Per index error code. */
> > + GUEST_HANDLE(int)errs;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap_range);
> > +
> > +/*
> > + * Returns the pseudo-physical memory map as it was when the domain
> > + * was started (specified by XENMEM_set_memory_map).
> > + * arg == addr of struct xen_memory_map.
> > + */
> > +#define XENMEM_memory_map 9
> > +struct xen_memory_map {
> > + /*
> > +  * On call the number of entries which can be stored in buffer. On
> > +  * return the number of entries which have been stored in
> > +  * buffer.
> > +  */
> > + unsigned int nr_entries;
> > +
> > + /*
> > +  * Entries in the buffer are in the same format as returned by the
> > +  * BIOS INT 0x15 EAX=0xE820 call.
> > +  */
> > + GUEST_HANDLE(void)buffer;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
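XENMEM_add_to_physmap with XENMAPSPACE_shared_info is how a guest asks
Xen to place the shared info page into its physmap. A sketch, reusing
the same hypothetical HYPERVISOR_memory_op() wrapper as above:

    static int map_shared_info(xen_pfn_t gpfn)
    {
            struct xen_add_to_physmap xatp = {
                    .domid = DOMID_SELF,
                    .space = XENMAPSPACE_shared_info,
                    .idx   = 0,
                    .gpfn  = gpfn,
            };

            return HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
    }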
> > +
> > +/*
> > + * Returns the real physical memory map. Passes the same structure as
> > + * XENMEM_memory_map.
> > + * arg == addr of struct xen_memory_map.
> > + */
> > +#define XENMEM_machine_memory_map 10
> > +
> > +/*
> > + * Unmaps the page appearing at a particular GPFN from the specified guest's
> > + * pseudophysical address space.
> > + * arg == addr of xen_remove_from_physmap_t.
> > + */
> > +#define XENMEM_remove_from_physmap 15
> > +struct xen_remove_from_physmap {
> > + /* Which domain to change the mapping for. */
> > + domid_t domid;
> > +
> > + /* GPFN of the current mapping of the page. */
> > + xen_pfn_t gpfn;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
> > +
> > +/*
> > + * Get the pages for a particular guest resource, so that they can be
> > + * mapped directly by a tools domain.
> > + */
> > +#define XENMEM_acquire_resource 28
> > +struct xen_mem_acquire_resource {
> > + /* IN - The domain whose resource is to be mapped */
> > + domid_t domid;
> > + /* IN - the type of resource */
> > + u16 type;
> > +
> > +#define XENMEM_resource_ioreq_server 0
> > +#define XENMEM_resource_grant_table 1
> > +
> > + /*
> > +  * IN - a type-specific resource identifier, which must be zero
> > +  * unless stated otherwise.
> > +  *
> > +  * type == XENMEM_resource_ioreq_server -> id == ioreq server id
> > +  * type == XENMEM_resource_grant_table -> id defined below
> > +  */
> > + u32 id;
> > +
> > +#define XENMEM_resource_grant_table_id_shared 0
> > +#define XENMEM_resource_grant_table_id_status 1
> > +
> > + /*
> > +  * IN/OUT - As an IN parameter number of frames of the resource to be
> > +  *          mapped. However, if the specified value is 0 and frame_list
> > +  *          is NULL then this field will be set to the maximum value
> > +  *          supported by the implementation on return.
> > +  */
> > + u32 nr_frames;
> > + /*
> > +  * OUT - Must be zero on entry. On return this may contain a bitwise
> > +  *       OR of the following values.
> > +  */
> > + u32 flags;
> > +
> > + /* The resource pages have been assigned to the calling domain */
> > +#define _XENMEM_rsrc_acq_caller_owned 0
> > +#define XENMEM_rsrc_acq_caller_owned (1u << _XENMEM_rsrc_acq_caller_owned)
> > +
> > + /*
> > +  * IN - the index of the initial frame to be mapped. This parameter
> > +  *      is ignored if nr_frames is 0.
> > +  */
> > + u64 frame;
> > +
> > +#define XENMEM_resource_ioreq_server_frame_bufioreq 0
> > +#define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n))
> > +
> > + /*
> > +  * IN/OUT - If the tools domain is PV then, upon return, frame_list
> > +  *          will be populated with the MFNs of the resource.
> > +  *          If the tools domain is HVM then it is expected that, on
> > +  *          entry, frame_list will be populated with a list of GFNs
> > +  *          that will be mapped to the MFNs of the resource.
> > +  *          If -EIO is returned then the frame_list has only been
> > +  *          partially mapped and it is up to the caller to unmap all
> > +  *          the GFNs.
> > +  *          This parameter may be NULL if nr_frames is 0.
> > +  */
> > + GUEST_HANDLE(xen_pfn_t)frame_list;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(xen_mem_acquire_resource);
> > +
> > +#endif /* __XEN_PUBLIC_MEMORY_H__ */
> > diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h
> > new file mode 100644
> > index 0000000000..0f12dcf267
> > --- /dev/null
> > +++ b/include/xen/interface/sched.h
> > @@ -0,0 +1,188 @@
> > +/*****************************************************************************
> > + * sched.h
> > + *
> > + * Scheduler state interactions
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a copy
> > + * of this software and associated documentation files (the "Software"), to
> > + * deal in the Software without restriction, including without limitation the
> > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> > + * sell copies of the Software, and to permit persons to whom the Software is
> > + * furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice shall be included in
> > + * all copies or substantial portions of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> > + * DEALINGS IN THE SOFTWARE.
> > + *
> > + * Copyright (c) 2005, Keir Fraser <keir at xensource.com>
> > + */
> > +
> > +#ifndef __XEN_PUBLIC_SCHED_H__
> > +#define __XEN_PUBLIC_SCHED_H__
> > +
> > +#include <xen/interface/event_channel.h>
> > +
> > +/*
> > + * Guest Scheduler Operations
> > + *
> > + * The SCHEDOP interface provides mechanisms for a guest to interact
> > + * with the scheduler, including yield, blocking and shutting itself
> > + * down.
> > + */
> > +
> > +/*
> > + * The prototype for this hypercall is:
> > + * long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...)
> > + *
> > + * @cmd == SCHEDOP_??? (scheduler operation).
> > + * @arg == Operation-specific extra argument(s), as described below.
> > + * ...  == Additional Operation-specific extra arguments, described below.
> > + *
> > + * Versions of Xen prior to 3.0.2 provided only the following legacy version
> > + * of this hypercall, supporting only the commands yield, block and shutdown:
> > + * long sched_op(int cmd, unsigned long arg)
> > + * @cmd == SCHEDOP_??? (scheduler operation).
> > + * @arg == 0 (SCHEDOP_yield and SCHEDOP_block)
> > + *      == SHUTDOWN_* code (SCHEDOP_shutdown)
> > + *
> > + * This legacy version is available to new guests as:
> > + * long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg)
> > + */
> > +
> > +/*
> > + * Voluntarily yield the CPU.
> > + * @arg == NULL.
> > + */
> > +#define SCHEDOP_yield 0
> > +
> > +/*
> > + * Block execution of this VCPU until an event is received for processing.
> > + * If called with event upcalls masked, this operation will atomically
> > + * reenable event delivery and check for pending events before blocking the
> > + * VCPU. This avoids a "wakeup waiting" race.
> > + * @arg == NULL.
> > + */
> > +#define SCHEDOP_block 1
> > +
> > +/*
> > + * Halt execution of this domain (all VCPUs) and notify the system controller.
> > + * @arg == pointer to sched_shutdown structure.
> > + *
> > + * If the sched_shutdown_t reason is SHUTDOWN_suspend then
> > + * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN
> > + * of the guest's start info page. RDX/EDX is the third hypercall
> > + * argument.
> > + *
> > + * In addition, when the reason is SHUTDOWN_suspend this hypercall
> > + * returns 1 if suspend was cancelled or the domain was merely
> > + * checkpointed, and 0 if it is resuming in a new domain.
> > + */
> > +#define SCHEDOP_shutdown 2
> > +
> > +/*
> > + * Poll a set of event-channel ports. Return when one or more are pending. An
> > + * optional timeout may be specified.
> > + * @arg == pointer to sched_poll structure.
> > + */
> > +#define SCHEDOP_poll 3
> > +
> > +/*
> > + * Declare a shutdown for another domain. The main use of this function is
> > + * in interpreting shutdown requests and reasons for fully-virtualized
> > + * domains. A para-virtualized domain may use SCHEDOP_shutdown directly.
> > + * @arg == pointer to sched_remote_shutdown structure.
> > + */
> > +#define SCHEDOP_remote_shutdown 4
> > +
> > +/*
> > + * Latch a shutdown code, so that when the domain later shuts down it
> > + * reports this code to the control tools.
> > + * @arg == sched_shutdown, as for SCHEDOP_shutdown.
> > + */
> > +#define SCHEDOP_shutdown_code 5
> > +
> > +/*
> > + * Setup, poke and destroy a domain watchdog timer.
> > + * @arg == pointer to sched_watchdog structure.
> > + * With id == 0, setup a domain watchdog timer to cause domain shutdown
> > + * after timeout, returns watchdog id.
> > + * With id != 0 and timeout == 0, destroy domain watchdog timer.
> > + * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
> > + */
> > +#define SCHEDOP_watchdog 6
> > +
> > +/*
> > + * Override the current vcpu affinity by pinning it to one physical cpu or
> > + * undo this override restoring the previous affinity.
> > + * @arg == pointer to sched_pin_override structure.
> > + *
> > + * A negative pcpu value will undo a previous pin override and restore the
> > + * previous cpu affinity.
> > + * This call is allowed for the hardware domain only and requires the cpu
> > + * to be part of the domain's cpupool.
> > + */
> > +#define SCHEDOP_pin_override 7
> > +
> > +struct sched_shutdown {
> > + unsigned int reason; /* SHUTDOWN_* => shutdown reason */
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown);
> > +
> > +struct sched_poll {
> > + GUEST_HANDLE(evtchn_port_t)ports;
> > + unsigned int nr_ports;
> > + u64 timeout;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
> > +
> > +struct sched_remote_shutdown {
> > + domid_t domain_id;   /* Remote domain ID */
> > + unsigned int reason; /* SHUTDOWN_* => shutdown reason */
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(sched_remote_shutdown);
> > +
> > +struct sched_watchdog {
> > + u32 id;      /* watchdog ID */
> > + u32 timeout; /* timeout */
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(sched_watchdog);
> > +
> > +struct sched_pin_override {
> > + s32 pcpu;
> > +};
> > +
> > +DEFINE_GUEST_HANDLE_STRUCT(sched_pin_override);
> > +
> > +/*
> > + * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
> > + * software to determine the appropriate action. For the most part, Xen does
> > + * not care about the shutdown code.
> > + */
> > +#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */
> > +#define SHUTDOWN_reboot   1 /* Clean up, kill, and then restart. */
> > +#define SHUTDOWN_suspend  2 /* Clean up, save suspend info, kill. */
> > +#define SHUTDOWN_crash    3 /* Tell controller we've crashed. */
> > +#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */
> > +
> > +/*
> > + * Domain asked to perform 'soft reset' for it. The expected behavior is to
> > + * reset internal Xen state for the domain returning it to the point where it
> > + * was created but leaving the domain's memory contents and vCPU contexts
> > + * intact. This will allow the domain to start over and set up all Xen specific
> > + * interfaces again.
> > + */
> > +#define SHUTDOWN_soft_reset 5
> > +#define SHUTDOWN_MAX 5 /* Maximum valid shutdown reason. */
> > +
> > +#endif /* __XEN_PUBLIC_SCHED_H__ */
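A minimal sketch of a clean power-off through this interface, again
assuming a hypothetical HYPERVISOR_sched_op() hypercall wrapper (the
prototype documented in the header above, not defined by this patch):

    static void domain_poweroff(void)
    {
            struct sched_shutdown shutdown = {
                    .reason = SHUTDOWN_poweroff,
            };

            /* Tells the system controller the domain exited normally. */
            HYPERVISOR_sched_op(SCHEDOP_shutdown, &shutdown);
    }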
> > diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
> > new file mode 100644
> > index 0000000000..964daaedfb
> > --- /dev/null
> > +++ b/include/xen/interface/xen.h
> > @@ -0,0 +1,225 @@
> > +/*****************************************************************************
> > + * xen.h
> > + *
> > + * Guest OS interface to Xen.
> > + *
> > + * Permission is hereby granted, free of charge, to any person obtaining a copy
> > + * of this software and associated documentation files (the "Software"), to
> > + * deal in the Software without restriction, including without limitation the
> > + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
> > + * sell copies of the Software, and to permit persons to whom the Software is
> > + * furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice shall be included in
> > + * all copies or substantial portions of the Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> > + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> > + * DEALINGS IN THE SOFTWARE.
> > + *
> > + * Copyright (c) 2004, K A Fraser
> > + */
> > +
> > +#ifndef __XEN_PUBLIC_XEN_H__
> > +#define __XEN_PUBLIC_XEN_H__
> > +
> > +#include <xen/arm/interface.h>
> > +
> > +/*
> > + * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
> > + */
> > +
> > +/*
> > + * x86_32: EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5.
> > + *         EAX = return value
> > + *         (argument registers may be clobbered on return)
> > + * x86_64: RAX = vector; RDI, RSI, RDX, R10, R8, R9 = args 1, 2, 3, 4, 5, 6.
> > + *         RAX = return value
> > + *         (argument registers not clobbered on return; RCX, R11 are)
> > + */
> > +#define __HYPERVISOR_set_trap_table 0
> > +#define __HYPERVISOR_mmu_update 1
> > +#define __HYPERVISOR_set_gdt 2
> > +#define __HYPERVISOR_stack_switch 3
> > +#define __HYPERVISOR_set_callbacks 4
> > +#define __HYPERVISOR_fpu_taskswitch 5
> > +#define __HYPERVISOR_sched_op_compat 6
> > +#define __HYPERVISOR_platform_op 7
> > +#define __HYPERVISOR_set_debugreg 8
> > +#define __HYPERVISOR_get_debugreg 9
> > +#define __HYPERVISOR_update_descriptor 10
> > +#define __HYPERVISOR_memory_op 12
> > +#define __HYPERVISOR_multicall 13
> > +#define __HYPERVISOR_update_va_mapping 14
> > +#define __HYPERVISOR_set_timer_op 15
> > +#define __HYPERVISOR_event_channel_op_compat 16
> > +#define __HYPERVISOR_xen_version 17
> > +#define __HYPERVISOR_console_io 18
> > +#define __HYPERVISOR_physdev_op_compat 19
> > +#define __HYPERVISOR_grant_table_op 20
> > +#define __HYPERVISOR_vm_assist 21
> > +#define __HYPERVISOR_update_va_mapping_otherdomain 22
> > +#define __HYPERVISOR_iret 23 /* x86 only */
> > +#define __HYPERVISOR_vcpu_op 24
> > +#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */
> > +#define __HYPERVISOR_mmuext_op 26
> > +#define __HYPERVISOR_xsm_op 27
> > +#define __HYPERVISOR_nmi_op 28
> > +#define __HYPERVISOR_sched_op 29
> > +#define __HYPERVISOR_callback_op 30
> > +#define __HYPERVISOR_xenoprof_op 31
> > +#define __HYPERVISOR_event_channel_op 32
> > +#define __HYPERVISOR_physdev_op 33
> > +#define __HYPERVISOR_hvm_op 34
> > +#define __HYPERVISOR_sysctl 35
> > +#define __HYPERVISOR_domctl 36
> > +#define __HYPERVISOR_kexec_op 37
> > +#define __HYPERVISOR_tmem_op 38
> > +#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */
> > +#define __HYPERVISOR_xenpmu_op 40
> > +#define __HYPERVISOR_dm_op 41
> > +
> > +/* Architecture-specific hypercall definitions. */
> > +#define __HYPERVISOR_arch_0 48
> > +#define __HYPERVISOR_arch_1 49
> > +#define __HYPERVISOR_arch_2 50
> > +#define __HYPERVISOR_arch_3 51
> > +#define __HYPERVISOR_arch_4 52
> > +#define __HYPERVISOR_arch_5 53
> > +#define __HYPERVISOR_arch_6 54
> > +#define __HYPERVISOR_arch_7 55
> > +
> > +#ifndef __ASSEMBLY__
> > +
> > +typedef u16 domid_t;
> > +
> > +/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */
> > +#define DOMID_FIRST_RESERVED (0x7FF0U)
> > +
> > +/* DOMID_SELF is used in certain contexts to refer to oneself. */
> > +#define DOMID_SELF (0x7FF0U)
> > +
> > +/*
> > + * DOMID_IO is used to restrict page-table updates to mapping I/O memory.
> > + * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO
> > + * is useful to ensure that no mappings to the OS's own heap are accidentally
> > + * installed. (e.g., in Linux this could cause havoc as reference counts
> > + * aren't adjusted on the I/O-mapping code path).
> > + * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can
> > + * be specified by any calling domain.
> > + */
> > +#define DOMID_IO (0x7FF1U)
> > +
> > +/*
> > + * DOMID_XEN is used to allow privileged domains to map restricted parts of
> > + * Xen's heap space (e.g., the machine_to_phys table).
> > + * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if
> > + * the caller is privileged.
> > + */
> > +#define DOMID_XEN (0x7FF2U)
> > +
> > +/* DOMID_COW is used as the owner of sharable pages */
> > +#define DOMID_COW (0x7FF3U)
> > +
> > +/* DOMID_INVALID is used to identify pages with unknown owner. */
> > +#define DOMID_INVALID (0x7FF4U)
> > +
> > +/* Idle domain. */
> > +#define DOMID_IDLE (0x7FFFU)
> > +
> > +struct vcpu_info {
> > + /*
> > +  * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
> > +  * a pending notification for a particular VCPU. It is then cleared
> > +  * by the guest OS /before/ checking for pending work, thus avoiding
> > +  * a set-and-check race. Note that the mask is only accessed by Xen
> > +  * on the CPU that is currently hosting the VCPU. This means that the
> > +  * pending and mask flags can be updated by the guest without special
> > +  * synchronisation (i.e., no need for the x86 LOCK prefix).
> > +  * This may seem suboptimal because if the pending flag is set by
> > +  * a different CPU then an IPI may be scheduled even when the mask
> > +  * is set. However, note:
> > +  *  1. The task of 'interrupt holdoff' is covered by the per-event-
> > +  *     channel mask bits. A 'noisy' event that is continually being
> > +  *     triggered can be masked at source at this very precise
> > +  *     granularity.
> > +  *  2. The main purpose of the per-VCPU mask is therefore to restrict
> > +  *     reentrant execution: whether for concurrency control, or to
> > +  *     prevent unbounded stack usage. Whatever the purpose, we expect
> > +  *     that the mask will be asserted only for short periods at a time,
> > +  *     and so the likelihood of a 'spurious' IPI is suitably small.
> > +  * The mask is read before making an event upcall to the guest: a
> > +  * non-zero mask therefore guarantees that the VCPU will not receive
> > +  * an upcall activation. The mask is cleared when the VCPU requests
> > +  * to block: this avoids wakeup-waiting races.
> > +  */
> > + u8 evtchn_upcall_pending;
> > + u8 evtchn_upcall_mask;
> > + xen_ulong_t evtchn_pending_sel;
> > + struct arch_vcpu_info arch;
> > + struct pvclock_vcpu_time_info time;
> > +}; /* 64 bytes (x86) */
> > +
> > +/*
> > + * Xen/kernel shared data -- pointer provided in start_info.
> > + * NB. We expect that this struct is smaller than a page.
> > + */
> > +struct shared_info {
> > + struct vcpu_info vcpu_info[MAX_VIRT_CPUS];
> > +
> > + /*
> > +  * A domain can create "event channels" on which it can send and receive
> > +  * asynchronous event notifications. There are three classes of event that
> > +  * are delivered by this mechanism:
> > +  *  1. Bi-directional inter- and intra-domain connections. Domains must
> > +  *     arrange out-of-band to set up a connection (usually by allocating
> > +  *     an unbound 'listener' port and advertising that via a storage
> > +  *     service such as xenstore).
> > +  *  2. Physical interrupts. A domain with suitable hardware-access
> > +  *     privileges can bind an event-channel port to a physical interrupt
> > +  *     source.
> > +  *  3. Virtual interrupts ('events'). A domain can bind an event-channel
> > +  *     port to a virtual interrupt source, such as the virtual-timer
> > +  *     device or the emergency console.
> > +  *
> > +  * Event channels are addressed by a "port index". Each channel is
> > +  * associated with two bits of information:
> > +  *  1. PENDING -- notifies the domain that there is a pending notification
> > +  *     to be processed. This bit is cleared by the guest.
> > +  *  2. MASK -- if this bit is clear then a 0->1 transition of PENDING
> > +  *     will cause an asynchronous upcall to be scheduled. This bit is only
> > +  *     updated by the guest. It is read-only within Xen. If a channel
> > +  *     becomes pending while the channel is masked then the 'edge' is lost
> > +  *     (i.e., when the channel is unmasked, the guest must manually handle
> > +  *     pending notifications as no upcall will be scheduled by Xen).
> > +  *
> > +  * To expedite scanning of pending notifications, any 0->1 pending
> > +  * transition on an unmasked channel causes a corresponding bit in a
> > +  * per-vcpu selector word to be set. Each bit in the selector covers a
> > +  * 'C long' in the PENDING bitfield array.
> > +  */
> > + xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8];
> > + xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8];
> > +
> > + /*
> > +  * Wallclock time: updated only by control software. Guests should base
> > +  * their gettimeofday() syscall on this wallclock-base value.
> > +  */
> > + struct pvclock_wall_clock wc;
> > +
> > + struct arch_shared_info arch;
> > +
> > +};
> > +
> > +#else /* __ASSEMBLY__ */
> > +
> > +/* In assembly code we cannot use C numeric constant suffixes. */
> > +#define mk_unsigned_long(x) x
> > +
> > +#endif /* !__ASSEMBLY__ */
> > +
> > +#endif /* __XEN_PUBLIC_XEN_H__ */
> > --
> > 2.17.1
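A sketch of the scan loop the selector word enables, assuming a mapped
shared_info pointer; handle_event() is a hypothetical per-port handler
and the GCC atomic builtins stand in for whatever xchg-and-clear
primitive the architecture provides:

    extern void handle_event(unsigned int port); /* hypothetical */

    /* Scan vcpu0's pending-event selector and dispatch each set port. */
    static void check_events(struct shared_info *s)
    {
            struct vcpu_info *v = &s->vcpu_info[0];
            xen_ulong_t sel, pend;
            unsigned int word, bit;

            v->evtchn_upcall_pending = 0;
            sel = __atomic_exchange_n(&v->evtchn_pending_sel, 0,
                                      __ATOMIC_ACQ_REL);
            while (sel) {
                    word = __builtin_ctzl(sel);
                    sel &= sel - 1;
                    /* Only unmasked channels are delivered. */
                    pend = __atomic_exchange_n(&s->evtchn_pending[word], 0,
                                               __ATOMIC_ACQ_REL) &
                           ~s->evtchn_mask[word];
                    while (pend) {
                            bit = __builtin_ctzl(pend);
                            pend &= pend - 1;
                            handle_event(word * sizeof(xen_ulong_t) * 8 +
                                         bit);
                    }
            }
    }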
diff --git a/include/xen/arm/interface.h b/include/xen/arm/interface.h new file mode 100644 index 0000000000..79d5ae8563 --- /dev/null +++ b/include/xen/arm/interface.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/****************************************************************************** + * Guest OS interface to ARM Xen. + * + * Stefano Stabellini <stefano.stabellini at eu.citrix.com>, Citrix, 2012 + */ + +#ifndef _ASM_ARM_XEN_INTERFACE_H +#define _ASM_ARM_XEN_INTERFACE_H + +#ifndef __ASSEMBLY__ +#include <linux/types.h> +#endif + +#define uint64_aligned_t u64 __attribute__((aligned(8))) + +#define __DEFINE_GUEST_HANDLE(name, type) \ + typedef struct { union { type *p; uint64_aligned_t q; }; } \ + __guest_handle_ ## name + +#define DEFINE_GUEST_HANDLE_STRUCT(name) \ + __DEFINE_GUEST_HANDLE(name, struct name) +#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name) +#define GUEST_HANDLE(name) __guest_handle_ ## name + +#define set_xen_guest_handle(hnd, val) \ + do { \ + if (sizeof(hnd) == 8) \ + *(u64 *)&(hnd) = 0; \ + (hnd).p = val; \ + } while (0) + +#define __HYPERVISOR_platform_op_raw __HYPERVISOR_platform_op + +#ifndef __ASSEMBLY__ +/* Explicitly size integers that represent pfns in the interface with + * Xen so that we can have one ABI that works for 32 and 64 bit guests. + * Note that this means that the xen_pfn_t type may be capable of + * representing pfn's which the guest cannot represent in its own pfn + * type. However since pfn space is controlled by the guest this is + * fine since it simply wouldn't be able to create any sure pfns in + * the first place. + */ +typedef u64 xen_pfn_t; +#define PRI_xen_pfn "llx" +typedef u64 xen_ulong_t; +#define PRI_xen_ulong "llx" +typedef s64 xen_long_t; +#define PRI_xen_long "llx" +/* Guest handles for primitive C types. */ +__DEFINE_GUEST_HANDLE(uchar, unsigned char); +__DEFINE_GUEST_HANDLE(uint, unsigned int); +DEFINE_GUEST_HANDLE(char); +DEFINE_GUEST_HANDLE(int); +DEFINE_GUEST_HANDLE(void); +DEFINE_GUEST_HANDLE(u64); +DEFINE_GUEST_HANDLE(u32); +DEFINE_GUEST_HANDLE(xen_pfn_t); +DEFINE_GUEST_HANDLE(xen_ulong_t); + +/* Maximum number of virtual CPUs in multi-processor guests. */ +#define MAX_VIRT_CPUS 1 + +struct arch_vcpu_info { }; +struct arch_shared_info { }; + +/* TODO: Move pvclock definitions some place arch independent */ +struct pvclock_vcpu_time_info { + u32 version; + u32 pad0; + u64 tsc_timestamp; + u64 system_time; + u32 tsc_to_system_mul; + s8 tsc_shift; + u8 flags; + u8 pad[2]; +} __attribute__((__packed__)); /* 32 bytes */ + +/* It is OK to have a 12 bytes struct with no padding because it is packed */ +struct pvclock_wall_clock { + u32 version; + u32 sec; + u32 nsec; + u32 sec_hi; +} __attribute__((__packed__)); +#endif + +#endif /* _ASM_ARM_XEN_INTERFACE_H */ diff --git a/include/xen/interface/event_channel.h b/include/xen/interface/event_channel.h new file mode 100644 index 0000000000..8174999c2f --- /dev/null +++ b/include/xen/interface/event_channel.h @@ -0,0 +1,281 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/****************************************************************************** + * event_channel.h + * + * Event channels between domains. + * + * Copyright (c) 2003-2004, K A Fraser. 
+ */ + +#ifndef __XEN_PUBLIC_EVENT_CHANNEL_H__ +#define __XEN_PUBLIC_EVENT_CHANNEL_H__ + +#include <xen/interface/xen.h> + +typedef u32 evtchn_port_t; +DEFINE_GUEST_HANDLE(evtchn_port_t); + +/* + * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as + * accepting interdomain bindings from domain <remote_dom>. A fresh port + * is allocated in <dom> and returned as <port>. + * NOTES: + * 1. If the caller is unprivileged then <dom> must be DOMID_SELF. + * 2. <rdom> may be DOMID_SELF, allowing loopback connections. + */ +#define EVTCHNOP_alloc_unbound 6 +struct evtchn_alloc_unbound { + /* IN parameters */ + domid_t dom, remote_dom; + /* OUT parameters */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between + * the calling domain and <remote_dom>. <remote_dom,remote_port> must identify + * a port that is unbound and marked as accepting bindings from the calling + * domain. A fresh port is allocated in the calling domain and returned as + * <local_port>. + * NOTES: + * 2. <remote_dom> may be DOMID_SELF, allowing loopback connections. + */ +#define EVTCHNOP_bind_interdomain 0 +struct evtchn_bind_interdomain { + /* IN parameters. */ + domid_t remote_dom; + evtchn_port_t remote_port; + /* OUT parameters. */ + evtchn_port_t local_port; +}; + +/* + * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified + * vcpu. + * NOTES: + * 1. A virtual IRQ may be bound to at most one event channel per vcpu. + * 2. The allocated event channel is bound to the specified vcpu. The binding + * may not be changed. + */ +#define EVTCHNOP_bind_virq 1 +struct evtchn_bind_virq { + /* IN parameters. */ + u32 virq; + u32 vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>. + * NOTES: + * 1. A physical IRQ may be bound to at most one event channel per domain. + * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. + */ +#define EVTCHNOP_bind_pirq 2 +struct evtchn_bind_pirq { + /* IN parameters. */ + u32 pirq; +#define BIND_PIRQ__WILL_SHARE 1 + u32 flags; /* BIND_PIRQ__* */ + /* OUT parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. + * NOTES: + * 1. The allocated event channel is bound to the specified vcpu. The binding + * may not be changed. + */ +#define EVTCHNOP_bind_ipi 7 +struct evtchn_bind_ipi { + u32 vcpu; + /* OUT parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_close: Close a local event channel <port>. If the channel is + * interdomain then the remote end is placed in the unbound state + * (EVTCHNSTAT_unbound), awaiting a new connection. + */ +#define EVTCHNOP_close 3 +struct evtchn_close { + /* IN parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_send: Send an event to the remote end of the channel whose local + * endpoint is <port>. + */ +#define EVTCHNOP_send 4 +struct evtchn_send { + /* IN parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_status: Get the current status of the communication channel which + * has an endpoint at <dom, port>. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may obtain the status of an event + * channel for which <dom> is not DOMID_SELF. + */ +#define EVTCHNOP_status 5 +struct evtchn_status { + /* IN parameters */ + domid_t dom; + evtchn_port_t port; + /* OUT parameters */ +#define EVTCHNSTAT_closed 0 /* Channel is not in use. 
*/ +#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ +#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ +#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ +#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ +#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ + u32 status; + u32 vcpu; /* VCPU to which this channel is bound. */ + union { + struct { + domid_t dom; + } unbound; /* EVTCHNSTAT_unbound */ + struct { + domid_t dom; + evtchn_port_t port; + } interdomain; /* EVTCHNSTAT_interdomain */ + u32 pirq; /* EVTCHNSTAT_pirq */ + u32 virq; /* EVTCHNSTAT_virq */ + } u; +}; + +/* + * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an + * event is pending. + * NOTES: + * 1. IPI- and VIRQ-bound channels always notify the vcpu that initialised + * the binding. This binding cannot be changed. + * 2. All other channels notify vcpu0 by default. This default is set when + * the channel is allocated (a port that is freed and subsequently reused + * has its binding reset to vcpu0). + */ +#define EVTCHNOP_bind_vcpu 8 +struct evtchn_bind_vcpu { + /* IN parameters. */ + evtchn_port_t port; + u32 vcpu; +}; + +/* + * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver + * a notification to the appropriate VCPU if an event is pending. + */ +#define EVTCHNOP_unmask 9 +struct evtchn_unmask { + /* IN parameters. */ + evtchn_port_t port; +}; + +/* + * EVTCHNOP_reset: Close all event channels associated with specified domain. + * NOTES: + * 1. <dom> may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. + */ +#define EVTCHNOP_reset 10 +struct evtchn_reset { + /* IN parameters. */ + domid_t dom; +}; + +typedef struct evtchn_reset evtchn_reset_t; + +/* + * EVTCHNOP_init_control: initialize the control block for the FIFO ABI. + */ +#define EVTCHNOP_init_control 11 +struct evtchn_init_control { + /* IN parameters. */ + u64 control_gfn; + u32 offset; + u32 vcpu; + /* OUT parameters. */ + u8 link_bits; + u8 _pad[7]; +}; + +/* + * EVTCHNOP_expand_array: add an additional page to the event array. + */ +#define EVTCHNOP_expand_array 12 +struct evtchn_expand_array { + /* IN parameters. */ + u64 array_gfn; +}; + +/* + * EVTCHNOP_set_priority: set the priority for an event channel. + */ +#define EVTCHNOP_set_priority 13 +struct evtchn_set_priority { + /* IN parameters. */ + evtchn_port_t port; + u32 priority; +}; + +struct evtchn_op { + u32 cmd; /* EVTCHNOP_* */ + union { + struct evtchn_alloc_unbound alloc_unbound; + struct evtchn_bind_interdomain bind_interdomain; + struct evtchn_bind_virq bind_virq; + struct evtchn_bind_pirq bind_pirq; + struct evtchn_bind_ipi bind_ipi; + struct evtchn_close close; + struct evtchn_send send; + struct evtchn_status status; + struct evtchn_bind_vcpu bind_vcpu; + struct evtchn_unmask unmask; + } u; +}; + +DEFINE_GUEST_HANDLE_STRUCT(evtchn_op); + +/* + * 2-level ABI + */ + +#define EVTCHN_2L_NR_CHANNELS (sizeof(xen_ulong_t) * sizeof(xen_ulong_t) * 64) + +/* + * FIFO ABI + */ + +/* Events may have priorities from 0 (highest) to 15 (lowest). 
*/ +#define EVTCHN_FIFO_PRIORITY_MAX 0 +#define EVTCHN_FIFO_PRIORITY_DEFAULT 7 +#define EVTCHN_FIFO_PRIORITY_MIN 15 + +#define EVTCHN_FIFO_MAX_QUEUES (EVTCHN_FIFO_PRIORITY_MIN + 1) + +typedef u32 event_word_t; + +#define EVTCHN_FIFO_PENDING 31 +#define EVTCHN_FIFO_MASKED 30 +#define EVTCHN_FIFO_LINKED 29 +#define EVTCHN_FIFO_BUSY 28 + +#define EVTCHN_FIFO_LINK_BITS 17 +#define EVTCHN_FIFO_LINK_MASK ((1 << EVTCHN_FIFO_LINK_BITS) - 1) + +#define EVTCHN_FIFO_NR_CHANNELS (1 << EVTCHN_FIFO_LINK_BITS) + +struct evtchn_fifo_control_block { + u32 ready; + u32 _rsvd; + event_word_t head[EVTCHN_FIFO_MAX_QUEUES]; +}; + +#endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h new file mode 100644 index 0000000000..197a0d0d58 --- /dev/null +++ b/include/xen/interface/grant_table.h @@ -0,0 +1,582 @@ +/****************************************************************************** + * grant_table.h + * + * Interface for granting foreign access to page frames, and receiving + * page-ownership transfers. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2004, K A Fraser + */ + +#ifndef __XEN_PUBLIC_GRANT_TABLE_H__ +#define __XEN_PUBLIC_GRANT_TABLE_H__ + +#include <xen/interface/xen.h> + +/*********************************** + * GRANT TABLE REPRESENTATION + */ + +/* Some rough guidelines on accessing and updating grant-table entries + * in a concurrency-safe manner. For more information, Linux contains a + * reference implementation for guest OSes (arch/xen/kernel/grant_table.c). + * + * NB. WMB is a no-op on current-generation x86 processors. However, a + * compiler barrier will still be required. + * + * Introducing a valid entry into the grant table: + * 1. Write ent->domid. + * 2. Write ent->frame: + * GTF_permit_access: Frame to which access is permitted. + * GTF_accept_transfer: Pseudo-phys frame slot being filled by new + * frame, or zero if none. + * 3. Write memory barrier (WMB). + * 4. Write ent->flags, inc. valid type. + * + * Invalidating an unused GTF_permit_access entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & (GTF_reading|GTF_writing)). + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * + * Invalidating an in-use GTF_permit_access entry: + * This cannot be done directly. 
Request assistance from the domain controller + * which can set a timeout on the use of a grant entry and take necessary + * action. (NB. This is not yet implemented!). + * + * Invalidating an unused GTF_accept_transfer entry: + * 1. flags = ent->flags. + * 2. Observe that !(flags & GTF_transfer_committed). [*] + * 3. Check result of SMP-safe CMPXCHG(&ent->flags, flags, 0). + * NB. No need for WMB as reuse of entry is control-dependent on success of + * step 3, and all architectures guarantee ordering of ctrl-dep writes. + * [*] If GTF_transfer_committed is set then the grant entry is 'committed'. + * The guest must /not/ modify the grant entry until the address of the + * transferred frame is written. It is safe for the guest to spin waiting + * for this to occur (detect by observing GTF_transfer_completed in + * ent->flags). + * + * Invalidating a committed GTF_accept_transfer entry: + * 1. Wait for (ent->flags & GTF_transfer_completed). + * + * Changing a GTF_permit_access from writable to read-only: + * Use SMP-safe CMPXCHG to set GTF_readonly, while checking !GTF_writing. + * + * Changing a GTF_permit_access from read-only to writable: + * Use SMP-safe bit-setting instruction. + */ + +/* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef u32 grant_ref_t; + +/* + * A grant table comprises a packed array of grant entries in one or more + * page frames shared between Xen and a guest. + * [XEN]: This field is written by Xen and read by the sharing guest. + * [GST]: This field is written by the guest and read by Xen. + */ + +/* + * Version 1 of the grant table entry structure is maintained purely + * for backwards compatibility. New guests should use version 2. + */ +struct grant_entry_v1 { + /* GTF_xxx: various type and flag information. [XEN,GST] */ + u16 flags; + /* The domain being granted foreign privileges. [GST] */ + domid_t domid; + /* + * GTF_permit_access: Frame that @domid is allowed to map and access. [GST] + * GTF_accept_transfer: Frame whose ownership transferred by @domid. [XEN] + */ + u32 frame; +}; + +/* + * Type of grant entry. + * GTF_invalid: This grant entry grants no privileges. + * GTF_permit_access: Allow @domid to map/access @frame. + * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame + * to this guest. Xen writes the page number to @frame. + * GTF_transitive: Allow @domid to transitively access a subrange of + * @trans_grant in @trans_domid. No mappings are allowed. + */ +#define GTF_invalid (0U << 0) +#define GTF_permit_access (1U << 0) +#define GTF_accept_transfer (2U << 0) +#define GTF_transitive (3U << 0) +#define GTF_type_mask (3U << 0) + +/* + * Subflags for GTF_permit_access. + * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] + * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] + * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] + * GTF_sub_page: Grant access to only a subrange of the page. @domid + * will only be allowed to copy from the grant, and not + * map it. [GST] + */ +#define _GTF_readonly (2) +#define GTF_readonly (1U << _GTF_readonly) +#define _GTF_reading (3) +#define GTF_reading (1U << _GTF_reading) +#define _GTF_writing (4) +#define GTF_writing (1U << _GTF_writing) +#define _GTF_sub_page (8) +#define GTF_sub_page (1U << _GTF_sub_page) + +/* + * Subflags for GTF_accept_transfer: + * GTF_transfer_committed: Xen sets this flag to indicate that it is committed + * to transferring ownership of a page frame. 
+
+/*
+ * Subflags for GTF_accept_transfer:
+ * GTF_transfer_committed: Xen sets this flag to indicate that it is committed
+ *  to transferring ownership of a page frame. When a guest sees this flag
+ *  it must /not/ modify the grant entry until GTF_transfer_completed is
+ *  set by Xen.
+ * GTF_transfer_completed: It is safe for the guest to spin-wait on this flag
+ *  after reading GTF_transfer_committed. Xen will always write the frame
+ *  address, followed by ORing this flag, in a timely manner.
+ */
+#define _GTF_transfer_committed (2)
+#define GTF_transfer_committed (1U << _GTF_transfer_committed)
+#define _GTF_transfer_completed (3)
+#define GTF_transfer_completed (1U << _GTF_transfer_completed)
+
+/*
+ * Version 2 grant table entries. These fulfil the same role as
+ * version 1 entries, but can represent more complicated operations.
+ * Any given domain will have either a version 1 or a version 2 table,
+ * and every entry in the table will be the same version.
+ *
+ * The interface by which domains use grant references does not depend
+ * on the grant table version in use by the other domain.
+ */
+
+/*
+ * Version 1 and version 2 grant entries share a common prefix. The
+ * fields of the prefix are documented as part of struct
+ * grant_entry_v1.
+ */
+struct grant_entry_header {
+ u16 flags;
+ domid_t domid;
+};
+
+/*
+ * Version 2 of the grant entry structure. This is a union because three
+ * different types are supported: full_page, sub_page and transitive.
+ */
+union grant_entry_v2 {
+ struct grant_entry_header hdr;
+
+ /*
+  * This member is used for V1-style full page grants, where either:
+  *
+  * -- hdr.type is GTF_accept_transfer, or
+  * -- hdr.type is GTF_permit_access and GTF_sub_page is not set.
+  *
+  * In that case, the frame field has the same semantics as the
+  * field of the same name in the V1 entry structure.
+  */
+ struct {
+  struct grant_entry_header hdr;
+  u32 pad0;
+  u64 frame;
+ } full_page;
+
+ /*
+  * If the grant type is GTF_grant_access and GTF_sub_page is set,
+  * @domid is allowed to access bytes [@page_off, @page_off+@length)
+  * in frame @frame.
+  */
+ struct {
+  struct grant_entry_header hdr;
+  u16 page_off;
+  u16 length;
+  u64 frame;
+ } sub_page;
+
+ /*
+  * If the grant is GTF_transitive, @domid is allowed to use the
+  * grant @gref in domain @trans_domid, as if it were the local
+  * domain. Obviously, the transitive access must be compatible
+  * with the original grant.
+  */
+ struct {
+  struct grant_entry_header hdr;
+  domid_t trans_domid;
+  u16 pad0;
+  grant_ref_t gref;
+ } transitive;
+
+ u32 __spacer[4]; /* Pad to a power of two */
+};
+
+typedef u16 grant_status_t;
+
+/***********************************
+ * GRANT TABLE QUERIES AND USES
+ */
+
+/*
+ * Handle to track a mapping created via a grant reference.
+ */
+typedef u32 grant_handle_t;
+
+/*
+ * GNTTABOP_map_grant_ref: Map the grant entry (<dom>,<ref>) for access
+ * by devices and/or host CPUs. If successful, <handle> is a tracking number
+ * that must be presented later to destroy the mapping(s). On error, <handle>
+ * is a negative status code.
+ * NOTES:
+ *  1. If GNTMAP_device_map is specified then <dev_bus_addr> is the address
+ *     via which I/O devices may access the granted frame.
+ *  2. If GNTMAP_host_map is specified then a mapping will be added at
+ *     either a host virtual address in the current address space, or at
+ *     a PTE at the specified machine address. The type of mapping to
+ *     perform is selected through the GNTMAP_contains_pte flag, and the
+ *     address is specified in <host_addr>.
+ *  3. Mappings should only be destroyed via GNTTABOP_unmap_grant_ref. If a
+ *     host mapping is destroyed by other means then it is *NOT* guaranteed
+ *     to be accounted to the correct grant reference!
+ */
+#define GNTTABOP_map_grant_ref 0
+struct gnttab_map_grant_ref {
+ /* IN parameters. */
+ u64 host_addr;
+ u32 flags; /* GNTMAP_* */
+ grant_ref_t ref;
+ domid_t dom;
+ /* OUT parameters. */
+ s16 status; /* GNTST_* */
+ grant_handle_t handle;
+ u64 dev_bus_addr;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref);
+
+/*
+ * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
+ * tracked by <handle>. If <host_addr> or <dev_bus_addr> is zero, that
+ * field is ignored. If non-zero, they must refer to a device/host mapping
+ * that is tracked by <handle>.
+ * NOTES:
+ *  1. The call may fail in an undefined manner if either mapping is not
+ *     tracked by <handle>.
+ *  2. After executing a batch of unmaps, it is guaranteed that no stale
+ *     mappings will remain in the device or host TLBs.
+ */
+#define GNTTABOP_unmap_grant_ref 1
+struct gnttab_unmap_grant_ref {
+ /* IN parameters. */
+ u64 host_addr;
+ u64 dev_bus_addr;
+ grant_handle_t handle;
+ /* OUT parameters. */
+ s16 status; /* GNTST_* */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref);
+
+/*
+ * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
+ * <nr_frames> pages. The frame addresses are written to the <frame_list>.
+ * Only <nr_frames> addresses are written, even if the table is larger.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ *  3. Xen may not support more than a single grant-table page per domain.
+ */
+#define GNTTABOP_setup_table 2
+struct gnttab_setup_table {
+ /* IN parameters. */
+ domid_t dom;
+ u32 nr_frames;
+ /* OUT parameters. */
+ s16 status; /* GNTST_* */
+
+ GUEST_HANDLE(xen_pfn_t) frame_list;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table);
+
+/*
+ * GNTTABOP_dump_table: Dump the contents of the grant table to the
+ * xen console. Debugging use only.
+ */
+#define GNTTABOP_dump_table 3
+struct gnttab_dump_table {
+ /* IN parameters. */
+ domid_t dom;
+ /* OUT parameters. */
+ s16 status; /* GNTST_* */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table);
+
+/*
+ * GNTTABOP_transfer: Transfer <frame> to a foreign domain. The
+ * foreign domain has previously registered its interest in the transfer via
+ * <domid, ref>.
+ *
+ * Note that, even if the transfer fails, the specified page no longer belongs
+ * to the calling domain *unless* the error is GNTST_bad_page.
+ */
+#define GNTTABOP_transfer 4
+struct gnttab_transfer {
+ /* IN parameters. */
+ xen_pfn_t mfn;
+ domid_t domid;
+ grant_ref_t ref;
+ /* OUT parameters. */
+ s16 status;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer);
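To make the calling convention concrete: each GNTTABOP_* op takes an array of
the structures above. A sketch of a single-op map, assuming a
HYPERVISOR_grant_table_op(cmd, uop, count) hypercall wrapper in the style of
Linux (the helper name here is invented):

    static int map_foreign_grant(domid_t dom, grant_ref_t ref, u64 vaddr,
                                 grant_handle_t *handle)
    {
        struct gnttab_map_grant_ref op = {
            .host_addr = vaddr,          /* a host virtual address */
            .flags     = GNTMAP_host_map,
            .ref       = ref,
            .dom       = dom,
        };

        if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
            return -1;                   /* the hypercall itself failed */
        if (op.status != GNTST_okay)
            return op.status;            /* per-op GNTST_* error */
        *handle = op.handle;             /* needed later to unmap */
        return 0;
    }

Note the two failure levels: the hypercall return value covers the whole
batch, while each element carries its own GNTST_* status.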
+
+/*
+ * GNTTABOP_copy: Hypervisor based copy.
+ * Source and destinations can be either MFNs or, for foreign domains,
+ * grant references. The foreign domain has to grant read/write access
+ * in its grant table.
+ *
+ * The flags specify what type source and destinations are (either MFN
+ * or grant reference).
+ *
+ * Note that this can also be used to copy data between two domains
+ * via a third party if the source and destination domains had previously
+ * granted appropriate access to their pages to the third party.
+ *
+ * source_offset specifies an offset in the source frame, dest_offset
+ * the offset in the target frame and len specifies the number of
+ * bytes to be copied.
+ */
+
+#define _GNTCOPY_source_gref (0)
+#define GNTCOPY_source_gref (1 << _GNTCOPY_source_gref)
+#define _GNTCOPY_dest_gref (1)
+#define GNTCOPY_dest_gref (1 << _GNTCOPY_dest_gref)
+
+#define GNTTABOP_copy 5
+struct gnttab_copy {
+ /* IN parameters. */
+ struct {
+  union {
+   grant_ref_t ref;
+   xen_pfn_t gmfn;
+  } u;
+  domid_t domid;
+  u16 offset;
+ } source, dest;
+ u16 len;
+ u16 flags; /* GNTCOPY_* */
+ /* OUT parameters. */
+ s16 status;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy);
+
+/*
+ * GNTTABOP_query_size: Query the current and maximum sizes of the shared
+ * grant table.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+#define GNTTABOP_query_size 6
+struct gnttab_query_size {
+ /* IN parameters. */
+ domid_t dom;
+ /* OUT parameters. */
+ u32 nr_frames;
+ u32 max_nr_frames;
+ s16 status; /* GNTST_* */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);
+
+/*
+ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
+ * tracked by <handle> but atomically replace the page table entry with one
+ * pointing to the machine address under <new_addr>. <new_addr> will be
+ * redirected to the null entry.
+ * NOTES:
+ *  1. The call may fail in an undefined manner if either mapping is not
+ *     tracked by <handle>.
+ *  2. After executing a batch of unmaps, it is guaranteed that no stale
+ *     mappings will remain in the device or host TLBs.
+ */
+#define GNTTABOP_unmap_and_replace 7
+struct gnttab_unmap_and_replace {
+ /* IN parameters. */
+ u64 host_addr;
+ u64 new_addr;
+ grant_handle_t handle;
+ /* OUT parameters. */
+ s16 status; /* GNTST_* */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_and_replace);
+
+/*
+ * GNTTABOP_set_version: Request a particular version of the grant
+ * table shared table structure. This operation can only be performed
+ * once in any given domain. It must be performed before any grants
+ * are activated; otherwise, the domain will be stuck with version 1.
+ * The only defined versions are 1 and 2.
+ */
+#define GNTTABOP_set_version 8
+struct gnttab_set_version {
+ /* IN parameters */
+ u32 version;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_set_version);
+
+/*
+ * GNTTABOP_get_status_frames: Get the list of frames used to store grant
+ * status for <dom>. In grant format version 2, the status is separated
+ * from the other shared grant fields to allow more efficient synchronization
+ * using barriers instead of atomic cmpxchg operations.
+ * <nr_frames> specifies the size of vector <frame_list>.
+ * The frame addresses are returned in the <frame_list>.
+ * Only <nr_frames> addresses are returned, even if the table is larger.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+#define GNTTABOP_get_status_frames 9
+struct gnttab_get_status_frames {
+ /* IN parameters. */
+ u32 nr_frames;
+ domid_t dom;
+ /* OUT parameters. */
+ s16 status; /* GNTST_* */
+
+ GUEST_HANDLE(u64) frame_list;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_status_frames);
+
+/*
+ * GNTTABOP_get_version: Get the grant table version which is in
+ * effect for domain <dom>.
+ */
+#define GNTTABOP_get_version 10
+struct gnttab_get_version {
+ /* IN parameters */
+ domid_t dom;
+ u16 pad;
+ /* OUT parameters */
+ u32 version;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_get_version);
+
+/*
+ * Issue one or more cache maintenance operations on a portion of a
+ * page granted to the calling domain by a foreign domain.
+ */
+#define GNTTABOP_cache_flush 12
+struct gnttab_cache_flush {
+ union {
+  u64 dev_bus_addr;
+  grant_ref_t ref;
+ } a;
+ u16 offset; /* offset from start of grant */
+ u16 length; /* size within the grant */
+#define GNTTAB_CACHE_CLEAN (1 << 0)
+#define GNTTAB_CACHE_INVAL (1 << 1)
+#define GNTTAB_CACHE_SOURCE_GREF (1 << 31)
+ u32 op;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(gnttab_cache_flush);
+
+/*
+ * Bitfield values for update_pin_status.flags.
+ */
+ /* Map the grant entry for access by I/O devices. */
+#define _GNTMAP_device_map (0)
+#define GNTMAP_device_map (1 << _GNTMAP_device_map)
+/* Map the grant entry for access by host CPUs. */
+#define _GNTMAP_host_map (1)
+#define GNTMAP_host_map (1 << _GNTMAP_host_map)
+/* Accesses to the granted frame will be restricted to read-only access. */
+#define _GNTMAP_readonly (2)
+#define GNTMAP_readonly (1 << _GNTMAP_readonly)
+/*
+ * GNTMAP_host_map subflag:
+ * 0 => The host mapping is usable only by the guest OS.
+ * 1 => The host mapping is usable by guest OS + current application.
+ */
+#define _GNTMAP_application_map (3)
+#define GNTMAP_application_map (1 << _GNTMAP_application_map)
+
+/*
+ * GNTMAP_contains_pte subflag:
+ * 0 => This map request contains a host virtual address.
+ * 1 => This map request contains the machine address of the PTE to update.
+ */
+#define _GNTMAP_contains_pte (4)
+#define GNTMAP_contains_pte (1 << _GNTMAP_contains_pte)
+
+/*
+ * Bits to be placed in guest kernel available PTE bits (architecture
+ * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set).
+ */
+#define _GNTMAP_guest_avail0 (16)
+#define GNTMAP_guest_avail_mask ((u32)~0 << _GNTMAP_guest_avail0)
+
+/*
+ * Values for error status returns. All errors are -ve.
+ */
+#define GNTST_okay (0) /* Normal return. */
+#define GNTST_general_error (-1) /* General undefined error. */
+#define GNTST_bad_domain (-2) /* Unrecognised domain id. */
+#define GNTST_bad_gntref (-3) /* Unrecognised or inappropriate gntref. */
+#define GNTST_bad_handle (-4) /* Unrecognised or inappropriate handle. */
+#define GNTST_bad_virt_addr (-5) /* Inappropriate virtual address to map. */
+#define GNTST_bad_dev_addr (-6) /* Inappropriate device address to unmap.*/
+#define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */
+#define GNTST_permission_denied (-8) /* Not enough privilege for operation. */
+#define GNTST_bad_page (-9) /* Specified page was invalid for op. */
+#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */
+#define GNTST_address_too_big (-11) /* transfer page address too large. */
+#define GNTST_eagain (-12) /* Operation not done; try again. */
+
+#define GNTTABOP_error_msgs { \
+ "okay", \
+ "undefined error", \
+ "unrecognised domain id", \
+ "invalid grant reference", \
+ "invalid mapping handle", \
+ "invalid virtual address", \
+ "invalid device address", \
+ "no spare translation slot in the I/O MMU", \
+ "permission denied", \
+ "bad page", \
+ "copy arguments cross page boundary", \
+ "page address size too large", \
+ "operation not done; try again" \
+}
+
+#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
new file mode 100644
index 0000000000..1c53cad729
--- /dev/null
+++ b/include/xen/interface/hvm/hvm_op.h
@@ -0,0 +1,69 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_HVM_OP_H__
+#define __XEN_PUBLIC_HVM_HVM_OP_H__
+
+/* Get/set subcommands: the second argument of the hypercall is a
+ * pointer to a xen_hvm_param struct.
+ */
+#define HVMOP_set_param 0
+#define HVMOP_get_param 1
+struct xen_hvm_param {
+ domid_t domid; /* IN */
+ u32 index; /* IN */
+ u64 value; /* IN/OUT */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param);
+
+/* Hint from PV drivers for pagetable destruction. */
+#define HVMOP_pagetable_dying 9
+struct xen_hvm_pagetable_dying {
+ /* Domain with a pagetable about to be destroyed. */
+ domid_t domid;
+ /* guest physical address of the toplevel pagetable dying */
+ aligned_u64 gpa;
+};
+
+typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t);
+
+enum hvmmem_type_t {
+ HVMMEM_ram_rw, /* Normal read/write guest RAM */
+ HVMMEM_ram_ro, /* Read-only; writes are discarded */
+ HVMMEM_mmio_dm, /* Reads and writes go to the device model */
+};
+
+#define HVMOP_get_mem_type 15
+/* Return hvmmem_type_t for the specified pfn. */
+struct xen_hvm_get_mem_type {
+ /* Domain to be queried. */
+ domid_t domid;
+ /* OUT variable. */
+ u16 mem_type;
+ u16 pad[2]; /* align next field on 8-byte boundary */
+ /* IN variable. */
+ u64 pfn;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_mem_type);
+
+#endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h
new file mode 100644
index 0000000000..4d61fc58d9
--- /dev/null
+++ b/include/xen/interface/hvm/params.h
@@ -0,0 +1,127 @@
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_HVM_PARAMS_H__
+#define __XEN_PUBLIC_HVM_PARAMS_H__
+
+#include <xen/interface/hvm/hvm_op.h>
+
+/*
+ * Parameter space for HVMOP_{set,get}_param.
+ */
+
+#define HVM_PARAM_CALLBACK_IRQ 0
+/*
+ * How should CPU0 event-channel notifications be delivered?
+ *
+ * If val == 0 then CPU0 event-channel notifications are not delivered.
+ * If val != 0, val[63:56] encodes the type, as follows:
+ */
+
+#define HVM_PARAM_CALLBACK_TYPE_GSI 0
+/*
+ * val[55:0] is a delivery GSI. GSI 0 cannot be used, as it aliases val == 0,
+ * and disables all notifications.
+ */
+
+#define HVM_PARAM_CALLBACK_TYPE_PCI_INTX 1
+/*
+ * val[55:0] is a delivery PCI INTx line:
+ * Domain = val[47:32], Bus = val[31:16], DevFn = val[15:8], IntX = val[1:0]
+ */
+
+#if defined(__i386__) || defined(__x86_64__)
+#define HVM_PARAM_CALLBACK_TYPE_VECTOR 2
+/*
+ * val[7:0] is a vector number. Check for XENFEAT_hvm_callback_vector to know
+ * if this delivery method is available.
+ */
+#elif defined(__arm__) || defined(__aarch64__)
+#define HVM_PARAM_CALLBACK_TYPE_PPI 2
+/*
+ * val[55:16] needs to be zero.
+ * val[15:8] is interrupt flag of the PPI used by event-channel:
+ *  bit 8: the PPI is edge(1) or level(0) triggered
+ *  bit 9: the PPI is active low(1) or high(0)
+ * val[7:0] is a PPI number used by event-channel.
+ * This is only used by ARM/ARM64 and masking/eoi the interrupt associated to
+ * the notification is handled by the interrupt controller.
+ */
+#endif
+
+#define HVM_PARAM_STORE_PFN 1
+#define HVM_PARAM_STORE_EVTCHN 2
+
+#define HVM_PARAM_PAE_ENABLED 4
+
+#define HVM_PARAM_IOREQ_PFN 5
+
+#define HVM_PARAM_BUFIOREQ_PFN 6
+
+/*
+ * Set mode for virtual timers (currently x86 only):
+ * delay_for_missed_ticks (default):
+ *  Do not advance a vcpu's time beyond the correct delivery time for
+ *  interrupts that have been missed due to preemption. Deliver missed
+ *  interrupts when the vcpu is rescheduled and advance the vcpu's virtual
+ *  time stepwise for each one.
+ * no_delay_for_missed_ticks:
+ *  As above, missed interrupts are delivered, but guest time always tracks
+ *  wallclock (i.e., real) time while doing so.
+ * no_missed_ticks_pending:
+ *  No missed interrupts are held pending. Instead, to ensure ticks are
+ *  delivered at some non-zero rate, if we detect missed ticks then the
+ *  internal tick alarm is not disabled if the VCPU is preempted during the
+ *  next tick period.
+ * one_missed_tick_pending:
+ *  Missed interrupts are collapsed together and delivered as one 'late tick'.
+ *  Guest time always tracks wallclock (i.e., real) time.
+ */
+#define HVM_PARAM_TIMER_MODE 10
+#define HVMPTM_delay_for_missed_ticks 0
+#define HVMPTM_no_delay_for_missed_ticks 1
+#define HVMPTM_no_missed_ticks_pending 2
+#define HVMPTM_one_missed_tick_pending 3
+
+/* Boolean: Enable virtual HPET (high-precision event timer)? (x86-only) */
+#define HVM_PARAM_HPET_ENABLED 11
+
+/* Identity-map page directory used by Intel EPT when CR0.PG=0. */
+#define HVM_PARAM_IDENT_PT 12
+
+/* Device Model domain, defaults to 0. */
+#define HVM_PARAM_DM_DOMAIN 13
+
+/* ACPI S state: currently support S0 and S3 on x86. */
+#define HVM_PARAM_ACPI_S_STATE 14
+
+/* TSS used on Intel when CR0.PE=0. */
+#define HVM_PARAM_VM86_TSS 15
+
+/* Boolean: Enable aligning all periodic vpts to reduce interrupts */
+#define HVM_PARAM_VPT_ALIGN 16
+
+/* Console debug shared memory ring and event channel */
+#define HVM_PARAM_CONSOLE_PFN 17
+#define HVM_PARAM_CONSOLE_EVTCHN 18
+
+#define HVM_NR_PARAMS 19
+
+#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
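Since this is the first consumer-visible piece of the HVM interface in the
series: a guest reads one of these parameters by passing a xen_hvm_param
through HVMOP_get_param. A sketch, assuming a HYPERVISOR_hvm_op(cmd, arg)
hypercall wrapper in the style of Linux and DOMID_SELF from xen.h (the helper
name is invented):

    static int hvm_get_param(u32 index, u64 *value)
    {
        struct xen_hvm_param p = {
            .domid = DOMID_SELF,
            .index = index,          /* e.g. HVM_PARAM_STORE_EVTCHN */
        };
        int rc = HYPERVISOR_hvm_op(HVMOP_get_param, &p);

        if (rc >= 0)
            *value = p.value;        /* OUT on get, IN on set */
        return rc;
    }

This is how a guest discovers the xenstore page (HVM_PARAM_STORE_PFN) and the
console page (HVM_PARAM_CONSOLE_PFN) that the rest of the series relies on.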
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
new file mode 100644
index 0000000000..7d74c99226
--- /dev/null
+++ b/include/xen/interface/io/blkif.h
@@ -0,0 +1,726 @@
+/******************************************************************************
+ * blkif.h
+ *
+ * Unified block-device I/O interface for Xen guest OSes.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2003-2004, Keir Fraser
+ * Copyright (c) 2012, Spectra Logic Corporation
+ */
+
+#ifndef __XEN_PUBLIC_IO_BLKIF_H__
+#define __XEN_PUBLIC_IO_BLKIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * Front->back notifications: When enqueuing a new request, sending a
+ * notification can be made conditional on req_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Backends must set
+ * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
+ *
+ * Back->front notifications: When enqueuing a new response, sending a
+ * notification can be made conditional on rsp_event (i.e., the generic
+ * hold-off mechanism provided by the ring macros). Frontends must set
+ * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
+ */
+
+#ifndef blkif_vdev_t
+#define blkif_vdev_t u16
+#endif
+#define blkif_sector_t u64
+
+/*
+ * Feature and Parameter Negotiation
+ * =================================
+ * The two halves of a Xen block driver utilize nodes within the XenStore to
+ * communicate capabilities and to negotiate operating parameters. This
+ * section enumerates these nodes which reside in the respective front and
+ * backend portions of the XenStore, following the XenBus convention.
+ *
+ * All data in the XenStore is stored as strings. Nodes specifying numeric
+ * values are encoded in decimal. Integer value ranges listed below are
+ * expressed as fixed sized integer types capable of storing the conversion
+ * of a properly formatted node string, without loss of information.
+ *
+ * Any specified default value is in effect if the corresponding XenBus node
+ * is not present in the XenStore.
+ *
+ * XenStore nodes in sections marked "PRIVATE" are solely for use by the
+ * driver side whose XenBus tree contains them.
+ *
+ * XenStore nodes marked "DEPRECATED" in their notes section should only be
+ * used to provide interoperability with legacy implementations.
+ *
+ * See the XenBus state transition diagram below for details on when XenBus
+ * nodes must be published and when they can be queried.
+ *
+ *****************************************************************************
+ *                            Backend XenBus Nodes
+ *****************************************************************************
+ *
+ *------------------ Backend Device Identification (PRIVATE) ------------------
+ *
+ * mode
+ *      Values:         "r" (read only), "w" (writable)
+ *
+ *      The read or write access permissions to the backing store to be
+ *      granted to the frontend.
+ *
+ * params
+ *      Values:         string
+ *
+ *      A free formatted string providing sufficient information for the
+ *      hotplug script to attach the device and provide a suitable
+ *      handler (i.e., a block device) for blkback to use.
+ *
+ * physical-device
+ *      Values:         "MAJOR:MINOR"
+ *      Notes: 11
+ *
+ *      MAJOR and MINOR are the major number and minor number of the
+ *      backing device respectively.
+ *
+ * physical-device-path
+ *      Values:         path string
+ *
+ *      A string that contains the absolute path to the disk image. On
+ *      NetBSD and Linux this is always a block device, while on FreeBSD
+ *      it can be either a block device or a regular file.
+ *
+ * type
+ *      Values:         "file", "phy", "tap"
+ *
+ *      The type of the backing device/object.
+ *
+ *
+ * direct-io-safe
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      The underlying storage is not affected by the direct IO memory
+ *      lifetime bug. See:
+ *        http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
+ *
+ *      Therefore this option gives the backend permission to use
+ *      O_DIRECT, notwithstanding that bug.
+ *
+ *      That is, if this option is enabled, use of O_DIRECT is safe,
+ *      in circumstances where we would normally have avoided it as a
+ *      workaround for that bug. This option is not relevant for all
+ *      backends, and even not necessarily supported for those for
+ *      which it is relevant. A backend which knows that it is not
+ *      affected by the bug can ignore this option.
+ *
+ *      This option doesn't require a backend to use O_DIRECT, so it
+ *      should not be used to try to control the caching behaviour.
+ *
+ *--------------------------------- Features ---------------------------------
+ *
+ * feature-barrier
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_WRITE_BARRIER request opcode. Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-flush-cache
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-discard
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the backend can process requests
+ *      containing the BLKIF_OP_DISCARD request opcode. Requests
+ *      of this type may still be returned at any time with the
+ *      BLKIF_RSP_EOPNOTSUPP result code.
+ *
+ * feature-persistent
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes: 7
+ *
+ *      A value of "1" indicates that the backend can keep the grants used
+ *      by the frontend driver mapped, so the same set of grants should be
+ *      used in all transactions. The maximum number of grants the backend
+ *      can map persistently depends on the implementation, but ideally it
+ *      should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this
+ *      feature the backend doesn't need to unmap each grant, preventing
+ *      costly TLB flushes. The backend driver should only map grants
+ *      persistently if the frontend supports it. If a backend driver chooses
+ *      to use the persistent protocol when the frontend doesn't support it,
+ *      it will probably hit the maximum number of persistently mapped grants
+ *      (due to the fact that the frontend won't be reusing the same grants),
+ *      and fall back to non-persistent mode. Backend implementations may
+ *      shrink or expand the number of persistently mapped grants without
+ *      notifying the frontend depending on memory constraints (this might
+ *      cause a performance degradation).
+ *
+ *      If a backend driver wants to limit the maximum number of persistently
+ *      mapped grants to a value less than RING_SIZE *
+ *      BLKIF_MAX_SEGMENTS_PER_REQUEST, an LRU strategy should be used to
+ *      discard the grants that are less commonly used. Using an LRU in the
+ *      backend driver paired with a LIFO queue in the frontend will
+ *      allow us to have better performance in this scenario.
+ *
+ *----------------------- Request Transport Parameters ------------------------
+ *
+ * max-ring-page-order
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Notes:          1, 3
+ *
+ *      The maximum supported size of the request ring buffer in units of
+ *      lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
+ *      etc.).
+ *
+ * max-ring-pages
+ *      Values:         <uint32_t>
+ *      Default Value:  1
+ *      Notes:          DEPRECATED, 2, 3
+ *
+ *      The maximum supported size of the request ring buffer in units of
+ *      machine pages. The value must be a power of 2.
+ *
+ *------------------------- Backend Device Properties -------------------------
+ *
+ * discard-enable
+ *      Values:          0/1 (boolean)
+ *      Default Value:   1
+ *
+ *      This optional property, set by the toolstack, instructs the backend
+ *      to offer (or not to offer) discard to the frontend. If the property
+ *      is missing the backend should offer discard if the backing storage
+ *      actually supports it.
+ *
+ * discard-alignment
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Notes:          4, 5
+ *
+ *      The offset, in bytes from the beginning of the virtual block device,
+ *      to the first, addressable, discard extent on the underlying device.
+ *
+ * discard-granularity
+ *      Values:         <uint32_t>
+ *      Default Value:  <"sector-size">
+ *      Notes:          4
+ *
+ *      The size, in bytes, of the individually addressable discard extents
+ *      of the underlying device.
+ *
+ * discard-secure
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes:          10
+ *
+ *      A value of "1" indicates that the backend can process BLKIF_OP_DISCARD
+ *      requests with the BLKIF_DISCARD_SECURE flag set.
+ *
+ * info
+ *      Values:         <uint32_t> (bitmap)
+ *
+ *      A collection of bit flags describing attributes of the backing
+ *      device. The VDISK_* macros define the meaning of each bit
+ *      location.
+ *
+ * sector-size
+ *      Values:         <uint32_t>
+ *
+ *      The logical block size, in bytes, of the underlying storage. This
+ *      must be a power of two with a minimum value of 512.
+ *
+ *      NOTE: Because of implementation bugs in some frontends this must be
+ *            set to 512, unless the frontend advertises a non-zero value
+ *            in its "feature-large-sector-size" xenbus node. (See below).
+ *
+ * physical-sector-size
+ *      Values:         <uint32_t>
+ *      Default Value:  <"sector-size">
+ *
+ *      The physical block size, in bytes, of the backend storage. This
+ *      must be an integer multiple of "sector-size".
+ *
+ * sectors
+ *      Values:         <u64>
+ *
+ *      The size of the backend device, expressed in units of "sector-size".
+ *      The product of "sector-size" and "sectors" must also be an integer
+ *      multiple of "physical-sector-size", if that node is present.
+ *
+ *****************************************************************************
+ *                            Frontend XenBus Nodes
+ *****************************************************************************
+ *
+ *----------------------- Request Transport Parameters -----------------------
+ *
+ * event-channel
+ *      Values:         <uint32_t>
+ *
+ *      The identifier of the Xen event channel used to signal activity
+ *      in the ring buffer.
+ *
+ * ring-ref
+ *      Values:         <uint32_t>
+ *      Notes:          6
+ *
+ *      The Xen grant reference granting permission for the backend to map
+ *      the sole page in a single page sized ring buffer.
+ *
+ * ring-ref%u
+ *      Values:         <uint32_t>
+ *      Notes:          6
+ *
+ *      For a frontend providing a multi-page ring, a "number of ring pages"
+ *      sized list of nodes, each containing a Xen grant reference granting
+ *      permission for the backend to map the page of the ring located
+ *      at page index "%u". Page indexes are zero based.
+ *
+ * protocol
+ *      Values:         string (XEN_IO_PROTO_ABI_*)
+ *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
+ *
+ *      The machine ABI rules governing the format of all ring request and
+ *      response structures.
+ *
+ * ring-page-order
+ *      Values:         <uint32_t>
+ *      Default Value:  0
+ *      Maximum Value:  MAX(ffs(max-ring-pages) - 1, max-ring-page-order)
+ *      Notes:          1, 3
+ *
+ *      The size of the frontend allocated request ring buffer in units
+ *      of lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
+ *      etc.).
+ *
+ * num-ring-pages
+ *      Values:         <uint32_t>
+ *      Default Value:  1
+ *      Maximum Value:  MAX(max-ring-pages,(0x1 << max-ring-page-order))
+ *      Notes:          DEPRECATED, 2, 3
+ *
+ *      The size of the frontend allocated request ring buffer in units of
+ *      machine pages. The value must be a power of 2.
+ *
+ *--------------------------------- Features ---------------------------------
+ *
+ * feature-persistent
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes: 7, 8, 9
+ *
+ *      A value of "1" indicates that the frontend will reuse the same grants
+ *      for all transactions, allowing the backend to map them with write
+ *      access (even when it should be read-only). If the frontend hits the
+ *      maximum number of allowed persistently mapped grants, it can fall back
+ *      to non-persistent mode. This will cause a performance degradation,
+ *      since the backend driver will still try to map those grants
+ *      persistently. Since the persistent grants protocol is compatible with
+ *      the previous protocol, a frontend driver can choose to work in
+ *      persistent mode even when the backend doesn't support it.
+ *
+ *      It is recommended that the frontend driver stores the persistently
+ *      mapped grants in a LIFO queue, so a subset of all persistently mapped
+ *      grants gets used commonly. This is done in case the backend driver
+ *      decides to limit the maximum number of persistently mapped grants
+ *      to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *
+ * feature-large-sector-size
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      A value of "1" indicates that the frontend will correctly supply and
+ *      interpret all sector-based quantities in terms of the "sector-size"
+ *      value supplied in the backend info, whatever that may be set to.
+ *      If this node is not present or its value is "0" then it is assumed
+ *      that the frontend requires that the logical block size is 512 as it
+ *      is hardcoded (which is the case in some frontend implementations).
+ *
+ *------------------------- Virtual Device Properties -------------------------
+ *
+ * device-type
+ *      Values:         "disk", "cdrom", "floppy", etc.
+ *
+ * virtual-device
+ *      Values:         <uint32_t>
+ *
+ *      A value indicating the physical device to virtualize within the
+ *      frontend's domain. (e.g. "The first ATA disk", "The third SCSI
+ *      disk", etc.)
+ *
+ *      See docs/misc/vbd-interface.txt for details on the format of this
+ *      value.
+ *
+ * Notes
+ * -----
+ *  (1) Multi-page ring buffer scheme first developed in the Citrix XenServer
+ *      PV drivers.
+ *  (2) Multi-page ring buffer scheme first used in some RedHat distributions
+ *      including a distribution deployed on certain nodes of the Amazon
+ *      EC2 cluster.
+ *  (3) Support for multi-page ring buffers was implemented independently,
+ *      in slightly different forms, by both Citrix and RedHat/Amazon.
+ *      For full interoperability, block front and backends should publish
+ *      identical ring parameters, adjusted for unit differences, to the
+ *      XenStore nodes used in both schemes.
+ *  (4) Devices that support discard functionality may internally allocate space
+ *      (discardable extents) in units that are larger than the exported logical
+ *      block size. If the backing device has such discardable extents the
+ *      backend should provide both discard-granularity and discard-alignment.
+ *      Providing just one of the two may be considered an error by the frontend.
+ *      Backends supporting discard should include discard-granularity and
+ *      discard-alignment even if they support discarding individual sectors.
+ *      Frontends should assume discard-alignment == 0 and discard-granularity
+ *      == sector size if these keys are missing.
+ *  (5) The discard-alignment parameter allows a physical device to be
+ *      partitioned into virtual devices that do not necessarily begin or
+ *      end on a discardable extent boundary.
+ *  (6) When there is only a single page allocated to the request ring,
+ *      'ring-ref' is used to communicate the grant reference for this
+ *      page to the backend. When using a multi-page ring, the 'ring-ref'
+ *      node is not created. Instead 'ring-ref0' - 'ring-refN' are used.
+ *  (7) When using persistent grants data has to be copied from/to the page
+ *      where the grant is currently mapped. The overhead of doing this copy
+ *      however doesn't suppress the speed improvement of not having to unmap
+ *      the grants.
+ *  (8) The frontend driver has to allow the backend driver to map all grants
+ *      with write access, even when they should be mapped read-only, since
+ *      further requests may reuse these grants and require write permissions.
+ *  (9) The Linux implementation doesn't have a limit on the maximum number of
+ *      grants that can be persistently mapped in the frontend driver, but
+ *      due to the frontend driver implementation it should never be bigger
+ *      than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ * (10) The discard-secure property may be present and will be set to 1 if the
+ *      backing device supports secure discard.
+ * (11) Only used by Linux and NetBSD.
+ */
+
+/*
+ * Multiple hardware queues/rings:
+ * If supported, the backend will write the key "multi-queue-max-queues" to
+ * the directory for that vbd, and set its value to the maximum supported
+ * number of queues.
+ * Frontends that are aware of this feature and wish to use it can write the
+ * key "multi-queue-num-queues" with the number they wish to use, which must be
+ * greater than zero, and no more than the value reported by the backend in
+ * "multi-queue-max-queues".
+ *
+ * For frontends requesting just one queue, the usual event-channel and
+ * ring-ref keys are written as before, simplifying the backend processing
+ * to avoid distinguishing between a frontend that doesn't understand the
+ * multi-queue feature, and one that does, but requested only one queue.
+ *
+ * Frontends requesting two or more queues must not write the toplevel
+ * event-channel and ring-ref keys, instead writing those keys under sub-keys
+ * having the name "queue-N" where N is the integer ID of the queue/ring for
+ * which those keys belong. Queues are indexed from zero.
+ * For example, a frontend with two queues must write the following set of
+ * queue-related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ * It is also possible to use multiple queues/rings together with the
+ * multi-page ring buffer feature.
+ * For example, a frontend that requests two queues/rings, where each ring
+ * buffer is two pages, must write the following set of related keys:
+ *
+ * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
+ * /local/domain/1/device/vbd/0/ring-page-order = "1"
+ * /local/domain/1/device/vbd/0/queue-0 = ""
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
+ * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
+ * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
+ * /local/domain/1/device/vbd/0/queue-1 = ""
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
+ * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
+ * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
+ *
+ */
+
+/*
+ * STATE DIAGRAMS
+ *
+ *****************************************************************************
+ *                                   Startup                                 *
+ *****************************************************************************
+ *
+ * Tool stack creates front and back nodes with state XenbusStateInitialising.
+ *
+ * Front                                Back
+ * =================================    =====================================
+ * XenbusStateInitialising              XenbusStateInitialising
+ *  o Query virtual device               o Query backend device identification
+ *    properties.                          data.
+ *  o Setup OS device instance.          o Open and validate backend device.
+ *                                       o Publish backend features and
+ *                                         transport parameters.
+ *                                                      |
+ *                                                      |
+ *                                                      V
+ *                                      XenbusStateInitWait
+ *
+ * o Query backend features and
+ *   transport parameters.
+ * o Allocate and initialize the
+ *   request ring.
+ * o Publish transport parameters
+ *   that will be in effect during
+ *   this connection.
+ *              |
+ *              |
+ *              V
+ * XenbusStateInitialised
+ *
+ *                                      o Query frontend transport parameters.
+ *                                      o Connect to the request ring and
+ *                                        event channel.
+ *                                      o Publish backend device properties.
+ *                                                      |
+ *                                                      |
+ *                                                      V
+ *                                      XenbusStateConnected
+ *
+ * o Query backend device properties.
+ * o Finalize OS virtual device
+ *   instance.
+ *              |
+ *              |
+ *              V
+ * XenbusStateConnected
+ *
+ * Note: Drivers that do not support any optional features, or the negotiation
+ *       of transport parameters, can skip certain states in the state machine:
+ *
+ *       o A frontend may transition to XenbusStateInitialised without
+ *         waiting for the backend to enter XenbusStateInitWait. In this
+ *         case, default transport parameters are in effect and any
+ *         transport parameters published by the frontend must contain
+ *         their default values.
+ *
+ *       o A backend may transition to XenbusStateInitialised, bypassing
+ *         XenbusStateInitWait, without waiting for the frontend to first
+ *         enter the XenbusStateInitialised state. In this case, default
+ *         transport parameters are in effect and any transport parameters
+ *         published by the backend must contain their default values.
+ *
+ *       Drivers that support optional features and/or transport parameter
+ *       negotiation must tolerate these additional state transition paths.
+ *       In general this means performing the work of any skipped state
+ *       transition, if it has not already been performed, in addition to the
+ *       work associated with entry into the current state.
+ */
+
+/*
+ * REQUEST CODES.
+ */
+#define BLKIF_OP_READ 0
+#define BLKIF_OP_WRITE 1
+/*
+ * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER
+ * operation code ("barrier request") must be completed prior to the
+ * execution of the barrier request. All writes issued after the barrier
+ * request must not execute until after the completion of the barrier request.
+ *
+ * Optional. See "feature-barrier" XenBus node documentation above.
+ */
+#define BLKIF_OP_WRITE_BARRIER 2
+/*
+ * Commit any uncommitted contents of the backing device's volatile cache
+ * to stable storage.
+ *
+ * Optional. See "feature-flush-cache" XenBus node documentation above.
+ */
+#define BLKIF_OP_FLUSH_DISKCACHE 3
+/*
+ * Used in SLES sources for device specific command packet
+ * contained within the request. Reserved for that purpose.
+ */
+#define BLKIF_OP_RESERVED_1 4
+/*
+ * Indicate to the backend device that a region of storage is no longer in
+ * use, and may be discarded at any time without impact to the client. If
+ * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the
+ * discarded region on the device must be rendered unrecoverable before the
+ * command returns.
+ *
+ * This operation is analogous to performing a trim (ATA) or unmap (SCSI)
+ * command on a native device.
+ *
+ * More information about trim/unmap operations can be found at:
+ * http://t13.org/Documents/UploadedDocuments/docs2008/
+ *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
+ * http://www.seagate.com/staticfiles/support/disc/manuals/
+ *     Interface%20manuals/100293068c.pdf
+ *
+ * Optional. See "feature-discard", "discard-alignment",
+ * "discard-granularity", and "discard-secure" in the XenBus node
+ * documentation above.
+ */
+#define BLKIF_OP_DISCARD 5
+
+/*
+ * Recognized if "feature-max-indirect-segments" is present in the backend
+ * xenbus info. The "feature-max-indirect-segments" node contains the maximum
+ * number of segments allowed by the backend per request. If the node is
+ * present, the frontend might use blkif_request_indirect structs in order to
+ * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
+ * maximum number of indirect segments is fixed by the backend, but the
+ * frontend can issue requests with any number of indirect segments as long as
+ * it's less than the number provided by the backend. The indirect_grefs field
+ * in blkif_request_indirect should be filled by the frontend with the
+ * grant references of the pages that are holding the indirect segments.
+ * These pages are filled with an array of blkif_request_segment that hold the
+ * information about the segments. The number of indirect pages to use is
+ * determined by the number of segments an indirect request contains. Every
+ * indirect page can contain a maximum of
+ * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
+ * calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
+ *
+ * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
+ * create the "feature-max-indirect-segments" node!
+ */
+#define BLKIF_OP_INDIRECT 6
+
+/*
+ * Maximum scatter/gather segments per request.
+ * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
+ * NB. This could be 12 if the ring indexes weren't stored in the same page.
+ */
+#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+
+/*
+ * Maximum number of indirect pages to use per request.
+ */
+#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
+
+/*
+ * NB. 'first_sect' and 'last_sect' in blkif_request_segment, as well as
+ * 'sector_number' in blkif_request, blkif_request_discard and
+ * blkif_request_indirect are sector-based quantities.
See the description + * of the "feature-large-sector-size" frontend xenbus node above for + * more information. + */ +struct blkif_request_segment { + grant_ref_t gref; /* reference to I/O buffer frame */ + /* @first_sect: first sector in frame to transfer (inclusive). */ + /* @last_sect: last sector in frame to transfer (inclusive). */ + u8 first_sect, last_sect; +}; + +/* + * Starting ring element for any I/O request. + */ +struct blkif_request { + u8 operation; /* BLKIF_OP_??? */ + u8 nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + u64 id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; + +typedef struct blkif_request blkif_request_t; + +/* + * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD + * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request) + */ +struct blkif_request_discard { + u8 operation; /* BLKIF_OP_DISCARD */ + u8 flag; /* BLKIF_DISCARD_SECURE or zero */ +#define BLKIF_DISCARD_SECURE (1 << 0) /* ignored if discard-secure=0 */ + blkif_vdev_t handle; /* same as for read/write requests */ + u64 id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk */ + u64 nr_sectors; /* number of contiguous sectors to discard*/ +}; + +typedef struct blkif_request_discard blkif_request_discard_t; + +struct blkif_request_indirect { + u8 operation; /* BLKIF_OP_INDIRECT */ + u8 indirect_op; /* BLKIF_OP_{READ/WRITE} */ + u16 nr_segments; /* number of segments */ + u64 id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + blkif_vdev_t handle; /* same as for read/write requests */ + grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST]; +#ifdef __i386__ + u64 pad; /* Make it 64 byte aligned on i386 */ +#endif +}; + +typedef struct blkif_request_indirect blkif_request_indirect_t; + +struct blkif_response { + u64 id; /* copied from request */ + u8 operation; /* copied from request */ + s16 status; /* BLKIF_RSP_??? */ +}; + +typedef struct blkif_response blkif_response_t; + +/* + * STATUS RETURN CODES. + */ + /* Operation not supported (only happens on barrier writes). */ +#define BLKIF_RSP_EOPNOTSUPP -2 + /* Operation failed for some unspecified reason (-EIO). */ +#define BLKIF_RSP_ERROR -1 + /* Operation completed successfully. */ +#define BLKIF_RSP_OKAY 0 + +/* + * Generate blkif ring structures and types. + */ +DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response); + +#define VDISK_CDROM 0x1 +#define VDISK_REMOVABLE 0x2 +#define VDISK_READONLY 0x4 + +#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/interface/io/console.h b/include/xen/interface/io/console.h new file mode 100644 index 0000000000..3489fc7a60 --- /dev/null +++ b/include/xen/interface/io/console.h @@ -0,0 +1,56 @@ +/****************************************************************************** + * console.h + * + * Console I/O interface for Xen guest OSes. 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2005, Keir Fraser
+ */
+
+#ifndef __XEN_PUBLIC_IO_CONSOLE_H__
+#define __XEN_PUBLIC_IO_CONSOLE_H__
+
+typedef u32 XENCONS_RING_IDX;
+
+#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring) - 1))
+
+struct xencons_interface {
+ char in[1024];
+ char out[2048];
+ XENCONS_RING_IDX in_cons, in_prod;
+ XENCONS_RING_IDX out_cons, out_prod;
+};
+
+#ifdef XEN_WANT_FLEX_CONSOLE_RING
+#include "ring.h"
+DEFINE_XEN_FLEX_RING(xencons);
+#endif
+
+#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
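The console ring above is the simplest of these protocols, so it makes a good
illustration of the producer/consumer discipline used throughout: only the
producer updates its own index, data is written before the index is
published, and indexes are free-running (hence MASK_XENCONS_IDX). A sketch of
the guest-side transmit path (helper name invented; xen_wmb() as elsewhere):

    static int xencons_put(struct xencons_interface *intf, const char *s,
                           int len)
    {
        XENCONS_RING_IDX cons = intf->out_cons;  /* peer's consumer index */
        XENCONS_RING_IDX prod = intf->out_prod;  /* ours to advance */
        int sent = 0;

        while (sent < len && (prod - cons) < sizeof(intf->out))
            intf->out[MASK_XENCONS_IDX(prod++, intf->out)] = s[sent++];

        xen_wmb();              /* data visible before the new index */
        intf->out_prod = prod;
        return sent;            /* caller then notifies the console evtchn */
    }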
diff --git a/include/xen/interface/io/protocols.h b/include/xen/interface/io/protocols.h
new file mode 100644
index 0000000000..52b4de0f81
--- /dev/null
+++ b/include/xen/interface/io/protocols.h
@@ -0,0 +1,42 @@
+/******************************************************************************
+ * protocols.h
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2008, Keir Fraser
+ */
+
+#ifndef __XEN_PROTOCOLS_H__
+#define __XEN_PROTOCOLS_H__
+
+#define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi"
+#define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi"
+#define XEN_IO_PROTO_ABI_ARM "arm-abi"
+
+#if defined(__i386__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
+#elif defined(__x86_64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
+#elif defined(__arm__) || defined(__aarch64__)
+# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM
+#else
+# error arch fixup needed here
+#endif
+
+#endif
diff --git a/include/xen/interface/io/ring.h b/include/xen/interface/io/ring.h
new file mode 100644
index 0000000000..4e02678e3c
--- /dev/null
+++ b/include/xen/interface/io/ring.h
@@ -0,0 +1,479 @@
+/******************************************************************************
+ * ring.h
+ *
+ * Shared producer-consumer ring macros.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Tim Deegan and Andrew Warfield November 2004.
+ */
+
+#ifndef __XEN_PUBLIC_IO_RING_H__
+#define __XEN_PUBLIC_IO_RING_H__
+
+/*
+ * When #include'ing this header, you need to provide the following
+ * declarations upfront:
+ * - standard integer types (u8, u16, etc)
+ * They are provided by stdint.h of the standard headers.
+ *
+ * In addition, if you intend to use the FLEX macros, you also need to
+ * provide the following, before invoking the FLEX macros:
+ * - size_t
+ * - memcpy
+ * - grant_ref_t
+ * These declarations are provided by string.h of the standard headers,
+ * and grant_table.h from the Xen public headers.
+ */
+
+#include <xen/interface/grant_table.h>
+
+typedef unsigned int RING_IDX;
+
+/* Round a 32-bit unsigned constant down to the nearest power of two. */
+#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1))
+#define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x))
+#define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x))
+#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x))
+#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x))
+
+/*
+ * Calculate size of a shared ring, given the total available space for the
+ * ring and indexes (_sz), and the name tag of the request/response structure.
+ * A ring contains as many entries as will fit, rounded down to the nearest
+ * power of two (so we can mask with (size-1) to loop around).
+ */
+#define __CONST_RING_SIZE(_s, _sz) \
+	(__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \
+		sizeof(((struct _s##_sring *)0)->ring[0])))
+/*
+ * The same for passing in an actual pointer instead of a name tag.
+ */
+#define __RING_SIZE(_s, _sz) \
+	(__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
+
+/*
+ * Macros to make the correct C datatypes for a new kind of ring.
+ *
+ * To make a new ring datatype, you need to have two message structures,
+ * let's say request_t and response_t, already defined.
+ *
+ * In a header where you want the ring datatype declared, you then do:
+ *
+ *     DEFINE_RING_TYPES(mytag, request_t, response_t);
+ *
+ * These expand out to give you a set of types, as you can see below.
+ * The most important of these are:
+ *
+ *     mytag_sring_t      - The shared ring.
+ *     mytag_front_ring_t - The 'front' half of the ring.
+ *     mytag_back_ring_t  - The 'back' half of the ring.
+ *
+ * To initialise a ring in your code you need to know the location and size
+ * of the shared memory area (PAGE_SIZE, for instance). To initialise
+ * the front half:
+ *
+ *     mytag_front_ring_t front_ring;
+ *     SHARED_RING_INIT((mytag_sring_t *)shared_page);
+ *     FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ *
+ * Initialising the back follows similarly (note that only the front
+ * initialises the shared ring):
+ *
+ *     mytag_back_ring_t back_ring;
+ *     BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
+ */
+
+#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \
+ \
+/* Shared ring entry */ \
+union __name##_sring_entry { \
+	__req_t req; \
+	__rsp_t rsp; \
+}; \
+ \
+/* Shared ring page */ \
+struct __name##_sring { \
+	RING_IDX req_prod, req_event; \
+	RING_IDX rsp_prod, rsp_event; \
+	union { \
+		struct { \
+			u8 smartpoll_active; \
+		} netif; \
+		struct { \
+			u8 msg; \
+		} tapif_user; \
+		u8 pvt_pad[4]; \
+	} pvt; \
+	u8 __pad[44]; \
+	union __name##_sring_entry ring[1]; /* variable-length */ \
+}; \
+ \
+/* "Front" end's private variables */ \
+struct __name##_front_ring { \
+	RING_IDX req_prod_pvt; \
+	RING_IDX rsp_cons; \
+	unsigned int nr_ents; \
+	struct __name##_sring *sring; \
+}; \
+ \
+/* "Back" end's private variables */ \
+struct __name##_back_ring { \
+	RING_IDX rsp_prod_pvt; \
+	RING_IDX req_cons; \
+	unsigned int nr_ents; \
+	struct __name##_sring *sring; \
+}; \
+ \
+/* Syntactic sugar */ \
+typedef struct __name##_sring __name##_sring_t; \
+typedef struct __name##_front_ring __name##_front_ring_t; \
+typedef struct __name##_back_ring __name##_back_ring_t
+
+/*
+ * Macros for manipulating rings.
+ *
+ * FRONT_RING_whatever works on the "front end" of a ring: here
+ * requests are pushed on to the ring and responses taken off it.
+ *
+ * BACK_RING_whatever works on the "back end" of a ring: here
+ * requests are taken off the ring and responses put on.
+ *
+ * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL.
+ * This is OK in 1-for-1 request-response situations where the
+ * requestor (front end) never has more than RING_SIZE()-1
+ * outstanding requests.
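+ *
+ * As a rough usage sketch (illustrative only; "mytag" and the request
+ * type are placeholders, as in the example further up):
+ *
+ *     if (!RING_FULL(&front_ring)) {
+ *         RING_IDX i = front_ring.req_prod_pvt;
+ *         mytag_request_t *req = RING_GET_REQUEST(&front_ring, i);
+ *         ... fill in *req ...
+ *         front_ring.req_prod_pvt = i + 1;
+ *         RING_PUSH_REQUESTS(&front_ring);
+ *     }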
+ */
+
+/* Initialising empty rings */
+#define SHARED_RING_INIT(_s) do { \
+	(_s)->req_prod = (_s)->rsp_prod = 0; \
+	(_s)->req_event = (_s)->rsp_event = 1; \
+	(void)memset((_s)->pvt.pvt_pad, 0, sizeof((_s)->pvt.pvt_pad)); \
+	(void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \
+} while (0)
+
+#define FRONT_RING_INIT(_r, _s, __size) do { \
+	(_r)->req_prod_pvt = 0; \
+	(_r)->rsp_cons = 0; \
+	(_r)->nr_ents = __RING_SIZE(_s, __size); \
+	(_r)->sring = (_s); \
+} while (0)
+
+#define BACK_RING_INIT(_r, _s, __size) do { \
+	(_r)->rsp_prod_pvt = 0; \
+	(_r)->req_cons = 0; \
+	(_r)->nr_ents = __RING_SIZE(_s, __size); \
+	(_r)->sring = (_s); \
+} while (0)
+
+/* How big is this ring? */
+#define RING_SIZE(_r) \
+	((_r)->nr_ents)
+
+/* Number of free requests (for use on front side only). */
+#define RING_FREE_REQUESTS(_r) \
+	(RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
+
+/* Test if there is an empty slot available on the front ring.
+ * (This is only meaningful from the front.)
+ */
+#define RING_FULL(_r) \
+	(RING_FREE_REQUESTS(_r) == 0)
+
+/* Test if there are outstanding messages to be processed on a ring. */
+#define RING_HAS_UNCONSUMED_RESPONSES(_r) \
+	((_r)->sring->rsp_prod - (_r)->rsp_cons)
+
+#ifdef __GNUC__
+#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \
+	unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \
+	unsigned int rsp = RING_SIZE(_r) - \
+		((_r)->req_cons - (_r)->rsp_prod_pvt); \
+	req < rsp ? req : rsp; \
+})
+#else
+/* Same as above, but without the nice GCC ({ ... }) syntax. */
+#define RING_HAS_UNCONSUMED_REQUESTS(_r) \
+	((((_r)->sring->req_prod - (_r)->req_cons) < \
+	  (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \
+	 ((_r)->sring->req_prod - (_r)->req_cons) : \
+	 (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt)))
+#endif
+
+/* Direct access to individual ring elements, by index. */
+#define RING_GET_REQUEST(_r, _idx) \
+	(&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
+
+/*
+ * Get a local copy of a request.
+ *
+ * Use this in preference to RING_GET_REQUEST() so all processing is
+ * done on a local copy that cannot be modified by the other end.
+ *
+ * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this
+ * to be ineffective where _req is a struct which consists of only bitfields.
+ */
+#define RING_COPY_REQUEST(_r, _idx, _req) do { \
+	/* Use volatile to force the copy into _req. */ \
+	*(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx); \
+} while (0)
+
+#define RING_GET_RESPONSE(_r, _idx) \
+	(&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
+
+/* Loop termination condition: Would the specified index overflow the ring? */
+#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \
+	(((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
+
+/* Ill-behaved frontend determination: Can there be this many requests? */
+#define RING_REQUEST_PROD_OVERFLOW(_r, _prod) \
+	(((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r))
+
+#define RING_PUSH_REQUESTS(_r) do { \
+	xen_wmb(); /* back sees requests /before/ updated producer index */ \
+	(_r)->sring->req_prod = (_r)->req_prod_pvt; \
+} while (0)
+
+#define RING_PUSH_RESPONSES(_r) do { \
+	xen_wmb(); /* front sees resps /before/ updated producer index */ \
+	(_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \
+} while (0)
+
+/*
+ * Notification hold-off (req_event and rsp_event):
+ *
+ * When queueing requests or responses on a shared ring, it may not always be
+ * necessary to notify the remote end.
+ * For example, if requests are in flight
+ * in a backend, the front may be able to queue further requests without
+ * notifying the back (if the back checks for new requests when it queues
+ * responses).
+ *
+ * When enqueuing requests or responses:
+ *
+ * Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
+ * is a boolean return value. True indicates that the receiver requires an
+ * asynchronous notification.
+ *
+ * After dequeuing requests or responses (before sleeping the connection):
+ *
+ * Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
+ * The second argument is a boolean return value. True indicates that there
+ * are pending messages on the ring (i.e., the connection should not be put
+ * to sleep).
+ *
+ * These macros will set the req_event/rsp_event field to trigger a
+ * notification on the very next message that is enqueued. If you want to
+ * create batches of work (i.e., only receive a notification after several
+ * messages have been enqueued) then you will need to create a customised
+ * version of the FINAL_CHECK macro in your own code, which sets the event
+ * field appropriately.
+ */
+
+#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \
+	RING_IDX __old = (_r)->sring->req_prod; \
+	RING_IDX __new = (_r)->req_prod_pvt; \
+	xen_wmb(); /* back sees requests /before/ updated producer index */ \
+	(_r)->sring->req_prod = __new; \
+	xen_mb(); /* back sees new requests /before/ we check req_event */ \
+	(_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \
+		     (RING_IDX)(__new - __old)); \
+} while (0)
+
+#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \
+	RING_IDX __old = (_r)->sring->rsp_prod; \
+	RING_IDX __new = (_r)->rsp_prod_pvt; \
+	xen_wmb(); /* front sees resps /before/ updated producer index */ \
+	(_r)->sring->rsp_prod = __new; \
+	xen_mb(); /* front sees new resps /before/ we check rsp_event */ \
+	(_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \
+		     (RING_IDX)(__new - __old)); \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \
+	(_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \
+	if (_work_to_do) \
+		break; \
+	(_r)->sring->req_event = (_r)->req_cons + 1; \
+	xen_mb(); \
+	(_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \
+} while (0)
+
+#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \
+	(_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \
+	if (_work_to_do) \
+		break; \
+	(_r)->sring->rsp_event = (_r)->rsp_cons + 1; \
+	xen_mb(); \
+	(_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \
+} while (0)
+
+/*
+ * DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings and
+ * functions to check if there is data on the ring, and to read and
+ * write to them.
+ *
+ * DEFINE_XEN_FLEX_RING is similar to DEFINE_XEN_FLEX_RING_AND_INTF, but
+ * does not define the indexes page. As different protocols can have
+ * extensions to the basic format, this macro allows them to define their
+ * own struct.
+ *
+ * XEN_FLEX_RING_SIZE
+ *     Convenience macro to calculate the size of one of the two rings
+ *     from the overall order.
+ *
+ * $NAME_mask
+ *     Function to apply the size mask to an index, to reduce the index
+ *     within the range [0, size).
+ *
+ * $NAME_read_packet
+ *     Function to read data from the ring. The amount of data to read is
+ *     specified by the "size" argument.
+ *
+ * $NAME_write_packet
+ *     Function to write data to the ring. The amount of data to write is
+ *     specified by the "size" argument.
+ * + * $NAME_get_ring_ptr + * Convenience function that returns a pointer to read/write to the + * ring at the right location. + * + * $NAME_data_intf + * Indexes page, shared between frontend and backend. It also + * contains the array of grant refs. + * + * $NAME_queued + * Function to calculate how many bytes are currently on the ring, + * ready to be read. It can also be used to calculate how much free + * space is currently on the ring (XEN_FLEX_RING_SIZE() - + * $NAME_queued()). + */ + +#ifndef XEN_PAGE_SHIFT +/* The PAGE_SIZE for ring protocols and hypercall interfaces is always + * 4K, regardless of the architecture, and page granularity chosen by + * operating systems. + */ +#define XEN_PAGE_SHIFT 12 +#endif +#define XEN_FLEX_RING_SIZE(order) \ + (1UL << ((order) + XEN_PAGE_SHIFT - 1)) + +#define DEFINE_XEN_FLEX_RING(name) \ +static inline RING_IDX name##_mask(RING_IDX idx, RING_IDX ring_size) \ +{ \ + return idx & (ring_size - 1); \ +} \ + \ +static inline unsigned char *name##_get_ring_ptr(unsigned char *buf, \ + RING_IDX idx, \ + RING_IDX ring_size) \ +{ \ + return buf + name##_mask(idx, ring_size); \ +} \ + \ +static inline void name##_read_packet(void *opaque, \ + const unsigned char *buf, \ + size_t size, \ + RING_IDX masked_prod, \ + RING_IDX *masked_cons, \ + RING_IDX ring_size) \ +{ \ + if (*masked_cons < masked_prod || \ + size <= ring_size - *masked_cons) { \ + memcpy(opaque, buf + *masked_cons, size); \ + } else { \ + memcpy(opaque, buf + *masked_cons, ring_size - *masked_cons); \ + memcpy((unsigned char *)opaque + ring_size - *masked_cons, buf, \ + size - (ring_size - *masked_cons)); \ + } \ + *masked_cons = name##_mask(*masked_cons + size, ring_size); \ +} \ + \ +static inline void name##_write_packet(unsigned char *buf, \ + const void *opaque, \ + size_t size, \ + RING_IDX *masked_prod, \ + RING_IDX masked_cons, \ + RING_IDX ring_size) \ +{ \ + if (*masked_prod < masked_cons || \ + size <= ring_size - *masked_prod) { \ + memcpy(buf + *masked_prod, opaque, size); \ + } else { \ + memcpy(buf + *masked_prod, opaque, ring_size - *masked_prod); \ + memcpy(buf, (unsigned char *)opaque + (ring_size - *masked_prod), \ + size - (ring_size - *masked_prod)); \ + } \ + *masked_prod = name##_mask(*masked_prod + size, ring_size); \ +} \ + \ +static inline RING_IDX name##_queued(RING_IDX prod, \ + RING_IDX cons, \ + RING_IDX ring_size) \ +{ \ + RING_IDX size; \ + \ + if (prod == cons) \ + return 0; \ + \ + prod = name##_mask(prod, ring_size); \ + cons = name##_mask(cons, ring_size); \ + \ + if (prod == cons) \ + return ring_size; \ + \ + if (prod > cons) \ + size = prod - cons; \ + else \ + size = ring_size - (cons - prod); \ + return size; \ +} \ + \ +struct name##_data { \ + unsigned char *in; /* half of the allocation */ \ + unsigned char *out; /* half of the allocation */ \ +} + +#define DEFINE_XEN_FLEX_RING_AND_INTF(name) \ +struct name##_data_intf { \ + RING_IDX in_cons, in_prod; \ + \ + u8 pad1[56]; \ + \ + RING_IDX out_cons, out_prod; \ + \ + u8 pad2[56]; \ + \ + RING_IDX ring_order; \ + grant_ref_t ref[]; \ +}; \ +DEFINE_XEN_FLEX_RING(name) + +#endif /* __XEN_PUBLIC_IO_RING_H__ */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 8 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/interface/io/xenbus.h b/include/xen/interface/io/xenbus.h new file mode 100644 index 0000000000..f452748b03 --- /dev/null +++ b/include/xen/interface/io/xenbus.h @@ -0,0 +1,81 @@ 
+/***************************************************************************** + * xenbus.h + * + * Xenbus protocol details. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 XenSource Ltd. + */ + +#ifndef _XEN_PUBLIC_IO_XENBUS_H +#define _XEN_PUBLIC_IO_XENBUS_H + +/* + * The state of either end of the Xenbus, i.e. the current communication + * status of initialisation across the bus. States here imply nothing about + * the state of the connection between the driver and the kernel's device + * layers. + */ +enum xenbus_state { + XenbusStateUnknown = 0, + + XenbusStateInitialising = 1, + + /* + * InitWait: Finished early initialisation but waiting for information + * from the peer or hotplug scripts. + */ + XenbusStateInitWait = 2, + + /* + * Initialised: Waiting for a connection from the peer. + */ + XenbusStateInitialised = 3, + + XenbusStateConnected = 4, + + /* + * Closing: The device is being closed due to an error or an unplug event. + */ + XenbusStateClosing = 5, + + XenbusStateClosed = 6, + + /* + * Reconfiguring: The device is being reconfigured. + */ + XenbusStateReconfiguring = 7, + + XenbusStateReconfigured = 8 +}; + +typedef enum xenbus_state XenbusState; + +#endif /* _XEN_PUBLIC_IO_XENBUS_H */ + +/* + * Local variables: + * mode: C + * c-file-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */ diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h new file mode 100644 index 0000000000..87987334bf --- /dev/null +++ b/include/xen/interface/io/xs_wire.h @@ -0,0 +1,151 @@ +/* + * Details of the "wire" protocol between Xen Store Daemon and client + * library or guest kernel. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (C) 2005 Rusty Russell IBM Corporation + */ + +#ifndef _XS_WIRE_H +#define _XS_WIRE_H + +enum xsd_sockmsg_type { + XS_CONTROL, +#define XS_DEBUG XS_CONTROL + XS_DIRECTORY, + XS_READ, + XS_GET_PERMS, + XS_WATCH, + XS_UNWATCH, + XS_TRANSACTION_START, + XS_TRANSACTION_END, + XS_INTRODUCE, + XS_RELEASE, + XS_GET_DOMAIN_PATH, + XS_WRITE, + XS_MKDIR, + XS_RM, + XS_SET_PERMS, + XS_WATCH_EVENT, + XS_ERROR, + XS_IS_DOMAIN_INTRODUCED, + XS_RESUME, + XS_SET_TARGET, + /* XS_RESTRICT has been removed */ + XS_RESET_WATCHES = XS_SET_TARGET + 2, + XS_DIRECTORY_PART, + + XS_TYPE_COUNT, /* Number of valid types. */ + + XS_INVALID = 0xffff /* Guaranteed to remain an invalid type */ +}; + +#define XS_WRITE_NONE "NONE" +#define XS_WRITE_CREATE "CREATE" +#define XS_WRITE_CREATE_EXCL "CREATE|EXCL" + +/* We hand errors as strings, for portability. */ +struct xsd_errors { + int errnum; + const char *errstring; +}; + +#ifdef EINVAL +#define XSD_ERROR(x) { x, #x } +/* LINTED: static unused */ +static struct xsd_errors xsd_errors[] +#if defined(__GNUC__) +__attribute__((unused)) +#endif + = { + XSD_ERROR(EINVAL), + XSD_ERROR(EACCES), + XSD_ERROR(EEXIST), + XSD_ERROR(EISDIR), + XSD_ERROR(ENOENT), + XSD_ERROR(ENOMEM), + XSD_ERROR(ENOSPC), + XSD_ERROR(EIO), + XSD_ERROR(ENOTEMPTY), + XSD_ERROR(ENOSYS), + XSD_ERROR(EROFS), + XSD_ERROR(EBUSY), + XSD_ERROR(EAGAIN), + XSD_ERROR(EISCONN), + XSD_ERROR(E2BIG) +}; +#endif + +struct xsd_sockmsg { + u32 type; /* XS_??? */ + u32 req_id;/* Request identifier, echoed in daemon's response. */ + u32 tx_id; /* Transaction id (0 if not related to a transaction). */ + u32 len; /* Length of data following this. */ + + /* Generally followed by nul-terminated string(s). */ +}; + +enum xs_watch_type { + XS_WATCH_PATH = 0, + XS_WATCH_TOKEN +}; + +/* + * `incontents 150 xenstore_struct XenStore wire protocol. + * + * Inter-domain shared memory communications. + */ +#define XENSTORE_RING_SIZE 1024 +typedef u32 XENSTORE_RING_IDX; +#define MASK_XENSTORE_IDX(idx) ((idx) & (XENSTORE_RING_SIZE - 1)) +struct xenstore_domain_interface { + char req[XENSTORE_RING_SIZE]; /* Requests to xenstore daemon. */ + char rsp[XENSTORE_RING_SIZE]; /* Replies and async watch events. */ + XENSTORE_RING_IDX req_cons, req_prod; + XENSTORE_RING_IDX rsp_cons, rsp_prod; + u32 server_features; /* Bitmap of features supported by the server */ + u32 connection; +}; + +/* Violating this is very bad. See docs/misc/xenstore.txt. 
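+ *
+ * For illustration: every message on the req ring is a struct xsd_sockmsg
+ * header immediately followed by len bytes of payload; e.g. an XS_READ of
+ * the path "domid" is a header with type = XS_READ and len = 6, followed
+ * by the nul-terminated string "domid". The payload must not exceed
+ * XENSTORE_PAYLOAD_MAX below.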
+ */
+#define XENSTORE_PAYLOAD_MAX 4096
+
+/* Violating these just gets you an error back */
+#define XENSTORE_ABS_PATH_MAX 3072
+#define XENSTORE_REL_PATH_MAX 2048
+
+/* The ability to reconnect a ring */
+#define XENSTORE_SERVER_FEATURE_RECONNECTION 1
+
+/* Valid values for the connection field */
+#define XENSTORE_CONNECTED 0 /* the steady-state */
+#define XENSTORE_RECONNECT 1 /* guest has initiated a reconnect */
+
+#endif /* _XS_WIRE_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 8
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
new file mode 100644
index 0000000000..19959da8b4
--- /dev/null
+++ b/include/xen/interface/memory.h
@@ -0,0 +1,332 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/******************************************************************************
+ * memory.h
+ *
+ * Memory reservation and information.
+ *
+ * Copyright (c) 2005, Keir Fraser <keir at xensource.com>
+ */
+
+#ifndef __XEN_PUBLIC_MEMORY_H__
+#define __XEN_PUBLIC_MEMORY_H__
+
+/*
+ * Increase or decrease the specified domain's memory reservation. Returns a
+ * -ve errcode on failure, or the # extents successfully allocated or freed.
+ * arg == addr of struct xen_memory_reservation.
+ */
+#define XENMEM_increase_reservation 0
+#define XENMEM_decrease_reservation 1
+#define XENMEM_populate_physmap 6
+struct xen_memory_reservation {
+	/*
+	 * XENMEM_increase_reservation:
+	 *   OUT: MFN (*not* GMFN) bases of extents that were allocated
+	 * XENMEM_decrease_reservation:
+	 *   IN:  GMFN bases of extents to free
+	 * XENMEM_populate_physmap:
+	 *   IN:  GPFN bases of extents to populate with memory
+	 *   OUT: GMFN bases of extents that were allocated
+	 *   (NB. This command also updates the mach_to_phys translation table)
+	 */
+	GUEST_HANDLE(xen_pfn_t)extent_start;
+
+	/* Number of extents, and size/alignment of each (2^extent_order pages). */
+	xen_ulong_t nr_extents;
+	unsigned int extent_order;
+
+	/*
+	 * Maximum # bits addressable by the user of the allocated region (e.g.,
+	 * I/O devices often have a 32-bit limitation even in 64-bit systems). If
+	 * zero then the user has no addressing restriction.
+	 * This field is not used by XENMEM_decrease_reservation.
+	 */
+	unsigned int address_bits;
+
+	/*
+	 * Domain whose reservation is being changed.
+	 * Unprivileged domains can specify only DOMID_SELF.
+	 */
+	domid_t domid;
+
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);
+
+/*
+ * An atomic exchange of memory pages. If return code is zero then
+ * @out.extent_start provides GMFNs of the newly-allocated memory.
+ * Returns zero on complete success, otherwise a negative error code.
+ * On complete success then always @nr_exchanged == @in.nr_extents.
+ * On partial success @nr_exchanged indicates how much work was done.
+ */
+#define XENMEM_exchange 11
+struct xen_memory_exchange {
+	/*
+	 * [IN] Details of memory extents to be exchanged (GMFN bases).
+	 * Note that @in.address_bits is ignored and unused.
+	 */
+	struct xen_memory_reservation in;
+
+	/*
+	 * [IN/OUT] Details of new memory extents.
+	 * We require that:
+	 *  1. @in.domid == @out.domid
+	 *  2. @in.nr_extents << @in.extent_order ==
+	 *     @out.nr_extents << @out.extent_order
+	 *  3. @in.extent_start and @out.extent_start lists must not overlap
+	 *  4. @out.extent_start lists GPFN bases to be populated
+	 *  5. @out.extent_start is overwritten with allocated GMFN bases
+	 */
+	struct xen_memory_reservation out;
+
+	/*
+	 * [OUT] Number of input extents that were successfully exchanged:
+	 *  1. The first @nr_exchanged input extents were successfully
+	 *     deallocated.
+	 *  2. The corresponding first entries in the output extent list correctly
+	 *     indicate the GMFNs that were successfully exchanged.
+	 *  3. All other input and output extents are untouched.
+	 *  4. If not all input extents are exchanged then the return code of this
+	 *     command will be non-zero.
+	 *  5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
+	 */
+	xen_ulong_t nr_exchanged;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange);
+/*
+ * Returns the maximum machine frame number of mapped RAM in this system.
+ * This command always succeeds (it never returns an error code).
+ * arg == NULL.
+ */
+#define XENMEM_maximum_ram_page 2
+
+/*
+ * Returns the current or maximum memory reservation, in pages, of the
+ * specified domain (may be DOMID_SELF). Returns -ve errcode on failure.
+ * arg == addr of domid_t.
+ */
+#define XENMEM_current_reservation 3
+#define XENMEM_maximum_reservation 4
+
+/*
+ * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table do not implement
+ * this command.
+ * arg == addr of xen_machphys_mfn_list_t.
+ */
+#define XENMEM_machphys_mfn_list 5
+struct xen_machphys_mfn_list {
+	/*
+	 * Size of the 'extent_start' array. Fewer entries will be filled if the
+	 * machphys table is smaller than max_extents * 2MB.
+	 */
+	unsigned int max_extents;
+
+	/*
+	 * Pointer to buffer to fill with list of extent starts. If there are
+	 * any large discontiguities in the machine address space, 2MB gaps in
+	 * the machphys table will be represented by an MFN base of zero.
+	 */
+	GUEST_HANDLE(xen_pfn_t)extent_start;
+
+	/*
+	 * Number of extents written to the above array. This will be smaller
+	 * than 'max_extents' if the machphys table is smaller than max_e * 2MB.
+	 */
+	unsigned int nr_extents;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
+
+/*
+ * Returns the location in virtual address space of the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table, or which do not
+ * map it by default into guest address space, do not implement this command.
+ * arg == addr of xen_machphys_mapping_t.
+ */
+#define XENMEM_machphys_mapping 12
+struct xen_machphys_mapping {
+	xen_ulong_t v_start, v_end; /* Start and end virtual addresses. */
+	xen_ulong_t max_mfn;        /* Maximum MFN that can be looked up. */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping);
+
+#define XENMAPSPACE_shared_info  0 /* shared info page */
+#define XENMAPSPACE_grant_table  1 /* grant table page */
+#define XENMAPSPACE_gmfn         2 /* GMFN */
+#define XENMAPSPACE_gmfn_range   3 /* GMFN range, XENMEM_add_to_physmap only. */
+#define XENMAPSPACE_gmfn_foreign 4 /* GMFN from another dom,
+				    * XENMEM_add_to_physmap_range only.
+				    */
+#define XENMAPSPACE_dev_mmio     5 /* device mmio region */
+
+/*
+ * Sets the GPFN at which a particular page appears in the specified guest's
+ * pseudophysical address space.
+ * arg == addr of xen_add_to_physmap_t.
+ */
+#define XENMEM_add_to_physmap 7
+struct xen_add_to_physmap {
+	/* Which domain to change the mapping for. */
+	domid_t domid;
+
+	/* Number of pages to go through for gmfn_range */
+	u16 size;
+
+	/* Source mapping space. */
+	unsigned int space;
+
+	/* Index into source mapping space.
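+	 * (For example: a GMFN for XENMAPSPACE_gmfn, or a grant-table frame
+	 * number for XENMAPSPACE_grant_table.)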
*/ + xen_ulong_t idx; + + /* GPFN where the source mapping page should appear. */ + xen_pfn_t gpfn; +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); + +/*** REMOVED ***/ +/*#define XENMEM_translate_gpfn_list 8*/ + +#define XENMEM_add_to_physmap_range 23 +struct xen_add_to_physmap_range { + /* IN */ + /* Which domain to change the mapping for. */ + domid_t domid; + u16 space; /* => enum phys_map_space */ + + /* Number of pages to go through */ + u16 size; + domid_t foreign_domid; /* IFF gmfn_foreign */ + + /* Indexes into space being mapped. */ + GUEST_HANDLE(xen_ulong_t)idxs; + + /* GPFN in domid where the source mapping page should appear. */ + GUEST_HANDLE(xen_pfn_t)gpfns; + + /* OUT */ + + /* Per index error code. */ + GUEST_HANDLE(int)errs; +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap_range); + +/* + * Returns the pseudo-physical memory map as it was when the domain + * was started (specified by XENMEM_set_memory_map). + * arg == addr of struct xen_memory_map. + */ +#define XENMEM_memory_map 9 +struct xen_memory_map { + /* + * On call the number of entries which can be stored in buffer. On + * return the number of entries which have been stored in + * buffer. + */ + unsigned int nr_entries; + + /* + * Entries in the buffer are in the same format as returned by the + * BIOS INT 0x15 EAX=0xE820 call. + */ + GUEST_HANDLE(void)buffer; +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map); + +/* + * Returns the real physical memory map. Passes the same structure as + * XENMEM_memory_map. + * arg == addr of struct xen_memory_map. + */ +#define XENMEM_machine_memory_map 10 + +/* + * Unmaps the page appearing at a particular GPFN from the specified guest's + * pseudophysical address space. + * arg == addr of xen_remove_from_physmap_t. + */ +#define XENMEM_remove_from_physmap 15 +struct xen_remove_from_physmap { + /* Which domain to change the mapping for. */ + domid_t domid; + + /* GPFN of the current mapping of the page. */ + xen_pfn_t gpfn; +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap); + +/* + * Get the pages for a particular guest resource, so that they can be + * mapped directly by a tools domain. + */ +#define XENMEM_acquire_resource 28 +struct xen_mem_acquire_resource { + /* IN - The domain whose resource is to be mapped */ + domid_t domid; + /* IN - the type of resource */ + u16 type; + +#define XENMEM_resource_ioreq_server 0 +#define XENMEM_resource_grant_table 1 + + /* + * IN - a type-specific resource identifier, which must be zero + * unless stated otherwise. + * + * type == XENMEM_resource_ioreq_server -> id == ioreq server id + * type == XENMEM_resource_grant_table -> id defined below + */ + u32 id; + +#define XENMEM_resource_grant_table_id_shared 0 +#define XENMEM_resource_grant_table_id_status 1 + + /* IN/OUT - As an IN parameter number of frames of the resource + * to be mapped. However, if the specified value is 0 and + * frame_list is NULL then this field will be set to the + * maximum value supported by the implementation on return. + */ + u32 nr_frames; + /* + * OUT - Must be zero on entry. On return this may contain a bitwise + * OR of the following values. + */ + u32 flags; + + /* The resource pages have been assigned to the calling domain */ +#define _XENMEM_rsrc_acq_caller_owned 0 +#define XENMEM_rsrc_acq_caller_owned (1u << _XENMEM_rsrc_acq_caller_owned) + + /* + * IN - the index of the initial frame to be mapped. This parameter + * is ignored if nr_frames is 0. 
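+	 * (For a grant table resource this is the index of the first frame
+	 * to map; for an ioreq server resource the
+	 * XENMEM_resource_ioreq_server_frame_* values below give the
+	 * intended indexes.)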
+ */ + u64 frame; + +#define XENMEM_resource_ioreq_server_frame_bufioreq 0 +#define XENMEM_resource_ioreq_server_frame_ioreq(n) (1 + (n)) + + /* + * IN/OUT - If the tools domain is PV then, upon return, frame_list + * will be populated with the MFNs of the resource. + * If the tools domain is HVM then it is expected that, on + * entry, frame_list will be populated with a list of GFNs + * that will be mapped to the MFNs of the resource. + * If -EIO is returned then the frame_list has only been + * partially mapped and it is up to the caller to unmap all + * the GFNs. + * This parameter may be NULL if nr_frames is 0. + */ + GUEST_HANDLE(xen_pfn_t)frame_list; +}; + +DEFINE_GUEST_HANDLE_STRUCT(xen_mem_acquire_resource); + +#endif /* __XEN_PUBLIC_MEMORY_H__ */ diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h new file mode 100644 index 0000000000..0f12dcf267 --- /dev/null +++ b/include/xen/interface/sched.h @@ -0,0 +1,188 @@ +/****************************************************************************** + * sched.h + * + * Scheduler state interactions + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2005, Keir Fraser <keir at xensource.com> + */ + +#ifndef __XEN_PUBLIC_SCHED_H__ +#define __XEN_PUBLIC_SCHED_H__ + +#include <xen/interface/event_channel.h> + +/* + * Guest Scheduler Operations + * + * The SCHEDOP interface provides mechanisms for a guest to interact + * with the scheduler, including yield, blocking and shutting itself + * down. + */ + +/* + * The prototype for this hypercall is: + * long HYPERVISOR_sched_op(enum sched_op cmd, void *arg, ...) + * + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == Operation-specific extra argument(s), as described below. + * ... == Additional Operation-specific extra arguments, described below. + * + * Versions of Xen prior to 3.0.2 provided only the following legacy version + * of this hypercall, supporting only the commands yield, block and shutdown: + * long sched_op(int cmd, unsigned long arg) + * @cmd == SCHEDOP_??? (scheduler operation). + * @arg == 0 (SCHEDOP_yield and SCHEDOP_block) + * == SHUTDOWN_* code (SCHEDOP_shutdown) + * + * This legacy version is available to new guests as: + * long HYPERVISOR_sched_op_compat(enum sched_op cmd, unsigned long arg) + */ + +/* + * Voluntarily yield the CPU. + * @arg == NULL. + */ +#define SCHEDOP_yield 0 + +/* + * Block execution of this VCPU until an event is received for processing. 
+ * If called with event upcalls masked, this operation will atomically
+ * reenable event delivery and check for pending events before blocking the
+ * VCPU. This avoids a "wakeup waiting" race.
+ * @arg == NULL.
+ */
+#define SCHEDOP_block 1
+
+/*
+ * Halt execution of this domain (all VCPUs) and notify the system controller.
+ * @arg == pointer to sched_shutdown structure.
+ *
+ * If the sched_shutdown_t reason is SHUTDOWN_suspend then
+ * x86 PV guests must also set RDX (EDX for 32-bit guests) to the MFN
+ * of the guest's start info page. RDX/EDX is the third hypercall
+ * argument.
+ *
+ * In addition, when the reason is SHUTDOWN_suspend this hypercall
+ * returns 1 if suspend was cancelled or the domain was merely
+ * checkpointed, and 0 if it is resuming in a new domain.
+ */
+#define SCHEDOP_shutdown 2
+
+/*
+ * Poll a set of event-channel ports. Return when one or more are pending. An
+ * optional timeout may be specified.
+ * @arg == pointer to sched_poll structure.
+ */
+#define SCHEDOP_poll 3
+
+/*
+ * Declare a shutdown for another domain. The main use of this function is
+ * in interpreting shutdown requests and reasons for fully-virtualized
+ * domains. A para-virtualized domain may use SCHEDOP_shutdown directly.
+ * @arg == pointer to sched_remote_shutdown structure.
+ */
+#define SCHEDOP_remote_shutdown 4
+
+/*
+ * Latch a shutdown code, so that when the domain later shuts down it
+ * reports this code to the control tools.
+ * @arg == sched_shutdown, as for SCHEDOP_shutdown.
+ */
+#define SCHEDOP_shutdown_code 5
+
+/*
+ * Set up, poke and destroy a domain watchdog timer.
+ * @arg == pointer to sched_watchdog structure.
+ * With id == 0, set up a domain watchdog timer to cause domain shutdown
+ * after timeout, returns watchdog id.
+ * With id != 0 and timeout == 0, destroy domain watchdog timer.
+ * With id != 0 and timeout != 0, poke watchdog timer and set new timeout.
+ */
+#define SCHEDOP_watchdog 6
+
+/*
+ * Override the current vcpu affinity by pinning it to one physical cpu, or
+ * undo this override, restoring the previous affinity.
+ * @arg == pointer to sched_pin_override structure.
+ *
+ * A negative pcpu value will undo a previous pin override and restore the
+ * previous cpu affinity.
+ * This call is allowed for the hardware domain only and requires the cpu
+ * to be part of the domain's cpupool.
+ */
+#define SCHEDOP_pin_override 7
+
+struct sched_shutdown {
+	unsigned int reason; /* SHUTDOWN_* => shutdown reason */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown);
+
+struct sched_poll {
+	GUEST_HANDLE(evtchn_port_t)ports;
+	unsigned int nr_ports;
+	u64 timeout;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
+
+struct sched_remote_shutdown {
+	domid_t domain_id;   /* Remote domain ID */
+	unsigned int reason; /* SHUTDOWN_* => shutdown reason */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(sched_remote_shutdown);
+
+struct sched_watchdog {
+	u32 id;      /* watchdog ID */
+	u32 timeout; /* timeout */
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(sched_watchdog);
+
+struct sched_pin_override {
+	s32 pcpu;
+};
+
+DEFINE_GUEST_HANDLE_STRUCT(sched_pin_override);
+
+/*
+ * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
+ * software to determine the appropriate action. For the most part, Xen does
+ * not care about the shutdown code.
+ */
+#define SHUTDOWN_poweroff 0 /* Domain exited normally. Clean up and kill. */
+#define SHUTDOWN_reboot   1 /* Clean up, kill, and then restart. */
+#define SHUTDOWN_suspend  2 /* Clean up, save suspend info, kill. */
+#define SHUTDOWN_crash    3 /* Tell controller we've crashed. */
+#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */
+
+/*
+ * The domain has asked to perform a 'soft reset'. The expected behaviour is
+ * to reset the domain's internal Xen state, returning it to the point where
+ * it was created, but leaving the domain's memory contents and vCPU contexts
+ * intact. This will allow the domain to start over and set up all
+ * Xen-specific interfaces again.
+ */
+#define SHUTDOWN_soft_reset 5
+#define SHUTDOWN_MAX 5 /* Maximum valid shutdown reason. */
+
+#endif /* __XEN_PUBLIC_SCHED_H__ */
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
new file mode 100644
index 0000000000..964daaedfb
--- /dev/null
+++ b/include/xen/interface/xen.h
@@ -0,0 +1,225 @@
+/******************************************************************************
+ * xen.h
+ *
+ * Guest OS interface to Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright (c) 2004, K A Fraser
+ */
+
+#ifndef __XEN_PUBLIC_XEN_H__
+#define __XEN_PUBLIC_XEN_H__
+
+#include <xen/arm/interface.h>
+
+/*
+ * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
+ */
+
+/*
+ * x86_32: EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5.
+ *         EAX = return value
+ *         (argument registers may be clobbered on return)
+ * x86_64: RAX = vector; RDI, RSI, RDX, R10, R8, R9 = args 1, 2, 3, 4, 5, 6.
+ * RAX = return value + * (argument registers not clobbered on return; RCX, R11 are) + */ +#define __HYPERVISOR_set_trap_table 0 +#define __HYPERVISOR_mmu_update 1 +#define __HYPERVISOR_set_gdt 2 +#define __HYPERVISOR_stack_switch 3 +#define __HYPERVISOR_set_callbacks 4 +#define __HYPERVISOR_fpu_taskswitch 5 +#define __HYPERVISOR_sched_op_compat 6 +#define __HYPERVISOR_platform_op 7 +#define __HYPERVISOR_set_debugreg 8 +#define __HYPERVISOR_get_debugreg 9 +#define __HYPERVISOR_update_descriptor 10 +#define __HYPERVISOR_memory_op 12 +#define __HYPERVISOR_multicall 13 +#define __HYPERVISOR_update_va_mapping 14 +#define __HYPERVISOR_set_timer_op 15 +#define __HYPERVISOR_event_channel_op_compat 16 +#define __HYPERVISOR_xen_version 17 +#define __HYPERVISOR_console_io 18 +#define __HYPERVISOR_physdev_op_compat 19 +#define __HYPERVISOR_grant_table_op 20 +#define __HYPERVISOR_vm_assist 21 +#define __HYPERVISOR_update_va_mapping_otherdomain 22 +#define __HYPERVISOR_iret 23 /* x86 only */ +#define __HYPERVISOR_vcpu_op 24 +#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ +#define __HYPERVISOR_mmuext_op 26 +#define __HYPERVISOR_xsm_op 27 +#define __HYPERVISOR_nmi_op 28 +#define __HYPERVISOR_sched_op 29 +#define __HYPERVISOR_callback_op 30 +#define __HYPERVISOR_xenoprof_op 31 +#define __HYPERVISOR_event_channel_op 32 +#define __HYPERVISOR_physdev_op 33 +#define __HYPERVISOR_hvm_op 34 +#define __HYPERVISOR_sysctl 35 +#define __HYPERVISOR_domctl 36 +#define __HYPERVISOR_kexec_op 37 +#define __HYPERVISOR_tmem_op 38 +#define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ +#define __HYPERVISOR_xenpmu_op 40 +#define __HYPERVISOR_dm_op 41 + +/* Architecture-specific hypercall definitions. */ +#define __HYPERVISOR_arch_0 48 +#define __HYPERVISOR_arch_1 49 +#define __HYPERVISOR_arch_2 50 +#define __HYPERVISOR_arch_3 51 +#define __HYPERVISOR_arch_4 52 +#define __HYPERVISOR_arch_5 53 +#define __HYPERVISOR_arch_6 54 +#define __HYPERVISOR_arch_7 55 + +#ifndef __ASSEMBLY__ + +typedef u16 domid_t; + +/* Domain ids >= DOMID_FIRST_RESERVED cannot be used for ordinary domains. */ +#define DOMID_FIRST_RESERVED (0x7FF0U) + +/* DOMID_SELF is used in certain contexts to refer to oneself. */ +#define DOMID_SELF (0x7FF0U) + +/* + * DOMID_IO is used to restrict page-table updates to mapping I/O memory. + * Although no Foreign Domain need be specified to map I/O pages, DOMID_IO + * is useful to ensure that no mappings to the OS's own heap are accidentally + * installed. (e.g., in Linux this could cause havoc as reference counts + * aren't adjusted on the I/O-mapping code path). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, but in that context can + * be specified by any calling domain. + */ +#define DOMID_IO (0x7FF1U) + +/* + * DOMID_XEN is used to allow privileged domains to map restricted parts of + * Xen's heap space (e.g., the machine_to_phys table). + * This only makes sense in MMUEXT_SET_FOREIGNDOM, and is only permitted if + * the caller is privileged. + */ +#define DOMID_XEN (0x7FF2U) + +/* DOMID_COW is used as the owner of sharable pages */ +#define DOMID_COW (0x7FF3U) + +/* DOMID_INVALID is used to identify pages with unknown owner. */ +#define DOMID_INVALID (0x7FF4U) + +/* Idle domain. */ +#define DOMID_IDLE (0x7FFFU) + +struct vcpu_info { + /* + * 'evtchn_upcall_pending' is written non-zero by Xen to indicate + * a pending notification for a particular VCPU. It is then cleared + * by the guest OS /before/ checking for pending work, thus avoiding + * a set-and-check race. 
Note that the mask is only accessed by Xen
+	 * on the CPU that is currently hosting the VCPU. This means that the
+	 * pending and mask flags can be updated by the guest without special
+	 * synchronisation (i.e., no need for the x86 LOCK prefix).
+	 * This may seem suboptimal because if the pending flag is set by
+	 * a different CPU then an IPI may be scheduled even when the mask
+	 * is set. However, note:
+	 *  1. The task of 'interrupt holdoff' is covered by the per-event-
+	 *     channel mask bits. A 'noisy' event that is continually being
+	 *     triggered can be masked at source at this very precise
+	 *     granularity.
+	 *  2. The main purpose of the per-VCPU mask is therefore to restrict
+	 *     reentrant execution: whether for concurrency control, or to
+	 *     prevent unbounded stack usage. Whatever the purpose, we expect
+	 *     that the mask will be asserted only for short periods at a time,
+	 *     and so the likelihood of a 'spurious' IPI is suitably small.
+	 * The mask is read before making an event upcall to the guest: a
+	 * non-zero mask therefore guarantees that the VCPU will not receive
+	 * an upcall activation. The mask is cleared when the VCPU requests
+	 * to block: this avoids wakeup-waiting races.
+	 */
+	u8 evtchn_upcall_pending;
+	u8 evtchn_upcall_mask;
+	xen_ulong_t evtchn_pending_sel;
+	struct arch_vcpu_info arch;
+	struct pvclock_vcpu_time_info time;
+}; /* 64 bytes (x86) */
+
+/*
+ * Xen/kernel shared data -- pointer provided in start_info.
+ * NB. We expect that this struct is smaller than a page.
+ */
+struct shared_info {
+	struct vcpu_info vcpu_info[MAX_VIRT_CPUS];
+
+	/*
+	 * A domain can create "event channels" on which it can send and receive
+	 * asynchronous event notifications. There are three classes of event that
+	 * are delivered by this mechanism:
+	 *  1. Bi-directional inter- and intra-domain connections. Domains must
+	 *     arrange out-of-band to set up a connection (usually by allocating
+	 *     an unbound 'listener' port and advertising that via a storage
+	 *     service such as xenstore).
+	 *  2. Physical interrupts. A domain with suitable hardware-access
+	 *     privileges can bind an event-channel port to a physical interrupt
+	 *     source.
+	 *  3. Virtual interrupts ('events'). A domain can bind an event-channel
+	 *     port to a virtual interrupt source, such as the virtual-timer
+	 *     device or the emergency console.
+	 *
+	 * Event channels are addressed by a "port index". Each channel is
+	 * associated with two bits of information:
+	 *  1. PENDING -- notifies the domain that there is a pending notification
+	 *     to be processed. This bit is cleared by the guest.
+	 *  2. MASK -- if this bit is clear then a 0->1 transition of PENDING
+	 *     will cause an asynchronous upcall to be scheduled. This bit is only
+	 *     updated by the guest. It is read-only within Xen. If a channel
+	 *     becomes pending while the channel is masked then the 'edge' is lost
+	 *     (i.e., when the channel is unmasked, the guest must manually handle
+	 *     pending notifications as no upcall will be scheduled by Xen).
+	 *
+	 * To expedite scanning of pending notifications, any 0->1 pending
+	 * transition on an unmasked channel causes a corresponding bit in a
+	 * per-vcpu selector word to be set. Each bit in the selector covers a
+	 * 'C long' in the PENDING bitfield array.
+	 */
+	xen_ulong_t evtchn_pending[sizeof(xen_ulong_t) * 8];
+	xen_ulong_t evtchn_mask[sizeof(xen_ulong_t) * 8];
+
+	/*
+	 * Wallclock time: updated only by control software. Guests should base
+	 * their gettimeofday() syscall on this wallclock-base value.
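+	 * (Roughly: current wall time = this wallclock base plus the time
+	 * since boot reported in the per-VCPU time info above.)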
+ */ + struct pvclock_wall_clock wc; + + struct arch_shared_info arch; + +}; + +#else /* __ASSEMBLY__ */ + +/* In assembly code we cannot use C numeric constant suffixes. */ +#define mk_unsigned_long(x) x + +#endif /* !__ASSEMBLY__ */ + +#endif /* __XEN_PUBLIC_XEN_H__ */
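As a quick illustration of how these headers are meant to be consumed (an editorial sketch, not part of the patch: it assumes a HYPERVISOR_sched_op() hypercall wrapper, which the headers themselves do not provide), a guest could ask Xen to power it off cleanly like this:

    #include <xen/interface/sched.h>

    /* Hypothetical hypercall wrapper, implemented elsewhere. */
    extern int HYPERVISOR_sched_op(int cmd, void *arg);

    /* Request a clean poweroff of the calling guest. */
    static void guest_poweroff(void)
    {
            struct sched_shutdown shutdown = {
                    .reason = SHUTDOWN_poweroff,
            };

            HYPERVISOR_sched_op(SCHEDOP_shutdown, &shutdown);
    }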