diff mbox

[Xen-devel,v2,15/15] drivers/passthrough: arm: Add support for SMMU drivers

Message ID 1393193792-20008-16-git-send-email-julien.grall@linaro.org
State Superseded
Headers show

Commit Message

Julien Grall Feb. 23, 2014, 10:16 p.m. UTC
This patch add support for ARM architected SMMU driver. It's based on the
linux drivers (drivers/iommu/arm-smmu) commit 89ac23cd.

The major differences with the Linux driver are:
    - Fault by default if the SMMU is enabled to translate an
    address (Linux is bypassing the SMMU)
    - Using P2M page table instead of creating new one
    - Dropped stage-1 support
    - Dropped chained SMMUs support for now
    - Reworking device assignment and the different structures

Xen is programming each IOMMU by:
    - Using stage-2 mode translation
    - Sharing the page table with the processor
    - Injecting a fault if the device has made a wrong translation

Signed-off-by: Julien Grall<julien.grall@linaro.org>
Cc: Xiantao Zhang <xiantao.zhang@intel.com>
Cc: Jan Beulich <jbeulich@suse.com>

---
    Changes in v2:
        - Update commit message
        - Update some comments in the code
        - Add new callbacks to assign/reassign DT device
        - Rework init_dom0 and domain_teardown. The
        assignment/deassignement is now made in the generic code
        - Set protected field in dt_device_node when the device is under
        an IOMMU
        - Use SZ_64K and SZ_4K by the global PAGE_SIZE_{64,4}K in
        xen/iommu.h. The first one was not defined.
---
 xen/drivers/passthrough/arm/Makefile |    1 +
 xen/drivers/passthrough/arm/smmu.c   | 1736 ++++++++++++++++++++++++++++++++++
 xen/include/xen/iommu.h              |    3 +
 3 files changed, 1740 insertions(+)
 create mode 100644 xen/drivers/passthrough/arm/smmu.c
diff mbox

Patch

diff --git a/xen/drivers/passthrough/arm/Makefile b/xen/drivers/passthrough/arm/Makefile
index 0484b79..f4cd26e 100644
--- a/xen/drivers/passthrough/arm/Makefile
+++ b/xen/drivers/passthrough/arm/Makefile
@@ -1 +1,2 @@ 
 obj-y += iommu.o
+obj-y += smmu.o
diff --git a/xen/drivers/passthrough/arm/smmu.c b/xen/drivers/passthrough/arm/smmu.c
new file mode 100644
index 0000000..25f9d77
--- /dev/null
+++ b/xen/drivers/passthrough/arm/smmu.c
@@ -0,0 +1,1736 @@ 
+/*
+ * IOMMU API for ARM architected SMMU implementations.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Based on Linux drivers/iommu/arm-smmu.c (commit 89a23cd)
+ * Copyright (C) 2013 ARM Limited
+ *
+ * Author: Will Deacon <will.deacon@arm.com>
+ *
+ * Xen modification:
+ * Julien Grall <julien.grall@linaro.org>
+ * Copyright (C) 2014 Linaro Limited.
+ *
+ * This driver currently supports:
+ *  - SMMUv1 and v2 implementations (didn't try v2 SMMU)
+ *  - Stream-matching and stream-indexing
+ *  - v7/v8 long-descriptor format
+ *  - Non-secure access to the SMMU
+ *  - 4k pages, p2m shared with the processor
+ *  - Up to 40-bit addressing
+ *  - Context fault reporting
+ */
+
+#include <xen/config.h>
+#include <xen/delay.h>
+#include <xen/errno.h>
+#include <xen/irq.h>
+#include <xen/lib.h>
+#include <xen/list.h>
+#include <xen/mm.h>
+#include <xen/vmap.h>
+#include <xen/rbtree.h>
+#include <xen/sched.h>
+#include <asm/atomic.h>
+#include <asm/device.h>
+#include <asm/io.h>
+#include <asm/platform.h>
+
+/* Driver options */
+#define SMMU_OPT_SECURE_CONFIG_ACCESS   (1 << 0)
+
+/* Maximum number of stream IDs assigned to a single device */
+#define MAX_MASTER_STREAMIDS    MAX_PHANDLE_ARGS
+
+/* Maximum stream ID */
+#define SMMU_MAX_STREAMIDS      (PAGE_SIZE_64K - 1)
+
+/* Maximum number of context banks per SMMU */
+#define SMMU_MAX_CBS        128
+
+/* Maximum number of mapping groups per SMMU */
+#define SMMU_MAX_SMRS       128
+
+/* SMMU global address space */
+#define SMMU_GR0(smmu)      ((smmu)->base)
+#define SMMU_GR1(smmu)      ((smmu)->base + (smmu)->pagesize)
+
+/*
+ * SMMU global address space with conditional offset to access secure aliases of
+ * non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448, nsGFSYNR0: 0x450)
+ */
+#define SMMU_GR0_NS(smmu)                                   \
+    ((smmu)->base +                                         \
+     ((smmu->options & SMMU_OPT_SECURE_CONFIG_ACCESS)    \
+        ? 0x400 : 0))
+
+/* Page table bits */
+#define SMMU_PTE_PAGE           (((pteval_t)3) << 0)
+#define SMMU_PTE_CONT           (((pteval_t)1) << 52)
+#define SMMU_PTE_AF             (((pteval_t)1) << 10)
+#define SMMU_PTE_SH_NS          (((pteval_t)0) << 8)
+#define SMMU_PTE_SH_OS          (((pteval_t)2) << 8)
+#define SMMU_PTE_SH_IS          (((pteval_t)3) << 8)
+
+#if PAGE_SIZE == PAGE_SIZE_4K
+#define SMMU_PTE_CONT_ENTRIES   16
+#elif PAGE_SIZE == PAGE_SIZE_64K
+#define SMMU_PTE_CONT_ENTRIES   32
+#else
+#define SMMU_PTE_CONT_ENTRIES   1
+#endif
+
+#define SMMU_PTE_CONT_SIZE      (PAGE_SIZE * SMMU_PTE_CONT_ENTRIES)
+#define SMMU_PTE_CONT_MASK      (~(SMMU_PTE_CONT_SIZE - 1))
+#define SMMU_PTE_HWTABLE_SIZE   (PTRS_PER_PTE * sizeof(pte_t))
+
+/* Stage-1 PTE */
+#define SMMU_PTE_AP_UNPRIV      (((pteval_t)1) << 6)
+#define SMMU_PTE_AP_RDONLY      (((pteval_t)2) << 6)
+#define SMMU_PTE_ATTRINDX_SHIFT 2
+#define SMMU_PTE_nG             (((pteval_t)1) << 11)
+
+/* Stage-2 PTE */
+#define SMMU_PTE_HAP_FAULT      (((pteval_t)0) << 6)
+#define SMMU_PTE_HAP_READ       (((pteval_t)1) << 6)
+#define SMMU_PTE_HAP_WRITE      (((pteval_t)2) << 6)
+#define SMMU_PTE_MEMATTR_OIWB   (((pteval_t)0xf) << 2)
+#define SMMU_PTE_MEMATTR_NC     (((pteval_t)0x5) << 2)
+#define SMMU_PTE_MEMATTR_DEV    (((pteval_t)0x1) << 2)
+
+/* Configuration registers */
+#define SMMU_GR0_sCR0           0x0
+#define SMMU_sCR0_CLIENTPD      (1 << 0)
+#define SMMU_sCR0_GFRE          (1 << 1)
+#define SMMU_sCR0_GFIE          (1 << 2)
+#define SMMU_sCR0_GCFGFRE       (1 << 4)
+#define SMMU_sCR0_GCFGFIE       (1 << 5)
+#define SMMU_sCR0_USFCFG        (1 << 10)
+#define SMMU_sCR0_VMIDPNE       (1 << 11)
+#define SMMU_sCR0_PTM           (1 << 12)
+#define SMMU_sCR0_FB            (1 << 13)
+#define SMMU_sCR0_BSU_SHIFT     14
+#define SMMU_sCR0_BSU_MASK      0x3
+
+/* Identification registers */
+#define SMMU_GR0_ID0            0x20
+#define SMMU_GR0_ID1            0x24
+#define SMMU_GR0_ID2            0x28
+#define SMMU_GR0_ID3            0x2c
+#define SMMU_GR0_ID4            0x30
+#define SMMU_GR0_ID5            0x34
+#define SMMU_GR0_ID6            0x38
+#define SMMU_GR0_ID7            0x3c
+#define SMMU_GR0_sGFSR          0x48
+#define SMMU_GR0_sGFSYNR0       0x50
+#define SMMU_GR0_sGFSYNR1       0x54
+#define SMMU_GR0_sGFSYNR2       0x58
+#define SMMU_GR0_PIDR0          0xfe0
+#define SMMU_GR0_PIDR1          0xfe4
+#define SMMU_GR0_PIDR2          0xfe8
+
+#define SMMU_ID0_S1TS           (1 << 30)
+#define SMMU_ID0_S2TS           (1 << 29)
+#define SMMU_ID0_NTS            (1 << 28)
+#define SMMU_ID0_SMS            (1 << 27)
+#define SMMU_ID0_PTFS_SHIFT     24
+#define SMMU_ID0_PTFS_MASK      0x2
+#define SMMU_ID0_PTFS_V8_ONLY   0x2
+#define SMMU_ID0_CTTW           (1 << 14)
+#define SMMU_ID0_NUMIRPT_SHIFT  16
+#define SMMU_ID0_NUMIRPT_MASK   0xff
+#define SMMU_ID0_NUMSMRG_SHIFT  0
+#define SMMU_ID0_NUMSMRG_MASK   0xff
+
+#define SMMU_ID1_PAGESIZE            (1 << 31)
+#define SMMU_ID1_NUMPAGENDXB_SHIFT   28
+#define SMMU_ID1_NUMPAGENDXB_MASK    7
+#define SMMU_ID1_NUMS2CB_SHIFT       16
+#define SMMU_ID1_NUMS2CB_MASK        0xff
+#define SMMU_ID1_NUMCB_SHIFT         0
+#define SMMU_ID1_NUMCB_MASK          0xff
+
+#define SMMU_ID2_OAS_SHIFT           4
+#define SMMU_ID2_OAS_MASK            0xf
+#define SMMU_ID2_IAS_SHIFT           0
+#define SMMU_ID2_IAS_MASK            0xf
+#define SMMU_ID2_UBS_SHIFT           8
+#define SMMU_ID2_UBS_MASK            0xf
+#define SMMU_ID2_PTFS_4K             (1 << 12)
+#define SMMU_ID2_PTFS_16K            (1 << 13)
+#define SMMU_ID2_PTFS_64K            (1 << 14)
+
+#define SMMU_PIDR2_ARCH_SHIFT        4
+#define SMMU_PIDR2_ARCH_MASK         0xf
+
+/* Global TLB invalidation */
+#define SMMU_GR0_STLBIALL           0x60
+#define SMMU_GR0_TLBIVMID           0x64
+#define SMMU_GR0_TLBIALLNSNH        0x68
+#define SMMU_GR0_TLBIALLH           0x6c
+#define SMMU_GR0_sTLBGSYNC          0x70
+#define SMMU_GR0_sTLBGSTATUS        0x74
+#define SMMU_sTLBGSTATUS_GSACTIVE   (1 << 0)
+#define SMMU_TLB_LOOP_TIMEOUT       1000000 /* 1s! */
+
+/* Stream mapping registers */
+#define SMMU_GR0_SMR(n)             (0x800 + ((n) << 2))
+#define SMMU_SMR_VALID              (1 << 31)
+#define SMMU_SMR_MASK_SHIFT         16
+#define SMMU_SMR_MASK_MASK          0x7fff
+#define SMMU_SMR_ID_SHIFT           0
+#define SMMU_SMR_ID_MASK            0x7fff
+
+#define SMMU_GR0_S2CR(n)        (0xc00 + ((n) << 2))
+#define SMMU_S2CR_CBNDX_SHIFT   0
+#define SMMU_S2CR_CBNDX_MASK    0xff
+#define SMMU_S2CR_TYPE_SHIFT    16
+#define SMMU_S2CR_TYPE_MASK     0x3
+#define SMMU_S2CR_TYPE_TRANS    (0 << SMMU_S2CR_TYPE_SHIFT)
+#define SMMU_S2CR_TYPE_BYPASS   (1 << SMMU_S2CR_TYPE_SHIFT)
+#define SMMU_S2CR_TYPE_FAULT    (2 << SMMU_S2CR_TYPE_SHIFT)
+
+/* Context bank attribute registers */
+#define SMMU_GR1_CBAR(n)                    (0x0 + ((n) << 2))
+#define SMMU_CBAR_VMID_SHIFT                0
+#define SMMU_CBAR_VMID_MASK                 0xff
+#define SMMU_CBAR_S1_MEMATTR_SHIFT          12
+#define SMMU_CBAR_S1_MEMATTR_MASK           0xf
+#define SMMU_CBAR_S1_MEMATTR_WB             0xf
+#define SMMU_CBAR_TYPE_SHIFT                16
+#define SMMU_CBAR_TYPE_MASK                 0x3
+#define SMMU_CBAR_TYPE_S2_TRANS             (0 << SMMU_CBAR_TYPE_SHIFT)
+#define SMMU_CBAR_TYPE_S1_TRANS_S2_BYPASS   (1 << SMMU_CBAR_TYPE_SHIFT)
+#define SMMU_CBAR_TYPE_S1_TRANS_S2_FAULT    (2 << SMMU_CBAR_TYPE_SHIFT)
+#define SMMU_CBAR_TYPE_S1_TRANS_S2_TRANS    (3 << SMMU_CBAR_TYPE_SHIFT)
+#define SMMU_CBAR_IRPTNDX_SHIFT             24
+#define SMMU_CBAR_IRPTNDX_MASK              0xff
+
+#define SMMU_GR1_CBA2R(n)                   (0x800 + ((n) << 2))
+#define SMMU_CBA2R_RW64_32BIT               (0 << 0)
+#define SMMU_CBA2R_RW64_64BIT               (1 << 0)
+
+/* Translation context bank */
+#define SMMU_CB_BASE(smmu)                  ((smmu)->base + ((smmu)->size >> 1))
+#define SMMU_CB(smmu, n)                    ((n) * (smmu)->pagesize)
+
+#define SMMU_CB_SCTLR                       0x0
+#define SMMU_CB_RESUME                      0x8
+#define SMMU_CB_TCR2                        0x10
+#define SMMU_CB_TTBR0_LO                    0x20
+#define SMMU_CB_TTBR0_HI                    0x24
+#define SMMU_CB_TCR                         0x30
+#define SMMU_CB_S1_MAIR0                    0x38
+#define SMMU_CB_FSR                         0x58
+#define SMMU_CB_FAR_LO                      0x60
+#define SMMU_CB_FAR_HI                      0x64
+#define SMMU_CB_FSYNR0                      0x68
+#define SMMU_CB_S1_TLBIASID                 0x610
+
+#define SMMU_SCTLR_S1_ASIDPNE               (1 << 12)
+#define SMMU_SCTLR_CFCFG                    (1 << 7)
+#define SMMU_SCTLR_CFIE                     (1 << 6)
+#define SMMU_SCTLR_CFRE                     (1 << 5)
+#define SMMU_SCTLR_E                        (1 << 4)
+#define SMMU_SCTLR_AFE                      (1 << 2)
+#define SMMU_SCTLR_TRE                      (1 << 1)
+#define SMMU_SCTLR_M                        (1 << 0)
+#define SMMU_SCTLR_EAE_SBOP                 (SMMU_SCTLR_AFE | SMMU_SCTLR_TRE)
+
+#define SMMU_RESUME_RETRY                   (0 << 0)
+#define SMMU_RESUME_TERMINATE               (1 << 0)
+
+#define SMMU_TCR_EAE                        (1 << 31)
+
+#define SMMU_TCR_PASIZE_SHIFT               16
+#define SMMU_TCR_PASIZE_MASK                0x7
+
+#define SMMU_TCR_TG0_4K                     (0 << 14)
+#define SMMU_TCR_TG0_64K                    (1 << 14)
+
+#define SMMU_TCR_SH0_SHIFT                  12
+#define SMMU_TCR_SH0_MASK                   0x3
+#define SMMU_TCR_SH_NS                      0
+#define SMMU_TCR_SH_OS                      2
+#define SMMU_TCR_SH_IS                      3
+
+#define SMMU_TCR_ORGN0_SHIFT                10
+#define SMMU_TCR_IRGN0_SHIFT                8
+#define SMMU_TCR_RGN_MASK                   0x3
+#define SMMU_TCR_RGN_NC                     0
+#define SMMU_TCR_RGN_WBWA                   1
+#define SMMU_TCR_RGN_WT                     2
+#define SMMU_TCR_RGN_WB                     3
+
+#define SMMU_TCR_SL0_SHIFT                  6
+#define SMMU_TCR_SL0_MASK                   0x3
+#define SMMU_TCR_SL0_LVL_2                  0
+#define SMMU_TCR_SL0_LVL_1                  1
+
+#define SMMU_TCR_T1SZ_SHIFT                 16
+#define SMMU_TCR_T0SZ_SHIFT                 0
+#define SMMU_TCR_SZ_MASK                    0xf
+
+#define SMMU_TCR2_SEP_SHIFT                 15
+#define SMMU_TCR2_SEP_MASK                  0x7
+
+#define SMMU_TCR2_PASIZE_SHIFT              0
+#define SMMU_TCR2_PASIZE_MASK               0x7
+
+/* Common definitions for PASize and SEP fields */
+#define SMMU_TCR2_ADDR_32                   0
+#define SMMU_TCR2_ADDR_36                   1
+#define SMMU_TCR2_ADDR_40                   2
+#define SMMU_TCR2_ADDR_42                   3
+#define SMMU_TCR2_ADDR_44                   4
+#define SMMU_TCR2_ADDR_48                   5
+
+#define SMMU_TTBRn_HI_ASID_SHIFT            16
+
+#define SMMU_MAIR_ATTR_SHIFT(n)             ((n) << 3)
+#define SMMU_MAIR_ATTR_MASK                 0xff
+#define SMMU_MAIR_ATTR_DEVICE               0x04
+#define SMMU_MAIR_ATTR_NC                   0x44
+#define SMMU_MAIR_ATTR_WBRWA                0xff
+#define SMMU_MAIR_ATTR_IDX_NC               0
+#define SMMU_MAIR_ATTR_IDX_CACHE            1
+#define SMMU_MAIR_ATTR_IDX_DEV              2
+
+#define SMMU_FSR_MULTI                      (1 << 31)
+#define SMMU_FSR_SS                         (1 << 30)
+#define SMMU_FSR_UUT                        (1 << 8)
+#define SMMU_FSR_ASF                        (1 << 7)
+#define SMMU_FSR_TLBLKF                     (1 << 6)
+#define SMMU_FSR_TLBMCF                     (1 << 5)
+#define SMMU_FSR_EF                         (1 << 4)
+#define SMMU_FSR_PF                         (1 << 3)
+#define SMMU_FSR_AFF                        (1 << 2)
+#define SMMU_FSR_TF                         (1 << 1)
+
+#define SMMU_FSR_IGN                        (SMMU_FSR_AFF | SMMU_FSR_ASF |    \
+                                             SMMU_FSR_TLBMCF | SMMU_FSR_TLBLKF)
+#define SMMU_FSR_FAULT                      (SMMU_FSR_MULTI | SMMU_FSR_SS |   \
+                                             SMMU_FSR_UUT | SMMU_FSR_EF |     \
+                                             SMMU_FSR_PF | SMMU_FSR_TF |      \
+                                             SMMU_FSR_IGN)
+
+#define SMMU_FSYNR0_WNR                     (1 << 4)
+
+#define smmu_print(dev, lvl, fmt, ...)                                        \
+    printk(lvl "smmu: %s: " fmt, dt_node_full_name(dev->node), ## __VA_ARGS__)
+
+#define smmu_err(dev, fmt, ...) smmu_print(dev, XENLOG_ERR, fmt, ## __VA_ARGS__)
+
+#define smmu_dbg(dev, fmt, ...)                                             \
+    smmu_print(dev, XENLOG_DEBUG, fmt, ## __VA_ARGS__)
+
+#define smmu_info(dev, fmt, ...)                                            \
+    smmu_print(dev, XENLOG_INFO, fmt, ## __VA_ARGS__)
+
+#define smmu_warn(dev, fmt, ...)                                            \
+    smmu_print(dev, XENLOG_WARNING, fmt, ## __VA_ARGS__)
+
+struct arm_smmu_device {
+    const struct dt_device_node *node;
+
+    void __iomem                *base;
+    unsigned long               size;
+    unsigned long               pagesize;
+
+#define SMMU_FEAT_COHERENT_WALK (1 << 0)
+#define SMMU_FEAT_STREAM_MATCH  (1 << 1)
+#define SMMU_FEAT_TRANS_S1      (1 << 2)
+#define SMMU_FEAT_TRANS_S2      (1 << 3)
+#define SMMU_FEAT_TRANS_NESTED  (1 << 4)
+    u32                         features;
+    u32                         options;
+    int                         version;
+
+    u32                         num_context_banks;
+    u32                         num_s2_context_banks;
+    DECLARE_BITMAP(context_map, SMMU_MAX_CBS);
+    atomic_t                    irptndx;
+
+    u32                         num_mapping_groups;
+    DECLARE_BITMAP(smr_map, SMMU_MAX_SMRS);
+
+    unsigned long               input_size;
+    unsigned long               s1_output_size;
+    unsigned long               s2_output_size;
+
+    u32                         num_global_irqs;
+    u32                         num_context_irqs;
+    struct dt_irq               *irqs;
+
+    u32                         smr_mask_mask;
+    u32                         smr_id_mask;
+
+    unsigned long               *sids;
+
+    struct list_head            list;
+    struct rb_root              masters;
+};
+
+struct arm_smmu_smr {
+    u8                          idx;
+    u16                         mask;
+    u16                         id;
+};
+
+#define INVALID_IRPTNDX         0xff
+
+#define SMMU_CB_ASID(cfg)       ((cfg)->cbndx)
+#define SMMU_CB_VMID(cfg)       ((cfg)->cbndx + 1)
+
+struct arm_smmu_domain_cfg {
+    struct arm_smmu_device  *smmu;
+    u8                      cbndx;
+    u8                      irptndx;
+    u32                     cbar;
+    /* Domain associated to this device */
+    struct domain           *domain;
+    /* List of master which use this structure */
+    struct list_head        masters;
+
+    /* Used to link domain context for a same domain */
+    struct list_head        list;
+};
+
+struct arm_smmu_master {
+    const struct dt_device_node *dt_node;
+
+    /*
+     * The following is specific to the master's position in the
+     * SMMU chain.
+     */
+    struct rb_node              node;
+    u32                         num_streamids;
+    u16                         streamids[MAX_MASTER_STREAMIDS];
+    int                         num_s2crs;
+
+    struct arm_smmu_smr         *smrs;
+    struct arm_smmu_domain_cfg  *cfg;
+
+    /* Used to link masters in a same domain context */
+    struct list_head            list;
+};
+
+static LIST_HEAD(arm_smmu_devices);
+
+struct arm_smmu_domain {
+    spinlock_t lock;
+    struct list_head contexts;
+};
+
+struct arm_smmu_option_prop {
+    u32         opt;
+    const char  *prop;
+};
+
+static const struct arm_smmu_option_prop arm_smmu_options [] __initconst =
+{
+    { SMMU_OPT_SECURE_CONFIG_ACCESS, "calxeda,smmu-secure-config-access" },
+    { 0, NULL},
+};
+
+static void __init check_driver_options(struct arm_smmu_device *smmu)
+{
+    int i = 0;
+
+    do {
+        if ( dt_property_read_bool(smmu->node, arm_smmu_options[i].prop) )
+        {
+            smmu->options |= arm_smmu_options[i].opt;
+            smmu_dbg(smmu, "option %s\n", arm_smmu_options[i].prop);
+        }
+    } while ( arm_smmu_options[++i].opt );
+}
+
+static void arm_smmu_context_fault(int irq, void *data,
+                                   struct cpu_user_regs *regs)
+{
+    u32 fsr, far, fsynr;
+    unsigned long iova;
+    struct arm_smmu_domain_cfg *cfg = data;
+    struct arm_smmu_device *smmu = cfg->smmu;
+    void __iomem *cb_base;
+
+    cb_base = SMMU_CB_BASE(smmu) + SMMU_CB(smmu, cfg->cbndx);
+    fsr = readl_relaxed(cb_base + SMMU_CB_FSR);
+
+    if ( !(fsr & SMMU_FSR_FAULT) )
+        return;
+
+    if ( fsr & SMMU_FSR_IGN )
+        smmu_err(smmu, "Unexpected context fault (fsr 0x%u)\n", fsr);
+
+    fsynr = readl_relaxed(cb_base + SMMU_CB_FSYNR0);
+    far = readl_relaxed(cb_base + SMMU_CB_FAR_LO);
+    iova = far;
+#ifdef CONFIG_ARM_64
+    far = readl_relaxed(cb_base + SMMU_CB_FAR_HI);
+    iova |= ((unsigned long)far << 32);
+#endif
+
+    smmu_err(smmu,
+             "Unhandled context fault: iova=0x%08lx, fsynr=0x%x, cb=%d\n",
+             iova, fsynr, cfg->cbndx);
+
+    /* Clear the faulting FSR */
+    writel(fsr, cb_base + SMMU_CB_FSR);
+
+    /* Terminate any stalled transactions */
+    if ( fsr & SMMU_FSR_SS )
+        writel_relaxed(SMMU_RESUME_TERMINATE, cb_base + SMMU_CB_RESUME);
+}
+
+static void arm_smmu_global_fault(int irq, void *data,
+                                  struct cpu_user_regs *regs)
+{
+    u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
+    struct arm_smmu_device *smmu = data;
+    void __iomem *gr0_base = SMMU_GR0_NS(smmu);
+
+    gfsr = readl_relaxed(gr0_base + SMMU_GR0_sGFSR);
+    gfsynr0 = readl_relaxed(gr0_base + SMMU_GR0_sGFSYNR0);
+    gfsynr1 = readl_relaxed(gr0_base + SMMU_GR0_sGFSYNR1);
+    gfsynr2 = readl_relaxed(gr0_base + SMMU_GR0_sGFSYNR2);
+
+    if ( !gfsr )
+        return;
+
+    smmu_err(smmu, "Unexpected global fault, this could be serious\n");
+    smmu_err(smmu,
+             "\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
+             gfsr, gfsynr0, gfsynr1, gfsynr2);
+    writel(gfsr, gr0_base + SMMU_GR0_sGFSR);
+}
+
+static struct arm_smmu_master *
+find_smmu_master(struct arm_smmu_device *smmu,
+                 const struct dt_device_node *dev_node)
+{
+    struct rb_node *node = smmu->masters.rb_node;
+
+    while ( node )
+    {
+        struct arm_smmu_master *master;
+
+        master = container_of(node, struct arm_smmu_master, node);
+
+        if ( dev_node < master->dt_node )
+            node = node->rb_left;
+        else if ( dev_node > master->dt_node )
+            node = node->rb_right;
+        else
+            return master;
+    }
+
+    return NULL;
+}
+
+static __init int insert_smmu_master(struct arm_smmu_device *smmu,
+                                     struct arm_smmu_master *master)
+{
+    struct rb_node **new, *parent;
+
+    new = &smmu->masters.rb_node;
+    parent = NULL;
+    while ( *new )
+    {
+        struct arm_smmu_master *this;
+
+        this = container_of(*new, struct arm_smmu_master, node);
+
+        parent = *new;
+        if ( master->dt_node < this->dt_node )
+            new = &((*new)->rb_left);
+        else if (master->dt_node > this->dt_node)
+            new = &((*new)->rb_right);
+        else
+            return -EEXIST;
+    }
+
+    rb_link_node(&master->node, parent, new);
+    rb_insert_color(&master->node, &smmu->masters);
+    return 0;
+}
+
+static __init int register_smmu_master(struct arm_smmu_device *smmu,
+                                       struct dt_phandle_args *masterspec)
+{
+    int i, sid;
+    struct arm_smmu_master *master;
+    int rc = 0;
+
+    smmu_dbg(smmu, "Try to add master %s\n", masterspec->np->name);
+
+    master = find_smmu_master(smmu, masterspec->np);
+    if ( master )
+    {
+        smmu_err(smmu,
+                 "rejecting multiple registrations for master device %s\n",
+                 masterspec->np->name);
+        return -EBUSY;
+    }
+
+    if ( masterspec->args_count > MAX_MASTER_STREAMIDS )
+    {
+        smmu_err(smmu,
+            "reached maximum number (%d) of stream IDs for master device %s\n",
+            MAX_MASTER_STREAMIDS, masterspec->np->name);
+        return -ENOSPC;
+    }
+
+    master = xzalloc(struct arm_smmu_master);
+    if ( !master )
+        return -ENOMEM;
+
+    INIT_LIST_HEAD(&master->list);
+    master->dt_node = masterspec->np;
+    master->num_streamids = masterspec->args_count;
+
+    dt_device_set_protected(masterspec->np);
+
+    for ( i = 0; i < master->num_streamids; ++i )
+    {
+        sid = masterspec->args[i];
+        if ( test_and_set_bit(sid, smmu->sids) )
+        {
+            smmu_err(smmu, "duplicate stream ID (%d)\n", sid);
+            xfree(master);
+            return -EEXIST;
+        }
+        master->streamids[i] = masterspec->args[i];
+    }
+
+    rc = insert_smmu_master(smmu, master);
+    /* Insertion should never fail */
+    ASSERT(rc == 0);
+
+    return 0;
+}
+
+static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
+{
+    int idx;
+
+    do
+    {
+        idx = find_next_zero_bit(map, end, start);
+        if ( idx == end )
+            return -ENOSPC;
+    } while ( test_and_set_bit(idx, map) );
+
+    return idx;
+}
+
+static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
+{
+    clear_bit(idx, map);
+}
+
+static void arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
+{
+    int count = 0;
+    void __iomem *gr0_base = SMMU_GR0(smmu);
+
+    writel_relaxed(0, gr0_base + SMMU_GR0_sTLBGSYNC);
+    while ( readl_relaxed(gr0_base + SMMU_GR0_sTLBGSTATUS) &
+            SMMU_sTLBGSTATUS_GSACTIVE )
+    {
+        cpu_relax();
+        if ( ++count == SMMU_TLB_LOOP_TIMEOUT )
+        {
+            smmu_err(smmu, "TLB sync timed out -- SMMU may be deadlocked\n");
+            return;
+        }
+        udelay(1);
+    }
+}
+
+static void arm_smmu_tlb_inv_context(struct arm_smmu_domain_cfg *cfg)
+{
+    struct arm_smmu_device *smmu = cfg->smmu;
+    void __iomem *base = SMMU_GR0(smmu);
+
+    writel_relaxed(SMMU_CB_VMID(cfg),
+                   base + SMMU_GR0_TLBIVMID);
+
+    arm_smmu_tlb_sync(smmu);
+}
+
+static void arm_smmu_iotlb_flush_all(struct domain *d)
+{
+    struct arm_smmu_domain *smmu_domain = domain_hvm_iommu(d)->arch.priv;
+    struct arm_smmu_domain_cfg *cfg;
+
+    spin_lock(&smmu_domain->lock);
+    list_for_each_entry(cfg, &smmu_domain->contexts, list)
+        arm_smmu_tlb_inv_context(cfg);
+    spin_unlock(&smmu_domain->lock);
+}
+
+static void arm_smmu_iotlb_flush(struct domain *d, unsigned long gfn,
+                                 unsigned int page_count)
+{
+    /* ARM SMMU v1 doesn't have flush by VMA and VMID */
+    arm_smmu_iotlb_flush_all(d);
+}
+
+static int determine_smr_mask(struct arm_smmu_device *smmu,
+                              struct arm_smmu_master *master,
+                              struct arm_smmu_smr *smr, int start, int order)
+{
+    u16 i, zero_bits_mask, one_bits_mask, const_mask;
+    int nr;
+
+    nr = 1 << order;
+
+    if ( nr == 1 )
+    {
+        /* no mask, use streamid to match and be done with it */
+        smr->mask = 0;
+        smr->id = master->streamids[start];
+        return 0;
+    }
+
+    zero_bits_mask = 0;
+    one_bits_mask = 0xffff;
+    for ( i = start; i < start + nr; i++)
+    {
+        zero_bits_mask |= master->streamids[i];   /* const 0 bits */
+        one_bits_mask &= master->streamids[i]; /* const 1 bits */
+    }
+    zero_bits_mask = ~zero_bits_mask;
+
+    /* bits having constant values (either 0 or 1) */
+    const_mask = zero_bits_mask | one_bits_mask;
+
+    i = hweight16(~const_mask);
+    if ( (1 << i) == nr )
+    {
+        smr->mask = ~const_mask;
+        smr->id = one_bits_mask;
+    }
+    else
+        /* no usable mask for this set of streamids */
+        return 1;
+
+    if ( ((smr->mask & smmu->smr_mask_mask) != smr->mask) ||
+         ((smr->id & smmu->smr_id_mask) != smr->id) )
+        /* insufficient number of mask/id bits */
+        return 1;
+
+    return 0;
+}
+
+static int determine_smr_mapping(struct arm_smmu_device *smmu,
+                                 struct arm_smmu_master *master,
+                                 struct arm_smmu_smr *smrs, int max_smrs)
+{
+    int nr_sid, nr, i, bit, start;
+
+    /*
+     * This function is called only once -- when a master is added
+     * to a domain. If master->num_s2crs != 0 then this master
+     * was already added to a domain.
+     */
+    BUG_ON(master->num_s2crs);
+
+    start = nr = 0;
+    nr_sid = master->num_streamids;
+    do
+    {
+        /*
+         * largest power-of-2 number of streamids for which to
+         * determine a usable mask/id pair for stream matching
+         */
+        bit = fls(nr_sid);
+        if (!bit)
+            return 0;
+
+        /*
+         * iterate over power-of-2 numbers to determine
+         * largest possible mask/id pair for stream matching
+         * of next 2**i streamids
+         */
+        for ( i = bit - 1; i >= 0; i-- )
+        {
+            if( !determine_smr_mask(smmu, master,
+                                    &smrs[master->num_s2crs],
+                                    start, i))
+                break;
+        }
+
+        if ( i < 0 )
+            goto out;
+
+        nr = 1 << i;
+        nr_sid -= nr;
+        start += nr;
+        master->num_s2crs++;
+    } while ( master->num_s2crs <= max_smrs );
+
+out:
+    if ( nr_sid )
+    {
+        /* not enough mapping groups available */
+        master->num_s2crs = 0;
+        return -ENOSPC;
+    }
+
+    return 0;
+}
+
+static int arm_smmu_master_configure_smrs(struct arm_smmu_device *smmu,
+                                          struct arm_smmu_master *master)
+{
+    int i, max_smrs, ret;
+    struct arm_smmu_smr *smrs;
+    void __iomem *gr0_base = SMMU_GR0(smmu);
+
+    if ( !(smmu->features & SMMU_FEAT_STREAM_MATCH) )
+        return 0;
+
+    if ( master->smrs )
+        return -EEXIST;
+
+    max_smrs = min(smmu->num_mapping_groups, master->num_streamids);
+    smrs = xmalloc_array(struct arm_smmu_smr, max_smrs);
+    if ( !smrs )
+    {
+        smmu_err(smmu, "failed to allocated %d SMRs for master %s\n",
+                 max_smrs, dt_node_name(master->dt_node));
+        return -ENOMEM;
+    }
+
+    ret = determine_smr_mapping(smmu, master, smrs, max_smrs);
+    if ( ret )
+        goto err_free_smrs;
+
+    /* Allocate the SMRs on the root SMMU */
+    for ( i = 0; i < master->num_s2crs; ++i )
+    {
+        int idx = __arm_smmu_alloc_bitmap(smmu->smr_map, 0,
+                                          smmu->num_mapping_groups);
+        if ( idx < 0 )
+        {
+            smmu_err(smmu, "failed to allocate free SMR\n");
+            goto err_free_bitmap;
+        }
+        smrs[i].idx = idx;
+    }
+
+    /* It worked! Now, poke the actual hardware */
+    for ( i = 0; i < master->num_s2crs; ++i )
+    {
+        u32 reg = SMMU_SMR_VALID | smrs[i].id << SMMU_SMR_ID_SHIFT |
+            smrs[i].mask << SMMU_SMR_MASK_SHIFT;
+        smmu_dbg(smmu, "SMR%d: 0x%x\n", smrs[i].idx, reg);
+        writel_relaxed(reg, gr0_base + SMMU_GR0_SMR(smrs[i].idx));
+    }
+
+    master->smrs = smrs;
+    return 0;
+
+err_free_bitmap:
+    while (--i >= 0)
+        __arm_smmu_free_bitmap(smmu->smr_map, smrs[i].idx);
+    master->num_s2crs = 0;
+err_free_smrs:
+    xfree(smrs);
+    return -ENOSPC;
+}
+
+/* Forward declaration */
+static void arm_smmu_destroy_domain_context(struct arm_smmu_domain_cfg *cfg);
+
+static int arm_smmu_domain_add_master(struct domain *d,
+                                      struct arm_smmu_domain_cfg *cfg,
+                                      struct arm_smmu_master *master)
+{
+    int i, ret;
+    struct arm_smmu_device *smmu = cfg->smmu;
+    void __iomem *gr0_base = SMMU_GR0(smmu);
+    struct arm_smmu_smr *smrs = master->smrs;
+
+    if ( master->cfg )
+        return -EBUSY;
+
+    ret = arm_smmu_master_configure_smrs(smmu, master);
+    if ( ret )
+        return ret;
+
+    /* Now we're at the root, time to point at our context bank */
+    if ( !master->num_s2crs )
+        master->num_s2crs = master->num_streamids;
+
+    for ( i = 0; i < master->num_s2crs; ++i )
+    {
+        u32 idx, s2cr;
+
+        idx = smrs ? smrs[i].idx : master->streamids[i];
+        s2cr = (SMMU_S2CR_TYPE_TRANS << SMMU_S2CR_TYPE_SHIFT) |
+            (cfg->cbndx << SMMU_S2CR_CBNDX_SHIFT);
+        smmu_dbg(smmu, "S2CR%d: 0x%x\n", idx, s2cr);
+        writel_relaxed(s2cr, gr0_base + SMMU_GR0_S2CR(idx));
+    }
+
+    master->cfg = cfg;
+    list_add(&master->list, &cfg->masters);
+
+    return 0;
+}
+
+static void arm_smmu_domain_remove_master(struct arm_smmu_master *master)
+{
+    int i;
+    struct arm_smmu_domain_cfg *cfg = master->cfg;
+    struct arm_smmu_device *smmu = cfg->smmu;
+    void __iomem *gr0_base = SMMU_GR0(smmu);
+    struct arm_smmu_smr *smrs = master->smrs;
+
+    /*
+     * We *must* clear the S2CR first, because freeing the SMR means
+     * that it can be reallocated immediately
+     */
+    for ( i = 0; i < master->num_streamids; ++i )
+    {
+        u16 sid = master->streamids[i];
+        writel_relaxed(SMMU_S2CR_TYPE_FAULT,
+                       gr0_base + SMMU_GR0_S2CR(sid));
+    }
+
+    /* Invalidate the SMRs before freeing back to the allocator */
+    for (i = 0; i < master->num_streamids; ++i) {
+        u8 idx = smrs[i].idx;
+        writel_relaxed(~SMMU_SMR_VALID, gr0_base + SMMU_GR0_SMR(idx));
+        __arm_smmu_free_bitmap(smmu->smr_map, idx);
+    }
+
+    master->smrs = NULL;
+    xfree(smrs);
+
+    master->cfg = NULL;
+    list_del(&master->list);
+    INIT_LIST_HEAD(&master->list);
+}
+
+static void arm_smmu_init_context_bank(struct arm_smmu_domain_cfg *cfg)
+{
+    u32 reg;
+    struct arm_smmu_device *smmu = cfg->smmu;
+    void __iomem *cb_base, *gr0_base, *gr1_base;
+    paddr_t p2maddr;
+
+    ASSERT(cfg->domain != NULL);
+    p2maddr = page_to_maddr(cfg->domain->arch.p2m.first_level);
+
+    gr0_base = SMMU_GR0(smmu);
+    gr1_base = SMMU_GR1(smmu);
+    cb_base = SMMU_CB_BASE(smmu) + SMMU_CB(smmu, cfg->cbndx);
+
+    /* CBAR */
+    reg = cfg->cbar;
+    if ( smmu->version == 1 )
+        reg |= cfg->irptndx << SMMU_CBAR_IRPTNDX_SHIFT;
+
+    reg |= SMMU_CB_VMID(cfg) << SMMU_CBAR_VMID_SHIFT;
+    writel_relaxed(reg, gr1_base + SMMU_GR1_CBAR(cfg->cbndx));
+
+    if ( smmu->version > 1 )
+    {
+        /* CBA2R */
+#ifdef CONFIG_ARM_64
+        reg = SMMU_CBA2R_RW64_64BIT;
+#else
+        reg = SMMU_CBA2R_RW64_32BIT;
+#endif
+        writel_relaxed(reg, gr1_base + SMMU_GR1_CBA2R(cfg->cbndx));
+    }
+
+    /* TTBR0 */
+    reg = (p2maddr & ((1ULL << 32) - 1));
+    writel_relaxed(reg, cb_base + SMMU_CB_TTBR0_LO);
+    reg = (p2maddr >> 32);
+    writel_relaxed(reg, cb_base + SMMU_CB_TTBR0_HI);
+
+    /*
+     * TCR
+     * We use long descriptor, with inner-shareable WBWA tables in TTBR0.
+     */
+    if ( smmu->version > 1 )
+    {
+        /* 4K Page Table */
+        if ( PAGE_SIZE == PAGE_SIZE_4K )
+            reg = SMMU_TCR_TG0_4K;
+        else
+            reg = SMMU_TCR_TG0_64K;
+
+        switch ( smmu->s2_output_size ) {
+        case 32:
+            reg |= (SMMU_TCR2_ADDR_32 << SMMU_TCR_PASIZE_SHIFT);
+            break;
+        case 36:
+            reg |= (SMMU_TCR2_ADDR_36 << SMMU_TCR_PASIZE_SHIFT);
+            break;
+        case 40:
+            reg |= (SMMU_TCR2_ADDR_40 << SMMU_TCR_PASIZE_SHIFT);
+            break;
+        case 42:
+            reg |= (SMMU_TCR2_ADDR_42 << SMMU_TCR_PASIZE_SHIFT);
+            break;
+        case 44:
+            reg |= (SMMU_TCR2_ADDR_44 << SMMU_TCR_PASIZE_SHIFT);
+            break;
+        case 48:
+            reg |= (SMMU_TCR2_ADDR_48 << SMMU_TCR_PASIZE_SHIFT);
+            break;
+        }
+    }
+    else
+        reg = 0;
+
+    /* The attribute to walk the page table should be the same as VTCR_EL2 */
+    reg |= SMMU_TCR_EAE |
+        (SMMU_TCR_SH_NS << SMMU_TCR_SH0_SHIFT) |
+        (SMMU_TCR_RGN_WBWA << SMMU_TCR_ORGN0_SHIFT) |
+        (SMMU_TCR_RGN_WBWA << SMMU_TCR_IRGN0_SHIFT) |
+        (SMMU_TCR_SL0_LVL_1 << SMMU_TCR_SL0_SHIFT);
+    writel_relaxed(reg, cb_base + SMMU_CB_TCR);
+
+    /* SCTLR */
+    reg = SMMU_SCTLR_CFCFG |
+        SMMU_SCTLR_CFIE |
+        SMMU_SCTLR_CFRE |
+        SMMU_SCTLR_M |
+        SMMU_SCTLR_EAE_SBOP;
+
+    writel_relaxed(reg, cb_base + SMMU_CB_SCTLR);
+}
+
+static struct arm_smmu_domain_cfg *
+arm_smmu_alloc_domain_context(struct domain *d,
+                              struct arm_smmu_device *smmu)
+{
+    const struct dt_irq *irq;
+    int ret, start;
+    struct arm_smmu_domain_cfg *cfg;
+    struct arm_smmu_domain *smmu_domain = domain_hvm_iommu(d)->arch.priv;
+
+    ASSERT(spin_is_locked(&smmu_domain->lock));
+
+    cfg = xzalloc(struct arm_smmu_domain_cfg);
+    if ( !cfg )
+        return NULL;
+
+    /* Master already initialized to another domain ... */
+    if ( cfg->domain != NULL )
+        goto out_free_mem;
+
+    cfg->cbar = SMMU_CBAR_TYPE_S2_TRANS;
+    start = 0;
+
+    ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
+                                  smmu->num_context_banks);
+    if ( ret < 0 )
+        goto out_free_mem;
+
+    cfg->cbndx = ret;
+    if ( smmu->version == 1 )
+    {
+        cfg->irptndx = atomic_inc_return(&smmu->irptndx);
+        cfg->irptndx %= smmu->num_context_irqs;
+    }
+    else
+        cfg->irptndx = cfg->cbndx;
+
+    irq = &smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
+    ret = request_dt_irq(irq, arm_smmu_context_fault,
+                         "arm-smmu-context-fault", cfg);
+    if ( ret )
+    {
+        smmu_err(smmu, "failed to request context IRQ %d (%u)\n",
+                 cfg->irptndx, irq->irq);
+        cfg->irptndx = INVALID_IRPTNDX;
+        goto out_free_context;
+    }
+
+    cfg->domain = d;
+    cfg->smmu = smmu;
+
+    arm_smmu_init_context_bank(cfg);
+    list_add(&cfg->list, &smmu_domain->contexts);
+    INIT_LIST_HEAD(&cfg->masters);
+
+    return cfg;
+
+out_free_context:
+    __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
+out_free_mem:
+    xfree(cfg);
+
+    return NULL;
+}
+
+static void arm_smmu_destroy_domain_context(struct arm_smmu_domain_cfg *cfg)
+{
+    struct domain *d = cfg->domain;
+    struct arm_smmu_domain *smmu_domain = domain_hvm_iommu(d)->arch.priv;
+    struct arm_smmu_device *smmu = cfg->smmu;
+    void __iomem *cb_base;
+    const struct dt_irq *irq;
+
+    ASSERT(spin_is_locked(&smmu_domain->lock));
+    BUG_ON(!list_empty(&cfg->masters));
+
+    /* Disable the context bank and nuke the TLB before freeing it */
+    cb_base = SMMU_CB_BASE(smmu) + SMMU_CB(smmu, cfg->cbndx);
+    writel_relaxed(0, cb_base + SMMU_CB_SCTLR);
+    arm_smmu_tlb_inv_context(cfg);
+
+    if ( cfg->irptndx != INVALID_IRPTNDX )
+    {
+        irq = &smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
+        release_dt_irq(irq, cfg);
+    }
+
+    __arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
+    list_del(&cfg->list);
+    xfree(cfg);
+}
+
+static struct arm_smmu_device *
+arm_smmu_find_smmu_by_dev(const struct dt_device_node *dev)
+{
+    struct arm_smmu_device *smmu;
+    struct arm_smmu_master *master = NULL;
+
+    list_for_each_entry( smmu, &arm_smmu_devices, list )
+    {
+        master = find_smmu_master(smmu, dev);
+        if ( master )
+            break;
+    }
+
+    if ( !master )
+        return NULL;
+
+    return smmu;
+}
+
+static int arm_smmu_attach_dev(struct domain *d,
+                               const struct dt_device_node *dev)
+{
+    struct arm_smmu_device *smmu = arm_smmu_find_smmu_by_dev(dev);
+    struct arm_smmu_master *master;
+    struct arm_smmu_domain *smmu_domain = domain_hvm_iommu(d)->arch.priv;
+    struct arm_smmu_domain_cfg *cfg = NULL;
+    struct arm_smmu_domain_cfg *curr;
+    int ret;
+
+    printk(XENLOG_DEBUG "arm-smmu: attach %s to domain %d\n",
+           dt_node_full_name(dev), d->domain_id);
+
+    if ( !smmu )
+    {
+        printk(XENLOG_ERR "%s: cannot attach to SMMU, is it on the same bus?\n",
+               dt_node_full_name(dev));
+        return -ENODEV;
+    }
+
+    master = find_smmu_master(smmu, dev);
+    BUG_ON(master == NULL);
+
+    /* Check if the device is already assigned to someone */
+    if ( master->cfg )
+        return -EBUSY;
+
+    spin_lock(&smmu_domain->lock);
+    list_for_each_entry( curr, &smmu_domain->contexts, list )
+    {
+        if ( curr->smmu == smmu )
+        {
+            cfg = curr;
+            break;
+        }
+    }
+
+    if ( !cfg )
+    {
+        cfg = arm_smmu_alloc_domain_context(d, smmu);
+        if ( !cfg )
+        {
+            smmu_err(smmu, "unable to allocate context for domain %u\n",
+                     d->domain_id);
+            spin_unlock(&smmu_domain->lock);
+            return -ENOMEM;
+        }
+    }
+    spin_unlock(&smmu_domain->lock);
+
+    ret = arm_smmu_domain_add_master(d, cfg, master);
+    if ( ret )
+    {
+        spin_lock(&smmu_domain->lock);
+        if ( list_empty(&cfg->masters) )
+            arm_smmu_destroy_domain_context(cfg);
+        spin_unlock(&smmu_domain->lock);
+    }
+
+    return ret;
+}
+
+static int arm_smmu_detach_dev(struct domain *d,
+                               const struct dt_device_node *dev)
+{
+    struct arm_smmu_domain *smmu_domain = domain_hvm_iommu(d)->arch.priv;
+    struct arm_smmu_master *master;
+    struct arm_smmu_device *smmu = arm_smmu_find_smmu_by_dev(dev);
+    struct arm_smmu_domain_cfg *cfg;
+
+    printk(XENLOG_DEBUG "arm-smmu: detach %s to domain %d\n",
+           dt_node_full_name(dev), d->domain_id);
+
+    if ( !smmu )
+    {
+        printk(XENLOG_ERR "%s: cannot find the SMMU, is it on the same bus?\n",
+               dt_node_full_name(dev));
+        return -ENODEV;
+    }
+
+    master = find_smmu_master(smmu, dev);
+    BUG_ON(master == NULL);
+
+    cfg = master->cfg;
+
+    /* Sanity check to avoid removing a device that doesn't belong to
+     * the domain
+     */
+    if ( !cfg || cfg->domain != d )
+    {
+        printk(XENLOG_ERR "%s: was not attach to domain %d\n",
+               dt_node_full_name(dev), d->domain_id);
+        return -ESRCH;
+    }
+
+    arm_smmu_domain_remove_master(master);
+
+    spin_lock(&smmu_domain->lock);
+    if ( list_empty(&cfg->masters) )
+        arm_smmu_destroy_domain_context(cfg);
+    spin_unlock(&smmu_domain->lock);
+
+    return 0;
+}
+
+static int arm_smmu_reassign_dt_dev(struct domain *s, struct domain *t,
+                                    const struct dt_device_node *dev)
+{
+    int ret = 0;
+
+    /* Don't allow remapping on other domain than dom0 */
+    if ( t != dom0 )
+        return -EPERM;
+
+    if ( t == s )
+        return 0;
+
+    ret = arm_smmu_detach_dev(s, dev);
+    if ( ret )
+        return ret;
+
+    ret = arm_smmu_attach_dev(t, dev);
+
+    return ret;
+}
+
+static __init int arm_smmu_id_size_to_bits(int size)
+{
+    switch ( size )
+    {
+    case 0:
+        return 32;
+    case 1:
+        return 36;
+    case 2:
+        return 40;
+    case 3:
+        return 42;
+    case 4:
+        return 44;
+    case 5:
+    default:
+        return 48;
+    }
+}
+
+static __init int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
+{
+    unsigned long size;
+    void __iomem *gr0_base = SMMU_GR0(smmu);
+    u32 id;
+
+    smmu_info(smmu, "probing hardware configuration...\n");
+
+    /*
+     * Primecell ID
+     */
+    id = readl_relaxed(gr0_base + SMMU_GR0_PIDR2);
+    smmu->version = ((id >> SMMU_PIDR2_ARCH_SHIFT) & SMMU_PIDR2_ARCH_MASK) + 1;
+    smmu_info(smmu, "SMMUv%d with:\n", smmu->version);
+
+    /* ID0 */
+    id = readl_relaxed(gr0_base + SMMU_GR0_ID0);
+#ifndef CONFIG_ARM_64
+    if ( ((id >> SMMU_ID0_PTFS_SHIFT) & SMMU_ID0_PTFS_MASK) ==
+            SMMU_ID0_PTFS_V8_ONLY )
+    {
+        smmu_err(smmu, "\tno v7 descriptor support!\n");
+        return -ENODEV;
+    }
+#endif
+    if ( id & SMMU_ID0_S1TS )
+    {
+        smmu->features |= SMMU_FEAT_TRANS_S1;
+        smmu_info(smmu, "\tstage 1 translation\n");
+    }
+
+    if ( id & SMMU_ID0_S2TS )
+    {
+        smmu->features |= SMMU_FEAT_TRANS_S2;
+        smmu_info(smmu, "\tstage 2 translation\n");
+    }
+
+    if ( id & SMMU_ID0_NTS )
+    {
+        smmu->features |= SMMU_FEAT_TRANS_NESTED;
+        smmu_info(smmu, "\tnested translation\n");
+    }
+
+    if ( !(smmu->features &
+           (SMMU_FEAT_TRANS_S1 | SMMU_FEAT_TRANS_S2 |
+            SMMU_FEAT_TRANS_NESTED)) )
+    {
+        smmu_err(smmu, "\tno translation support!\n");
+        return -ENODEV;
+    }
+
+    /* We need at least support for Stage 2 */
+    if ( !(smmu->features & SMMU_FEAT_TRANS_S2) )
+    {
+        smmu_err(smmu, "\tno stage 2 translation!\n");
+        return -ENODEV;
+    }
+
+    if ( id & SMMU_ID0_CTTW )
+    {
+        smmu->features |= SMMU_FEAT_COHERENT_WALK;
+        smmu_info(smmu, "\tcoherent table walk\n");
+    }
+
+    if ( id & SMMU_ID0_SMS )
+    {
+        u32 smr, sid, mask;
+
+        smmu->features |= SMMU_FEAT_STREAM_MATCH;
+        smmu->num_mapping_groups = (id >> SMMU_ID0_NUMSMRG_SHIFT) &
+            SMMU_ID0_NUMSMRG_MASK;
+        if ( smmu->num_mapping_groups == 0 )
+        {
+            smmu_err(smmu,
+                     "stream-matching supported, but no SMRs present!\n");
+            return -ENODEV;
+        }
+
+        smr = SMMU_SMR_MASK_MASK << SMMU_SMR_MASK_SHIFT;
+        smr |= (SMMU_SMR_ID_MASK << SMMU_SMR_ID_SHIFT);
+        writel_relaxed(smr, gr0_base + SMMU_GR0_SMR(0));
+        smr = readl_relaxed(gr0_base + SMMU_GR0_SMR(0));
+
+        mask = (smr >> SMMU_SMR_MASK_SHIFT) & SMMU_SMR_MASK_MASK;
+        sid = (smr >> SMMU_SMR_ID_SHIFT) & SMMU_SMR_ID_MASK;
+        if ( (mask & sid) != sid )
+        {
+            smmu_err(smmu,
+                     "SMR mask bits (0x%x) insufficient for ID field (0x%x)\n",
+                     mask, sid);
+            return -ENODEV;
+        }
+        smmu->smr_mask_mask = mask;
+        smmu->smr_id_mask = sid;
+
+        smmu_info(smmu,
+                  "\tstream matching with %u register groups, mask 0x%x\n",
+                  smmu->num_mapping_groups, mask);
+    }
+
+    /* ID1 */
+    id = readl_relaxed(gr0_base + SMMU_GR0_ID1);
+    smmu->pagesize = (id & SMMU_ID1_PAGESIZE) ? PAGE_SIZE_64K : PAGE_SIZE_4K;
+
+    /* Check for size mismatch of SMMU address space from mapped region */
+    size = 1 << (((id >> SMMU_ID1_NUMPAGENDXB_SHIFT) &
+                  SMMU_ID1_NUMPAGENDXB_MASK) + 1);
+    size *= (smmu->pagesize << 1);
+    if ( smmu->size != size )
+        smmu_warn(smmu, "SMMU address space size (0x%lx) differs "
+                  "from mapped region size (0x%lx)!\n", size, smmu->size);
+
+    smmu->num_s2_context_banks = (id >> SMMU_ID1_NUMS2CB_SHIFT) &
+        SMMU_ID1_NUMS2CB_MASK;
+    smmu->num_context_banks = (id >> SMMU_ID1_NUMCB_SHIFT) &
+        SMMU_ID1_NUMCB_MASK;
+    if ( smmu->num_s2_context_banks > smmu->num_context_banks )
+    {
+        smmu_err(smmu, "impossible number of S2 context banks!\n");
+        return -ENODEV;
+    }
+    smmu_info(smmu, "\t%u context banks (%u stage-2 only)\n",
+              smmu->num_context_banks, smmu->num_s2_context_banks);
+
+    /* ID2 */
+    id = readl_relaxed(gr0_base + SMMU_GR0_ID2);
+    size = arm_smmu_id_size_to_bits((id >> SMMU_ID2_IAS_SHIFT) &
+                                    SMMU_ID2_IAS_MASK);
+
+    /*
+     * Stage-1 output limited by stage-2 input size due to VTCR_EL2
+     * setup (see setup_virt_paging)
+     */
+    /* Current maximum output size of 40 bits */
+    smmu->s1_output_size = min(40UL, size);
+
+    /* The stage-2 output mask is also applied for bypass */
+    size = arm_smmu_id_size_to_bits((id >> SMMU_ID2_OAS_SHIFT) &
+                                    SMMU_ID2_OAS_MASK);
+    smmu->s2_output_size = min((unsigned long)PADDR_BITS, size);
+
+    if ( smmu->version == 1 )
+        smmu->input_size = 32;
+    else
+    {
+#ifdef CONFIG_ARM_64
+        size = (id >> SMMU_ID2_UBS_SHIFT) & SMMU_ID2_UBS_MASK;
+        size = min(39, arm_smmu_id_size_to_bits(size));
+#else
+        size = 32;
+#endif
+        smmu->input_size = size;
+
+        if ( (PAGE_SIZE == PAGE_SIZE_4K && !(id & SMMU_ID2_PTFS_4K) ) ||
+             (PAGE_SIZE == PAGE_SIZE_64K && !(id & SMMU_ID2_PTFS_64K)) ||
+             (PAGE_SIZE != PAGE_SIZE_4K && PAGE_SIZE != PAGE_SIZE_64K) )
+        {
+            smmu_err(smmu, "CPU page size 0x%lx unsupported\n",
+                     PAGE_SIZE);
+            return -ENODEV;
+        }
+    }
+
+    smmu_info(smmu, "\t%lu-bit VA, %lu-bit IPA, %lu-bit PA\n",
+              smmu->input_size, smmu->s1_output_size, smmu->s2_output_size);
+    return 0;
+}
+
+static __init void arm_smmu_device_reset(struct arm_smmu_device *smmu)
+{
+    void __iomem *gr0_base = SMMU_GR0(smmu);
+    void __iomem *cb_base;
+    int i = 0;
+    u32 reg;
+
+    smmu_dbg(smmu, "device reset\n");
+
+    /* Clear Global FSR */
+    reg = readl_relaxed(SMMU_GR0_NS(smmu) + SMMU_GR0_sGFSR);
+    writel(reg, SMMU_GR0_NS(smmu) + SMMU_GR0_sGFSR);
+
+    /* Mark all SMRn as invalid and all S2CRn as fault */
+    for ( i = 0; i < smmu->num_mapping_groups; ++i )
+    {
+        writel_relaxed(~SMMU_SMR_VALID, gr0_base + SMMU_GR0_SMR(i));
+        writel_relaxed(SMMU_S2CR_TYPE_FAULT, gr0_base + SMMU_GR0_S2CR(i));
+    }
+
+    /* Make sure all context banks are disabled and clear CB_FSR  */
+    for ( i = 0; i < smmu->num_context_banks; ++i )
+    {
+        cb_base = SMMU_CB_BASE(smmu) + SMMU_CB(smmu, i);
+        writel_relaxed(0, cb_base + SMMU_CB_SCTLR);
+        writel_relaxed(SMMU_FSR_FAULT, cb_base + SMMU_CB_FSR);
+    }
+
+    /* Invalidate the TLB, just in case */
+    writel_relaxed(0, gr0_base + SMMU_GR0_STLBIALL);
+    writel_relaxed(0, gr0_base + SMMU_GR0_TLBIALLH);
+    writel_relaxed(0, gr0_base + SMMU_GR0_TLBIALLNSNH);
+
+    reg = readl_relaxed(SMMU_GR0_NS(smmu) + SMMU_GR0_sCR0);
+
+    /* Enable fault reporting */
+    reg |= (SMMU_sCR0_GFRE | SMMU_sCR0_GFIE |
+            SMMU_sCR0_GCFGFRE | SMMU_sCR0_GCFGFIE);
+
+    /* Disable TLB broadcasting. */
+    reg |= (SMMU_sCR0_VMIDPNE | SMMU_sCR0_PTM);
+
+    /* Enable client access, generate a fault if no mapping is found */
+    reg &= ~(SMMU_sCR0_CLIENTPD);
+    reg |= SMMU_sCR0_USFCFG;
+
+    /* Disable forced broadcasting */
+    reg &= ~SMMU_sCR0_FB;
+
+    /* Don't upgrade barriers */
+    reg &= ~(SMMU_sCR0_BSU_MASK << SMMU_sCR0_BSU_SHIFT);
+
+    /* Push the button */
+    arm_smmu_tlb_sync(smmu);
+    writel_relaxed(reg, SMMU_GR0_NS(smmu) + SMMU_GR0_sCR0);
+}
+
+int arm_smmu_iommu_domain_init(struct domain *d)
+{
+    struct arm_smmu_domain *smmu_domain;
+
+    smmu_domain = xzalloc(struct arm_smmu_domain);
+    if ( !smmu_domain )
+        return -ENOMEM;
+
+    spin_lock_init(&smmu_domain->lock);
+    INIT_LIST_HEAD(&smmu_domain->contexts);
+
+    domain_hvm_iommu(d)->arch.priv = smmu_domain;
+
+    return 0;
+}
+
+void arm_smmu_iommu_dom0_init(struct domain *d)
+{
+}
+
+void arm_smmu_iommu_domain_teardown(struct domain *d)
+{
+    struct arm_smmu_domain *smmu_domain = domain_hvm_iommu(d)->arch.priv;
+
+    ASSERT(list_empty(&smmu_domain->contexts));
+    xfree(smmu_domain);
+}
+
+static const struct iommu_ops arm_smmu_iommu_ops = {
+    .init = arm_smmu_iommu_domain_init,
+    .dom0_init = arm_smmu_iommu_dom0_init,
+    .teardown = arm_smmu_iommu_domain_teardown,
+    .iotlb_flush = arm_smmu_iotlb_flush,
+    .iotlb_flush_all = arm_smmu_iotlb_flush_all,
+    .assign_dt_device = arm_smmu_attach_dev,
+    .reassign_dt_device = arm_smmu_reassign_dt_dev,
+};
+
+static int __init smmu_init(struct dt_device_node *dev,
+                            const void *data)
+{
+    struct arm_smmu_device *smmu;
+    int res;
+    u64 addr, size;
+    unsigned int num_irqs, i;
+    struct dt_phandle_args masterspec;
+    struct rb_node *node;
+
+    /* Even if the device can't be initialized, we don't want to give to
+     * dom0 the smmu device
+     */
+    dt_device_set_used_by(dev, DOMID_XEN);
+
+    smmu = xzalloc(struct arm_smmu_device);
+    if ( !smmu )
+    {
+        printk(XENLOG_ERR "%s: failed to allocate arm_smmu_device\n",
+               dt_node_full_name(dev));
+        return -ENOMEM;
+    }
+
+    smmu->node = dev;
+    check_driver_options(smmu);
+
+    res = dt_device_get_address(smmu->node, 0, &addr, &size);
+    if ( res )
+    {
+        smmu_err(smmu, "unable to retrieve the base address of the SMMU\n");
+        goto out_err;
+    }
+
+    smmu->base = ioremap_nocache(addr, size);
+    if ( !smmu->base )
+    {
+        smmu_err(smmu, "unable to map the SMMU memory\n");
+        goto out_err;
+    }
+
+    smmu->size = size;
+
+    if ( !dt_property_read_u32(smmu->node, "#global-interrupts",
+                               &smmu->num_global_irqs) )
+    {
+        smmu_err(smmu, "missing #global-interrupts\n");
+        goto out_unmap;
+    }
+
+    num_irqs = dt_number_of_irq(smmu->node);
+    if ( num_irqs > smmu->num_global_irqs )
+        smmu->num_context_irqs = num_irqs - smmu->num_global_irqs;
+
+    if ( !smmu->num_context_irqs )
+    {
+        smmu_err(smmu, "found %d interrupts but expected at least %d\n",
+                 num_irqs, smmu->num_global_irqs + 1);
+        goto out_unmap;
+    }
+
+    smmu->irqs = xzalloc_array(struct dt_irq, num_irqs);
+    if ( !smmu->irqs )
+    {
+        smmu_err(smmu, "failed to allocated %d irqs\n", num_irqs);
+        goto out_unmap;
+    }
+
+    for ( i = 0; i < num_irqs; i++ )
+    {
+        res = dt_device_get_irq(smmu->node, i, &smmu->irqs[i]);
+        if ( res )
+        {
+            smmu_err(smmu, "failed to get irq index %d\n", i);
+            goto out_free_irqs;
+        }
+    }
+
+    smmu->sids = xzalloc_array(unsigned long,
+                               BITS_TO_LONGS(SMMU_MAX_STREAMIDS));
+    if ( !smmu->sids )
+    {
+        smmu_err(smmu, "failed to allocated bitmap for stream ID tracking\n");
+        goto out_free_masters;
+    }
+
+
+    i = 0;
+    smmu->masters = RB_ROOT;
+    while ( !dt_parse_phandle_with_args(smmu->node, "mmu-masters",
+                                        "#stream-id-cells", i, &masterspec) )
+    {
+        res = register_smmu_master(smmu, &masterspec);
+        if ( res )
+        {
+            smmu_err(smmu, "failed to add master %s\n",
+                     masterspec.np->name);
+            goto out_free_masters;
+        }
+        i++;
+    }
+
+    smmu_info(smmu, "registered %d master devices\n", i);
+
+    res = arm_smmu_device_cfg_probe(smmu);
+    if ( res )
+    {
+        smmu_err(smmu, "failed to probe the SMMU\n");
+        goto out_free_masters;
+    }
+
+    if ( smmu->version > 1 &&
+         smmu->num_context_banks != smmu->num_context_irqs )
+    {
+        smmu_err(smmu,
+                 "found only %d context interrupt(s) but %d required\n",
+                 smmu->num_context_irqs, smmu->num_context_banks);
+        goto out_free_masters;
+    }
+
+    smmu_dbg(smmu, "register global IRQs handler\n");
+
+    for ( i = 0; i < smmu->num_global_irqs; ++i )
+    {
+        smmu_dbg(smmu, "\t- global IRQ %u\n", smmu->irqs[i].irq);
+        res = request_dt_irq(&smmu->irqs[i], arm_smmu_global_fault,
+                             "arm-smmu global fault", smmu);
+        if ( res )
+        {
+            smmu_err(smmu, "failed to request global IRQ %d (%u)\n",
+                     i, smmu->irqs[i].irq);
+            goto out_release_irqs;
+        }
+    }
+
+    INIT_LIST_HEAD(&smmu->list);
+    list_add(&smmu->list, &arm_smmu_devices);
+
+    arm_smmu_device_reset(smmu);
+
+    iommu_set_ops(&arm_smmu_iommu_ops);
+
+    /* sids field can be freed... */
+    xfree(smmu->sids);
+    smmu->sids = NULL;
+
+    return 0;
+
+out_release_irqs:
+    while (i--)
+        release_dt_irq(&smmu->irqs[i], smmu);
+
+out_free_masters:
+    for ( node = rb_first(&smmu->masters); node; node = rb_next(node) )
+    {
+        struct arm_smmu_master *master;
+
+        master = container_of(node, struct arm_smmu_master, node);
+        xfree(master);
+    }
+
+    xfree(smmu->sids);
+
+out_free_irqs:
+    xfree(smmu->irqs);
+
+out_unmap:
+    iounmap(smmu->base);
+
+out_err:
+    xfree(smmu);
+
+    return -ENODEV;
+}
+
+static const char * const smmu_dt_compat[] __initconst =
+{
+    "arm,mmu-400",
+    NULL
+};
+
+DT_DEVICE_START(smmu, "ARM SMMU", DEVICE_IOMMU)
+    .compatible = smmu_dt_compat,
+    .init = smmu_init,
+DT_DEVICE_END
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/xen/iommu.h b/xen/include/xen/iommu.h
index 266cd6e..854e843 100644
--- a/xen/include/xen/iommu.h
+++ b/xen/include/xen/iommu.h
@@ -40,6 +40,9 @@  extern bool_t amd_iommu_perdev_intremap;
 #define PAGE_MASK_4K        (((u64)-1) << PAGE_SHIFT_4K)
 #define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
 
+#define PAGE_SHIFT_64K      (16)
+#define PAGE_SIZE_64K       (1UL << PAGE_SHIFT_64K)
+
 int iommu_setup(void);
 
 int iommu_add_device(struct pci_dev *pdev);