Message ID | 20220923123557.866972-3-thierry.reding@gmail.com |
---|---|
State | New |
Headers | show |
Series | iommu: Support mappings/reservations in reserved-memory regions | expand |
On 2022-09-23 13:35, Thierry Reding wrote: > From: Thierry Reding <treding@nvidia.com> > > This is an implementation that IOMMU drivers can use to obtain reserved > memory regions from a device tree node. It uses the reserved-memory DT > bindings to find the regions associated with a given device. If these > regions are marked accordingly, identity mappings will be created for > them in the IOMMU domain that the devices will be attached to. > > Cc: Frank Rowand <frowand.list@gmail.com> > Cc: devicetree@vger.kernel.org > Reviewed-by: Rob Herring <robh@kernel.org> > Signed-off-by: Thierry Reding <treding@nvidia.com> > --- > Changes in v9: > - address review comments by Robin Murphy: > - warn about non-direct mappings since they are not supported yet > - cleanup code to require less indentation > - narrow scope of variables > > Changes in v8: > - cleanup set-but-unused variables > > Changes in v6: > - remove reference to now unused dt-bindings/reserved-memory.h include > > Changes in v5: > - update for new "iommu-addresses" device tree bindings > > Changes in v4: > - fix build failure on !CONFIG_OF_ADDRESS > > Changes in v3: > - change "active" property to identity mapping flag that is part of the > memory region specifier (as defined by #memory-region-cells) to allow > per-reference flags to be used > > Changes in v2: > - use "active" property to determine whether direct mappings are needed > > drivers/iommu/of_iommu.c | 104 +++++++++++++++++++++++++++++++++++++++ > include/linux/of_iommu.h | 8 +++ > 2 files changed, 112 insertions(+) > > diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c > index 5696314ae69e..0bf2b08bca0a 100644 > --- a/drivers/iommu/of_iommu.c > +++ b/drivers/iommu/of_iommu.c > @@ -11,6 +11,7 @@ > #include <linux/module.h> > #include <linux/msi.h> > #include <linux/of.h> > +#include <linux/of_address.h> > #include <linux/of_iommu.h> > #include <linux/of_pci.h> > #include <linux/pci.h> > @@ -172,3 +173,106 @@ const struct iommu_ops 
*of_iommu_configure(struct device *dev, > > return ops; > } > + > +static inline bool check_direct_mapping(struct device *dev, struct resource *phys, Where "phys" is the virtual address, right? :( > + phys_addr_t start, phys_addr_t end) > +{ > + if (start != phys->start || end != phys->end) { > + dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", > + &phys, &start, &end); > + return false; > + } > + > + return true; > +} > + > +/** > + * of_iommu_get_resv_regions - reserved region driver helper for device tree > + * @dev: device for which to get reserved regions > + * @list: reserved region list > + * > + * IOMMU drivers can use this to implement their .get_resv_regions() callback > + * for memory regions attached to a device tree node. See the reserved-memory > + * device tree bindings on how to use these: > + * > + * Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt > + */ > +void of_iommu_get_resv_regions(struct device *dev, struct list_head *list) > +{ > +#if IS_ENABLED(CONFIG_OF_ADDRESS) > + struct of_phandle_iterator it; > + int err; > + > + of_for_each_phandle(&it, err, dev->of_node, "memory-region", NULL, 0) { > + const __be32 *maps, *end; > + struct resource res; > + int size; > + > + memset(&res, 0, sizeof(res)); > + > + /* > + * The "reg" property is optional and can be omitted by reserved-memory regions > + * that represent reservations in the IOVA space, which are regions that should > + * not be mapped. 
> + */ > + if (of_find_property(it.node, "reg", NULL)) { > + err = of_address_to_resource(it.node, 0, &res); > + if (err < 0) { > + dev_err(dev, "failed to parse memory region %pOF: %d\n", > + it.node, err); > + continue; > + } > + } > + > + maps = of_get_property(it.node, "iommu-addresses", &size); > + if (!maps) > + continue; > + > + end = maps + size / sizeof(__be32); > + > + while (maps < end) { > + struct device_node *np; > + u32 phandle; > + int na, ns; > + > + phandle = be32_to_cpup(maps++); > + np = of_find_node_by_phandle(phandle); > + na = of_n_addr_cells(np); > + ns = of_n_size_cells(np); > + > + if (np == dev->of_node) { > + int prot = IOMMU_READ | IOMMU_WRITE; > + struct iommu_resv_region *region; > + enum iommu_resv_type type; > + phys_addr_t start; > + size_t length; > + > + start = of_translate_dma_address(np, maps); > + length = of_read_number(maps + na, ns); > + > + /* > + * IOMMU regions without an associated physical region cannot be > + * mapped and are simply reservations. > + */ > + if (res.end > res.start) { > + phys_addr_t end = start + length - 1; > + > + if (check_direct_mapping(dev, &res, start, end)) > + type = IOMMU_RESV_DIRECT_RELAXABLE; Again I really don't think we should assume relaxable by default. Looking at the shape of things now, it seems like check_direct_mappings() wants to subsume the check on res as well and grow in to a more general function for determining the iommu_resv_type. Then we've got a clear place to start special-casing things like simple-framebuffer that we do know a bit more about. Thanks, Robin. 
> + else > + type = IOMMU_RESV_RESERVED; > + } else { > + type = IOMMU_RESV_RESERVED; > + } > + > + region = iommu_alloc_resv_region(start, length, prot, type); > + if (region) > + list_add_tail(&region->list, list); > + } > + > + maps += na + ns; > + } > + } > +#endif > +} > +EXPORT_SYMBOL(of_iommu_get_resv_regions); > diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h > index 55c1eb300a86..9a5e6b410dd2 100644 > --- a/include/linux/of_iommu.h > +++ b/include/linux/of_iommu.h > @@ -12,6 +12,9 @@ extern const struct iommu_ops *of_iommu_configure(struct device *dev, > struct device_node *master_np, > const u32 *id); > > +extern void of_iommu_get_resv_regions(struct device *dev, > + struct list_head *list); > + > #else > > static inline const struct iommu_ops *of_iommu_configure(struct device *dev, > @@ -21,6 +24,11 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev, > return NULL; > } > > +static inline void of_iommu_get_resv_regions(struct device *dev, > + struct list_head *list) > +{ > +} > + > #endif /* CONFIG_OF_IOMMU */ > > #endif /* __OF_IOMMU_H */
On Fri, Oct 07, 2022 at 02:47:23PM +0100, Robin Murphy wrote: > On 2022-09-23 13:35, Thierry Reding wrote: > > From: Thierry Reding <treding@nvidia.com> > > > > This is an implementation that IOMMU drivers can use to obtain reserved > > memory regions from a device tree node. It uses the reserved-memory DT > > bindings to find the regions associated with a given device. If these > > regions are marked accordingly, identity mappings will be created for > > them in the IOMMU domain that the devices will be attached to. > > > > Cc: Frank Rowand <frowand.list@gmail.com> > > Cc: devicetree@vger.kernel.org > > Reviewed-by: Rob Herring <robh@kernel.org> > > Signed-off-by: Thierry Reding <treding@nvidia.com> > > --- > > Changes in v9: > > - address review comments by Robin Murphy: > > - warn about non-direct mappings since they are not supported yet > > - cleanup code to require less indentation > > - narrow scope of variables > > > > Changes in v8: > > - cleanup set-but-unused variables > > > > Changes in v6: > > - remove reference to now unused dt-bindings/reserved-memory.h include > > > > Changes in v5: > > - update for new "iommu-addresses" device tree bindings > > > > Changes in v4: > > - fix build failure on !CONFIG_OF_ADDRESS > > > > Changes in v3: > > - change "active" property to identity mapping flag that is part of the > > memory region specifier (as defined by #memory-region-cells) to allow > > per-reference flags to be used > > > > Changes in v2: > > - use "active" property to determine whether direct mappings are needed > > > > drivers/iommu/of_iommu.c | 104 +++++++++++++++++++++++++++++++++++++++ > > include/linux/of_iommu.h | 8 +++ > > 2 files changed, 112 insertions(+) > > > > diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c > > index 5696314ae69e..0bf2b08bca0a 100644 > > --- a/drivers/iommu/of_iommu.c > > +++ b/drivers/iommu/of_iommu.c > > @@ -11,6 +11,7 @@ > > #include <linux/module.h> > > #include <linux/msi.h> > > #include <linux/of.h> 
> > +#include <linux/of_address.h> > > #include <linux/of_iommu.h> > > #include <linux/of_pci.h> > > #include <linux/pci.h> > > @@ -172,3 +173,106 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, > > return ops; > > } > > + > > +static inline bool check_direct_mapping(struct device *dev, struct resource *phys, > > Where "phys" is the virtual address, right? :( No, phys is actually res passed in from of_iommu_get_resv_regions() where it is the address read from the "reg" property. So that's the physical address of the reserved region. Perhaps it'd be useful to rename "res" to "phys" in that function to be a little more consistent. It's actually the "start" and "end" values that are passed into this function that refer to the I/O virtual addresses from iommu-addresses. > > > + phys_addr_t start, phys_addr_t end) > > +{ > > + if (start != phys->start || end != phys->end) { > > + dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", > > + &phys, &start, &end); > > + return false; > > + } > > + > > + return true; > > +} > > + > > +/** > > + * of_iommu_get_resv_regions - reserved region driver helper for device tree > > + * @dev: device for which to get reserved regions > > + * @list: reserved region list > > + * > > + * IOMMU drivers can use this to implement their .get_resv_regions() callback > > + * for memory regions attached to a device tree node. 
See the reserved-memory > > + * device tree bindings on how to use these: > > + * > > + * Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt > > + */ > > +void of_iommu_get_resv_regions(struct device *dev, struct list_head *list) > > +{ > > +#if IS_ENABLED(CONFIG_OF_ADDRESS) > > + struct of_phandle_iterator it; > > + int err; > > + > > + of_for_each_phandle(&it, err, dev->of_node, "memory-region", NULL, 0) { > > + const __be32 *maps, *end; > > + struct resource res; > > + int size; > > + > > + memset(&res, 0, sizeof(res)); > > + > > + /* > > + * The "reg" property is optional and can be omitted by reserved-memory regions > > + * that represent reservations in the IOVA space, which are regions that should > > + * not be mapped. > > + */ > > + if (of_find_property(it.node, "reg", NULL)) { > > + err = of_address_to_resource(it.node, 0, &res); > > + if (err < 0) { > > + dev_err(dev, "failed to parse memory region %pOF: %d\n", > > + it.node, err); > > + continue; > > + } > > + } > > + > > + maps = of_get_property(it.node, "iommu-addresses", &size); > > + if (!maps) > > + continue; > > + > > + end = maps + size / sizeof(__be32); > > + > > + while (maps < end) { > > + struct device_node *np; > > + u32 phandle; > > + int na, ns; > > + > > + phandle = be32_to_cpup(maps++); > > + np = of_find_node_by_phandle(phandle); > > + na = of_n_addr_cells(np); > > + ns = of_n_size_cells(np); > > + > > + if (np == dev->of_node) { > > + int prot = IOMMU_READ | IOMMU_WRITE; > > + struct iommu_resv_region *region; > > + enum iommu_resv_type type; > > + phys_addr_t start; > > + size_t length; > > + > > + start = of_translate_dma_address(np, maps); > > + length = of_read_number(maps + na, ns); > > + > > + /* > > + * IOMMU regions without an associated physical region cannot be > > + * mapped and are simply reservations. 
> > + */ > > + if (res.end > res.start) { > > + phys_addr_t end = start + length - 1; > > + > > + if (check_direct_mapping(dev, &res, start, end)) > > + type = IOMMU_RESV_DIRECT_RELAXABLE; > > Again I really don't think we should assume relaxable by default. > > Looking at the shape of things now, it seems like check_direct_mappings() > wants to subsume the check on res as well and grow in to a more general > function for determining the iommu_resv_type. Then we've got a clear place > to start special-casing things like simple-framebuffer that we do know a bit > more about. Okay, I think I know where you're going with this. Let me see what I can come up with. Thierry > > Thanks, > Robin. > > > + else > > + type = IOMMU_RESV_RESERVED; > > + } else { > > + type = IOMMU_RESV_RESERVED; > > + } > > + > > + region = iommu_alloc_resv_region(start, length, prot, type); > > + if (region) > > + list_add_tail(&region->list, list); > > + } > > + > > + maps += na + ns; > > + } > > + } > > +#endif > > +} > > +EXPORT_SYMBOL(of_iommu_get_resv_regions); > > diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h > > index 55c1eb300a86..9a5e6b410dd2 100644 > > --- a/include/linux/of_iommu.h > > +++ b/include/linux/of_iommu.h > > @@ -12,6 +12,9 @@ extern const struct iommu_ops *of_iommu_configure(struct device *dev, > > struct device_node *master_np, > > const u32 *id); > > +extern void of_iommu_get_resv_regions(struct device *dev, > > + struct list_head *list); > > + > > #else > > static inline const struct iommu_ops *of_iommu_configure(struct device *dev, > > @@ -21,6 +24,11 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev, > > return NULL; > > } > > +static inline void of_iommu_get_resv_regions(struct device *dev, > > + struct list_head *list) > > +{ > > +} > > + > > #endif /* CONFIG_OF_IOMMU */ > > #endif /* __OF_IOMMU_H */
On 2022-10-07 16:28, Thierry Reding wrote: [...] >>> @@ -172,3 +173,106 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, >>> return ops; >>> } >>> + >>> +static inline bool check_direct_mapping(struct device *dev, struct resource *phys, >> >> Where "phys" is the virtual address, right? :( > > No, phys is actually res passed in from of_iommu_get_resv_regions() > where it is the address read from the "reg" property. So that's the > physical address of the reserved region. Perhaps it'd be useful to > rename "res" to "phys" in that function to be a little more consistent. > It's actually the "start" and "end" values that are passed into this > function that refer to the I/O virtual addresses from iommu-addresses. Oh, so it's the phys_addr_t's that aren't physical addresses - well, it had to be wrong one way or the other :) I agree that s/res/phys/ in the main function, and maybe s/start/iova/ too, would be helpful. Thanks, Robin.
On Fri, Sep 23, 2022 at 02:35:54PM +0200, Thierry Reding wrote: > From: Thierry Reding <treding@nvidia.com> > > This is an implementation that IOMMU drivers can use to obtain reserved > memory regions from a device tree node. It uses the reserved-memory DT > bindings to find the regions associated with a given device. If these > regions are marked accordingly, identity mappings will be created for > them in the IOMMU domain that the devices will be attached to. > > Cc: Frank Rowand <frowand.list@gmail.com> > Cc: devicetree@vger.kernel.org > Reviewed-by: Rob Herring <robh@kernel.org> > Signed-off-by: Thierry Reding <treding@nvidia.com> > --- > Changes in v9: > - address review comments by Robin Murphy: > - warn about non-direct mappings since they are not supported yet > - cleanup code to require less indentation > - narrow scope of variables > > Changes in v8: > - cleanup set-but-unused variables > > Changes in v6: > - remove reference to now unused dt-bindings/reserved-memory.h include > > Changes in v5: > - update for new "iommu-addresses" device tree bindings > > Changes in v4: > - fix build failure on !CONFIG_OF_ADDRESS > > Changes in v3: > - change "active" property to identity mapping flag that is part of the > memory region specifier (as defined by #memory-region-cells) to allow > per-reference flags to be used > > Changes in v2: > - use "active" property to determine whether direct mappings are needed > > drivers/iommu/of_iommu.c | 104 +++++++++++++++++++++++++++++++++++++++ > include/linux/of_iommu.h | 8 +++ > 2 files changed, 112 insertions(+) > > diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c > index 5696314ae69e..0bf2b08bca0a 100644 > --- a/drivers/iommu/of_iommu.c > +++ b/drivers/iommu/of_iommu.c > @@ -11,6 +11,7 @@ > #include <linux/module.h> > #include <linux/msi.h> > #include <linux/of.h> > +#include <linux/of_address.h> > #include <linux/of_iommu.h> > #include <linux/of_pci.h> > #include <linux/pci.h> > @@ -172,3 +173,106 @@ const 
struct iommu_ops *of_iommu_configure(struct device *dev, > > return ops; > } > + > +static inline bool check_direct_mapping(struct device *dev, struct resource *phys, > + phys_addr_t start, phys_addr_t end) > +{ > + if (start != phys->start || end != phys->end) { > + dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", > + &phys, &start, &end); > + return false; > + } > + > + return true; > +} > + > +/** > + * of_iommu_get_resv_regions - reserved region driver helper for device tree > + * @dev: device for which to get reserved regions > + * @list: reserved region list > + * > + * IOMMU drivers can use this to implement their .get_resv_regions() callback > + * for memory regions attached to a device tree node. See the reserved-memory > + * device tree bindings on how to use these: > + * > + * Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt > + */ > +void of_iommu_get_resv_regions(struct device *dev, struct list_head *list) > +{ > +#if IS_ENABLED(CONFIG_OF_ADDRESS) > + struct of_phandle_iterator it; > + int err; > + > + of_for_each_phandle(&it, err, dev->of_node, "memory-region", NULL, 0) { > + const __be32 *maps, *end; > + struct resource res; > + int size; > + > + memset(&res, 0, sizeof(res)); > + > + /* > + * The "reg" property is optional and can be omitted by reserved-memory regions > + * that represent reservations in the IOVA space, which are regions that should > + * not be mapped. 
> + */ > + if (of_find_property(it.node, "reg", NULL)) { > + err = of_address_to_resource(it.node, 0, &res); > + if (err < 0) { > + dev_err(dev, "failed to parse memory region %pOF: %d\n", > + it.node, err); > + continue; > + } > + } > + > + maps = of_get_property(it.node, "iommu-addresses", &size); > + if (!maps) > + continue; > + > + end = maps + size / sizeof(__be32); > + > + while (maps < end) { > + struct device_node *np; > + u32 phandle; > + int na, ns; > + > + phandle = be32_to_cpup(maps++); > + np = of_find_node_by_phandle(phandle); > + na = of_n_addr_cells(np); > + ns = of_n_size_cells(np); > + > + if (np == dev->of_node) { > + int prot = IOMMU_READ | IOMMU_WRITE; > + struct iommu_resv_region *region; > + enum iommu_resv_type type; > + phys_addr_t start; > + size_t length; > + > + start = of_translate_dma_address(np, maps); I just came across an issue when extending the testing from simple-framebuffer to the full display engine, with the main difference being that the full display engine is hooked up both to the IOMMU and to the memory controller via the interconnects property ("dma-mem"). The latter seems to throw off of_translate_dma_address() because we have a top-level bus@0 node that sets #address-cells = <1> and #size-cells = <1>, which is sufficient to represent the "reg" entries for the devices. However, the reserved-memory node needs #address-cells = <2> and #size-cells = <2> to make sure we can describe memory regions above the 4 GiB boundary (and potentially larger than 4 GiB, too). What happens now is that of_translate_dma_address() will find the DMA parent for the display engine, which is the memory controller, which also has #address-cells = <2> and #size-cells = <2> for the same reason as the reserved-memory node. In other words, what this tries to model is that for DMA accesses, we span more than the 4 GiB range that is sufficient to address registers for IP blocks.
However, of_translate_dma_address() then ends up getting #address-cells and #size-cells from the *parent* of the DMA parent. And then everything falls apart during translation. Any idea if I'm doing something wrong? Or is the code wrong and it's not actually using the right cell counts? Should it be using the cell counts from the DMA parent rather than its parent bus? Thierry
On Wed, Oct 19, 2022 at 08:03:31PM +0200, Thierry Reding wrote: > On Fri, Sep 23, 2022 at 02:35:54PM +0200, Thierry Reding wrote: > > From: Thierry Reding <treding@nvidia.com> > > > > This is an implementation that IOMMU drivers can use to obtain reserved > > memory regions from a device tree node. It uses the reserved-memory DT > > bindings to find the regions associated with a given device. If these > > regions are marked accordingly, identity mappings will be created for > > them in the IOMMU domain that the devices will be attached to. > > > > Cc: Frank Rowand <frowand.list@gmail.com> > > Cc: devicetree@vger.kernel.org > > Reviewed-by: Rob Herring <robh@kernel.org> > > Signed-off-by: Thierry Reding <treding@nvidia.com> > > --- > > Changes in v9: > > - address review comments by Robin Murphy: > > - warn about non-direct mappings since they are not supported yet > > - cleanup code to require less indentation > > - narrow scope of variables > > > > Changes in v8: > > - cleanup set-but-unused variables > > > > Changes in v6: > > - remove reference to now unused dt-bindings/reserved-memory.h include > > > > Changes in v5: > > - update for new "iommu-addresses" device tree bindings > > > > Changes in v4: > > - fix build failure on !CONFIG_OF_ADDRESS > > > > Changes in v3: > > - change "active" property to identity mapping flag that is part of the > > memory region specifier (as defined by #memory-region-cells) to allow > > per-reference flags to be used > > > > Changes in v2: > > - use "active" property to determine whether direct mappings are needed > > > > drivers/iommu/of_iommu.c | 104 +++++++++++++++++++++++++++++++++++++++ > > include/linux/of_iommu.h | 8 +++ > > 2 files changed, 112 insertions(+) > > > > diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c > > index 5696314ae69e..0bf2b08bca0a 100644 > > --- a/drivers/iommu/of_iommu.c > > +++ b/drivers/iommu/of_iommu.c > > @@ -11,6 +11,7 @@ > > #include <linux/module.h> > > #include <linux/msi.h> > > 
#include <linux/of.h> > > +#include <linux/of_address.h> > > #include <linux/of_iommu.h> > > #include <linux/of_pci.h> > > #include <linux/pci.h> > > @@ -172,3 +173,106 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, > > > > return ops; > > } > > + > > +static inline bool check_direct_mapping(struct device *dev, struct resource *phys, > > + phys_addr_t start, phys_addr_t end) > > +{ > > + if (start != phys->start || end != phys->end) { > > + dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", > > + &phys, &start, &end); > > + return false; > > + } > > + > > + return true; > > +} > > + > > +/** > > + * of_iommu_get_resv_regions - reserved region driver helper for device tree > > + * @dev: device for which to get reserved regions > > + * @list: reserved region list > > + * > > + * IOMMU drivers can use this to implement their .get_resv_regions() callback > > + * for memory regions attached to a device tree node. See the reserved-memory > > + * device tree bindings on how to use these: > > + * > > + * Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt > > + */ > > +void of_iommu_get_resv_regions(struct device *dev, struct list_head *list) > > +{ > > +#if IS_ENABLED(CONFIG_OF_ADDRESS) > > + struct of_phandle_iterator it; > > + int err; > > + > > + of_for_each_phandle(&it, err, dev->of_node, "memory-region", NULL, 0) { > > + const __be32 *maps, *end; > > + struct resource res; > > + int size; > > + > > + memset(&res, 0, sizeof(res)); > > + > > + /* > > + * The "reg" property is optional and can be omitted by reserved-memory regions > > + * that represent reservations in the IOVA space, which are regions that should > > + * not be mapped. 
> > + */ > > + if (of_find_property(it.node, "reg", NULL)) { > > + err = of_address_to_resource(it.node, 0, &res); > > + if (err < 0) { > > + dev_err(dev, "failed to parse memory region %pOF: %d\n", > > + it.node, err); > > + continue; > > + } > > + } > > + > > + maps = of_get_property(it.node, "iommu-addresses", &size); > > + if (!maps) > > + continue; > > + > > + end = maps + size / sizeof(__be32); > > + > > + while (maps < end) { > > + struct device_node *np; > > + u32 phandle; > > + int na, ns; > > + > > + phandle = be32_to_cpup(maps++); > > + np = of_find_node_by_phandle(phandle); > > + na = of_n_addr_cells(np); > > + ns = of_n_size_cells(np); > > + > > + if (np == dev->of_node) { > > + int prot = IOMMU_READ | IOMMU_WRITE; > > + struct iommu_resv_region *region; > > + enum iommu_resv_type type; > > + phys_addr_t start; > > + size_t length; > > + > > + start = of_translate_dma_address(np, maps); > > I just came across an issue when extending the testing from simple- > framebuffer to the full display engine, with the main difference being > that the fill display engine is hooked up both to the IOMMU and to the > memory controller via the interconnects property ("dma-mem"). > > The latter seems to throw off the of_translate_dma_address() because we > have a top-level bus@0 node that sets #address-cells = <1> and #size- > cells = <1>, which is sufficient to represent the "reg" entries for the > devices. However, for the reserved-memory node needs #address-cells = > <2> and #size-cells = <2> to make sure we can describe memory regions > above the 4 GiB boundary (and potentially larger than 4 GiB, too). > > What happens now is that of_translate_dma_address() will find the DMA > parent for the display engine, which is the memory controller, which > also has #address-cells = <2> and #size-cells = <2> for the same reason > as the reserved-memory node. 
In other words, what this tries to model is > that for DMA accesses, we span more than the 4 GiB range that is > sufficient to address registers for IP blocks. > > However, of_translate_dma_address() then ends up getting #address-cells > and #size-cells from the *parent* of the DMA parent. And then everything > falls apart during translation. > > Any idea if I'm doing something wrong? Or is the code wrong and it's not > actually using the right cell counts? Should it be using the cell counts > from the DMA parent rather than its parent bus? I came up with the attached patch. This works for my case, but will abort the DMA parent traversal early on some devices. I'm not sure how much this would matter in practice. A safer way would be to create a new variant of __of_get_dma_parent() that doesn't have the of_get_parent() fallback. That's assuming that we agree on the concept of having potentially different cell counts, and effectively DMA busses that are separate from the traditional control busses in DT. Do we also need separate DMA cell counts so that one node can be a DMA bus and a control bus at the same time? Or is this overcomplicating things and a simpler approach would be to propagate the cell counts all the way to the top level? I think this all might work with the existing code if I make bus@0's cell count 2 & 2 for Tegra SoC DTSI files. It's a lot of churn and seems more like a workaround rather than a correct model of the busses. Thierry From 7f63e7c86fa43f6c7d9254323606daeeb442cf48 Mon Sep 17 00:00:00 2001 From: Thierry Reding <treding@nvidia.com> Date: Thu, 20 Oct 2022 15:21:10 +0200 Subject: [PATCH] of: Stop DMA translation at last DMA parent DMA parent devices can define separate DMA busses via the "dma-ranges" and "#address-cells" and "#size-cells" properties. If the DMA bus has different cell counts than its parent, this can cause the translation of DMA addresses to fail (e.g. truncation from 2 to 1 address cells).
Avoid this by stopping to search for DMA parents when a parent without a "dma-ranges" property is encountered. Also, since it is the DMA parent that defines the DMA bus, use the bus' cell counts instead of its parent cell counts. Signed-off-by: Thierry Reding <treding@nvidia.com> --- drivers/of/address.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/of/address.c b/drivers/of/address.c index 14f137a21b0c..e2f45bdbc41a 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -475,6 +475,7 @@ static u64 __of_translate_address(struct device_node *dev, const __be32 *in_addr, const char *rprop, struct device_node **host) { + bool dma = rprop && !strcmp(rprop, "dma-ranges"); struct device_node *parent = NULL; struct of_bus *bus, *pbus; __be32 addr[OF_MAX_ADDR_CELLS]; @@ -494,7 +495,12 @@ static u64 __of_translate_address(struct device_node *dev, bus = of_match_bus(parent); /* Count address cells & copy address locally */ - bus->count_cells(dev, &na, &ns); + if (dma) { + na = of_bus_n_addr_cells(parent); + ns = of_bus_n_size_cells(parent); + } else { + bus->count_cells(dev, &na, &ns); + } if (!OF_CHECK_COUNTS(na, ns)) { pr_debug("Bad cell count for %pOF\n", dev); goto bail; @@ -515,7 +521,7 @@ static u64 __of_translate_address(struct device_node *dev, parent = get_parent(dev); /* If root, we have finished */ - if (parent == NULL) { + if (parent == NULL || (dma && !of_get_property(parent, "dma-ranges", NULL))) { pr_debug("reached root node\n"); result = of_read_number(addr, na); break; @@ -536,7 +542,12 @@ static u64 __of_translate_address(struct device_node *dev, /* Get new parent bus and counts */ pbus = of_match_bus(parent); - pbus->count_cells(dev, &pna, &pns); + if (dma) { + pna = of_bus_n_addr_cells(parent); + pns = of_bus_n_size_cells(parent); + } else { + pbus->count_cells(dev, &pna, &pns); + } if (!OF_CHECK_COUNTS(pna, pns)) { pr_err("Bad cell count for %pOF\n", dev); break;
diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 5696314ae69e..0bf2b08bca0a 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -11,6 +11,7 @@ #include <linux/module.h> #include <linux/msi.h> #include <linux/of.h> +#include <linux/of_address.h> #include <linux/of_iommu.h> #include <linux/of_pci.h> #include <linux/pci.h> @@ -172,3 +173,106 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, return ops; } + +static inline bool check_direct_mapping(struct device *dev, struct resource *phys, + phys_addr_t start, phys_addr_t end) +{ + if (start != phys->start || end != phys->end) { + dev_warn(dev, "treating non-direct mapping [%pr] -> [%pap-%pap] as reservation\n", + &phys, &start, &end); + return false; + } + + return true; +} + +/** + * of_iommu_get_resv_regions - reserved region driver helper for device tree + * @dev: device for which to get reserved regions + * @list: reserved region list + * + * IOMMU drivers can use this to implement their .get_resv_regions() callback + * for memory regions attached to a device tree node. See the reserved-memory + * device tree bindings on how to use these: + * + * Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt + */ +void of_iommu_get_resv_regions(struct device *dev, struct list_head *list) +{ +#if IS_ENABLED(CONFIG_OF_ADDRESS) + struct of_phandle_iterator it; + int err; + + of_for_each_phandle(&it, err, dev->of_node, "memory-region", NULL, 0) { + const __be32 *maps, *end; + struct resource res; + int size; + + memset(&res, 0, sizeof(res)); + + /* + * The "reg" property is optional and can be omitted by reserved-memory regions + * that represent reservations in the IOVA space, which are regions that should + * not be mapped. 
+ */ + if (of_find_property(it.node, "reg", NULL)) { + err = of_address_to_resource(it.node, 0, &res); + if (err < 0) { + dev_err(dev, "failed to parse memory region %pOF: %d\n", + it.node, err); + continue; + } + } + + maps = of_get_property(it.node, "iommu-addresses", &size); + if (!maps) + continue; + + end = maps + size / sizeof(__be32); + + while (maps < end) { + struct device_node *np; + u32 phandle; + int na, ns; + + phandle = be32_to_cpup(maps++); + np = of_find_node_by_phandle(phandle); + na = of_n_addr_cells(np); + ns = of_n_size_cells(np); + + if (np == dev->of_node) { + int prot = IOMMU_READ | IOMMU_WRITE; + struct iommu_resv_region *region; + enum iommu_resv_type type; + phys_addr_t start; + size_t length; + + start = of_translate_dma_address(np, maps); + length = of_read_number(maps + na, ns); + + /* + * IOMMU regions without an associated physical region cannot be + * mapped and are simply reservations. + */ + if (res.end > res.start) { + phys_addr_t end = start + length - 1; + + if (check_direct_mapping(dev, &res, start, end)) + type = IOMMU_RESV_DIRECT_RELAXABLE; + else + type = IOMMU_RESV_RESERVED; + } else { + type = IOMMU_RESV_RESERVED; + } + + region = iommu_alloc_resv_region(start, length, prot, type); + if (region) + list_add_tail(&region->list, list); + } + + maps += na + ns; + } + } +#endif +} +EXPORT_SYMBOL(of_iommu_get_resv_regions); diff --git a/include/linux/of_iommu.h b/include/linux/of_iommu.h index 55c1eb300a86..9a5e6b410dd2 100644 --- a/include/linux/of_iommu.h +++ b/include/linux/of_iommu.h @@ -12,6 +12,9 @@ extern const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np, const u32 *id); +extern void of_iommu_get_resv_regions(struct device *dev, + struct list_head *list); + #else static inline const struct iommu_ops *of_iommu_configure(struct device *dev, @@ -21,6 +24,11 @@ static inline const struct iommu_ops *of_iommu_configure(struct device *dev, return NULL; } +static inline void
of_iommu_get_resv_regions(struct device *dev, + struct list_head *list) +{ +} + #endif /* CONFIG_OF_IOMMU */ #endif /* __OF_IOMMU_H */