Message ID | 20201111054356.793390-5-ben.widawsky@intel.com |
---|---|
State | New |
Headers | show |
Series | CXL 2.0 Support | expand |
On Tue, Nov 10, 2020 at 09:43:51PM -0800, Ben Widawsky wrote: > All the necessary bits are initialized in order to find and map the > register space for CXL Memory Devices. This is accomplished by using the > Register Locator DVSEC (CXL 2.0 - 8.1.9.1) to determine which PCI BAR to > use, and how much of an offset from that BAR should be added. "Initialize the necessary bits ..." to use the usual imperative sentence structure, as you did in the subject. > If the memory device registers are found and mapped a new internal data > structure tracking device state is allocated. "Allocate device state if we find device registers" or similar. > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> > --- > drivers/cxl/mem.c | 68 +++++++++++++++++++++++++++++++++++++++++++---- > drivers/cxl/pci.h | 6 +++++ > 2 files changed, 69 insertions(+), 5 deletions(-) > > diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c > index aa7d881fa47b..8d9b9ab6c5ea 100644 > --- a/drivers/cxl/mem.c > +++ b/drivers/cxl/mem.c > @@ -7,9 +7,49 @@ > #include "pci.h" > > struct cxl_mem { > + struct pci_dev *pdev; > void __iomem *regs; > }; > > +static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi) > +{ > + struct device *dev = &pdev->dev; > + struct cxl_mem *cxlm; > + void __iomem *regs; > + u64 offset; > + u8 bar; > + int rc; > + > + offset = ((u64)reg_hi << 32) | (reg_lo & 0xffff0000); > + bar = reg_lo & 0x7; > + > + /* Basic sanity check that BAR is big enough */ > + if (pci_resource_len(pdev, bar) < offset) { > + dev_err(dev, "bar%d: %pr: too small (offset: %#llx)\n", > + bar, &pdev->resource[bar], (unsigned long long) offset); s/bar/BAR/ > + return ERR_PTR(-ENXIO); > + } > + > + rc = pcim_iomap_regions(pdev, 1 << bar, pci_name(pdev)); > + if (rc != 0) { > + dev_err(dev, "failed to map registers\n"); > + return ERR_PTR(-ENXIO); > + } > + > + cxlm = devm_kzalloc(&pdev->dev, sizeof(*cxlm), GFP_KERNEL); > + if (!cxlm) { > + dev_err(dev, "No memory available\n"); > + 
return ERR_PTR(-ENOMEM); > + } > + > + regs = pcim_iomap_table(pdev)[bar]; > + cxlm->pdev = pdev; > + cxlm->regs = regs + offset; > + > + dev_dbg(dev, "Mapped CXL Memory Device resource\n"); > + return cxlm; > +} > + > static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec) > { > int pos; > @@ -34,9 +74,9 @@ static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec) > > static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id) > { > + struct cxl_mem *cxlm = ERR_PTR(-ENXIO); > struct device *dev = &pdev->dev; > - struct cxl_mem *cxlm; The order was better before ("dev", then "clxm"). Oh, I suppose this is a "reverse Christmas tree" thing. > - int rc, regloc; > + int rc, regloc, i; > > rc = cxl_bus_prepared(pdev); > if (rc != 0) { > @@ -44,15 +84,33 @@ static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id) > return rc; > } > > + rc = pcim_enable_device(pdev); > + if (rc) > + return rc; > + > regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC); > if (!regloc) { > dev_err(dev, "register location dvsec not found\n"); > return -ENXIO; > } > + regloc += 0xc; /* Skip DVSEC + reserved fields */ > + > + for (i = regloc; i < regloc + 0x24; i += 8) { > + u32 reg_lo, reg_hi; > + > + pci_read_config_dword(pdev, i, &reg_lo); > + pci_read_config_dword(pdev, i + 4, &reg_hi); > + > + if (CXL_REGLOG_IS_MEMDEV(reg_lo)) { > + cxlm = cxl_mem_create(pdev, reg_lo, reg_hi); > + break; > + } > + } > + > + if (IS_ERR(cxlm)) > + return -ENXIO; I think this would be easier to read if cxl_mem_create() returned NULL on failure (it prints error messages and we throw away -ENXIO/-ENOMEM distinction here anyway) so you could do: struct cxl_mem *cxlm = NULL; for (...) { if (...) { cxlm = cxl_mem_create(pdev, reg_lo, reg_hi); break; } } if (!cxlm) return -ENXIO; /* -ENODEV might be more natural? 
*/ > - cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL); > - if (!cxlm) > - return -ENOMEM; > + pci_set_drvdata(pdev, cxlm); > > return 0; > } > diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h > index beb03921e6da..be87f62e9132 100644 > --- a/drivers/cxl/pci.h > +++ b/drivers/cxl/pci.h > @@ -12,4 +12,10 @@ > #define PCI_DVSEC_ID_CXL 0x0 > #define PCI_DVSEC_ID_CXL_REGLOC 0x8 > > +#define CXL_REGLOG_RBI_EMPTY 0 > +#define CXL_REGLOG_RBI_COMPONENT 1 > +#define CXL_REGLOG_RBI_VIRT 2 > +#define CXL_REGLOG_RBI_MEMDEV 3 Maybe line these values up. > +#define CXL_REGLOG_IS_MEMDEV(x) ((((x) >> 8) & 0xff) == CXL_REGLOG_RBI_MEMDEV) If these are only needed in cxl/mem.c, they could go there. Do you expect code outside of drivers/cxl to need these? > #endif /* __CXL_PCI_H__ */ > -- > 2.29.2 >
On 20-11-13 12:17:32, Bjorn Helgaas wrote: > On Tue, Nov 10, 2020 at 09:43:51PM -0800, Ben Widawsky wrote: > > All the necessary bits are initialized in order to find and map the > > register space for CXL Memory Devices. This is accomplished by using the > > Register Locator DVSEC (CXL 2.0 - 8.1.9.1) to determine which PCI BAR to > > use, and how much of an offset from that BAR should be added. > > "Initialize the necessary bits ..." to use the usual imperative > sentence structure, as you did in the subject. > > > If the memory device registers are found and mapped a new internal data > > structure tracking device state is allocated. > > "Allocate device state if we find device registers" or similar. > > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> > > --- > > drivers/cxl/mem.c | 68 +++++++++++++++++++++++++++++++++++++++++++---- > > drivers/cxl/pci.h | 6 +++++ > > 2 files changed, 69 insertions(+), 5 deletions(-) > > > > diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c > > index aa7d881fa47b..8d9b9ab6c5ea 100644 > > --- a/drivers/cxl/mem.c > > +++ b/drivers/cxl/mem.c > > @@ -7,9 +7,49 @@ > > #include "pci.h" > > > > struct cxl_mem { > > + struct pci_dev *pdev; > > void __iomem *regs; > > }; > > > > +static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi) > > +{ > > + struct device *dev = &pdev->dev; > > + struct cxl_mem *cxlm; > > + void __iomem *regs; > > + u64 offset; > > + u8 bar; > > + int rc; > > + > > + offset = ((u64)reg_hi << 32) | (reg_lo & 0xffff0000); > > + bar = reg_lo & 0x7; > > + > > + /* Basic sanity check that BAR is big enough */ > > + if (pci_resource_len(pdev, bar) < offset) { > > + dev_err(dev, "bar%d: %pr: too small (offset: %#llx)\n", > > + bar, &pdev->resource[bar], (unsigned long long) offset); > > s/bar/BAR/ > > > + return ERR_PTR(-ENXIO); > > + } > > + > > + rc = pcim_iomap_regions(pdev, 1 << bar, pci_name(pdev)); > > + if (rc != 0) { > > + dev_err(dev, "failed to map registers\n"); > > + return 
ERR_PTR(-ENXIO); > > + } > > + > > + cxlm = devm_kzalloc(&pdev->dev, sizeof(*cxlm), GFP_KERNEL); > > + if (!cxlm) { > > + dev_err(dev, "No memory available\n"); > > + return ERR_PTR(-ENOMEM); > > + } > > + > > + regs = pcim_iomap_table(pdev)[bar]; > > + cxlm->pdev = pdev; > > + cxlm->regs = regs + offset; > > + > > + dev_dbg(dev, "Mapped CXL Memory Device resource\n"); > > + return cxlm; > > +} > > + > > static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec) > > { > > int pos; > > @@ -34,9 +74,9 @@ static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec) > > > > static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id) > > { > > + struct cxl_mem *cxlm = ERR_PTR(-ENXIO); > > struct device *dev = &pdev->dev; > > - struct cxl_mem *cxlm; > > The order was better before ("dev", then "clxm"). Oh, I suppose this > is a "reverse Christmas tree" thing. > I don't actually care either way as long as it's consistent. I tend to do reverse Christmas tree for no particular reason. 
> > - int rc, regloc; > > + int rc, regloc, i; > > > > rc = cxl_bus_prepared(pdev); > > if (rc != 0) { > > @@ -44,15 +84,33 @@ static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id) > > return rc; > > } > > > > + rc = pcim_enable_device(pdev); > > + if (rc) > > + return rc; > > + > > regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC); > > if (!regloc) { > > dev_err(dev, "register location dvsec not found\n"); > > return -ENXIO; > > } > > + regloc += 0xc; /* Skip DVSEC + reserved fields */ > > + > > + for (i = regloc; i < regloc + 0x24; i += 8) { > > + u32 reg_lo, reg_hi; > > + > > + pci_read_config_dword(pdev, i, &reg_lo); > > + pci_read_config_dword(pdev, i + 4, &reg_hi); > > + > > + if (CXL_REGLOG_IS_MEMDEV(reg_lo)) { > > + cxlm = cxl_mem_create(pdev, reg_lo, reg_hi); > > + break; > > + } > > + } > > + > > + if (IS_ERR(cxlm)) > > + return -ENXIO; > > I think this would be easier to read if cxl_mem_create() returned NULL > on failure (it prints error messages and we throw away > -ENXIO/-ENOMEM distinction here anyway) so you could do: > > struct cxl_mem *cxlm = NULL; > > for (...) { > if (...) { > cxlm = cxl_mem_create(pdev, reg_lo, reg_hi); > break; > } > } > > if (!cxlm) > return -ENXIO; /* -ENODEV might be more natural? */ > I agree on both counts. Both of these came from Dan, so I will let him explain. > > - cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL); > > - if (!cxlm) > > - return -ENOMEM; > > + pci_set_drvdata(pdev, cxlm); > > > > return 0; > > } > > diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h > > index beb03921e6da..be87f62e9132 100644 > > --- a/drivers/cxl/pci.h > > +++ b/drivers/cxl/pci.h > > @@ -12,4 +12,10 @@ > > #define PCI_DVSEC_ID_CXL 0x0 > > #define PCI_DVSEC_ID_CXL_REGLOC 0x8 > > > > +#define CXL_REGLOG_RBI_EMPTY 0 > > +#define CXL_REGLOG_RBI_COMPONENT 1 > > +#define CXL_REGLOG_RBI_VIRT 2 > > +#define CXL_REGLOG_RBI_MEMDEV 3 > > Maybe line these values up. 
> > > +#define CXL_REGLOG_IS_MEMDEV(x) ((((x) >> 8) & 0xff) == CXL_REGLOG_RBI_MEMDEV) > > If these are only needed in cxl/mem.c, they could go there. Do you > expect code outside of drivers/cxl to need these? Will do. I'll suck in everything else as they seem like improvements. > > > #endif /* __CXL_PCI_H__ */ > > -- > > 2.29.2 > >
On Fri, Nov 13, 2020 at 5:12 PM Ben Widawsky <ben.widawsky@intel.com> wrote: > > On 20-11-13 12:17:32, Bjorn Helgaas wrote: > > On Tue, Nov 10, 2020 at 09:43:51PM -0800, Ben Widawsky wrote: > > > All the necessary bits are initialized in order to find and map the > > > register space for CXL Memory Devices. This is accomplished by using the > > > Register Locator DVSEC (CXL 2.0 - 8.1.9.1) to determine which PCI BAR to > > > use, and how much of an offset from that BAR should be added. > > > > "Initialize the necessary bits ..." to use the usual imperative > > sentence structure, as you did in the subject. > > > > > If the memory device registers are found and mapped a new internal data > > > structure tracking device state is allocated. > > > > "Allocate device state if we find device registers" or similar. > > > > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> > > > --- > > > drivers/cxl/mem.c | 68 +++++++++++++++++++++++++++++++++++++++++++---- > > > drivers/cxl/pci.h | 6 +++++ > > > 2 files changed, 69 insertions(+), 5 deletions(-) > > > > > > diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c > > > index aa7d881fa47b..8d9b9ab6c5ea 100644 > > > --- a/drivers/cxl/mem.c > > > +++ b/drivers/cxl/mem.c > > > @@ -7,9 +7,49 @@ > > > #include "pci.h" > > > > > > struct cxl_mem { > > > + struct pci_dev *pdev; > > > void __iomem *regs; > > > }; > > > > > > +static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi) > > > +{ > > > + struct device *dev = &pdev->dev; > > > + struct cxl_mem *cxlm; > > > + void __iomem *regs; > > > + u64 offset; > > > + u8 bar; > > > + int rc; > > > + > > > + offset = ((u64)reg_hi << 32) | (reg_lo & 0xffff0000); > > > + bar = reg_lo & 0x7; > > > + > > > + /* Basic sanity check that BAR is big enough */ > > > + if (pci_resource_len(pdev, bar) < offset) { > > > + dev_err(dev, "bar%d: %pr: too small (offset: %#llx)\n", > > > + bar, &pdev->resource[bar], (unsigned long long) offset); > > > > s/bar/BAR/ > > > > > + 
return ERR_PTR(-ENXIO); > > > + } > > > + > > > + rc = pcim_iomap_regions(pdev, 1 << bar, pci_name(pdev)); > > > + if (rc != 0) { > > > + dev_err(dev, "failed to map registers\n"); > > > + return ERR_PTR(-ENXIO); > > > + } > > > + > > > + cxlm = devm_kzalloc(&pdev->dev, sizeof(*cxlm), GFP_KERNEL); > > > + if (!cxlm) { > > > + dev_err(dev, "No memory available\n"); > > > + return ERR_PTR(-ENOMEM); > > > + } > > > + > > > + regs = pcim_iomap_table(pdev)[bar]; > > > + cxlm->pdev = pdev; > > > + cxlm->regs = regs + offset; > > > + > > > + dev_dbg(dev, "Mapped CXL Memory Device resource\n"); > > > + return cxlm; > > > +} > > > + > > > static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec) > > > { > > > int pos; > > > @@ -34,9 +74,9 @@ static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec) > > > > > > static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id) > > > { > > > + struct cxl_mem *cxlm = ERR_PTR(-ENXIO); > > > struct device *dev = &pdev->dev; > > > - struct cxl_mem *cxlm; > > > > The order was better before ("dev", then "clxm"). Oh, I suppose this > > is a "reverse Christmas tree" thing. > > > > I don't actually care either way as long as it's consistent. I tend to do > reverse Christmas tree for no particular reason. Yeah, reverse Christmas tree for no particular reason. 
> > > > - int rc, regloc; > > > + int rc, regloc, i; > > > > > > rc = cxl_bus_prepared(pdev); > > > if (rc != 0) { > > > @@ -44,15 +84,33 @@ static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id) > > > return rc; > > > } > > > > > > + rc = pcim_enable_device(pdev); > > > + if (rc) > > > + return rc; > > > + > > > regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC); > > > if (!regloc) { > > > dev_err(dev, "register location dvsec not found\n"); > > > return -ENXIO; > > > } > > > + regloc += 0xc; /* Skip DVSEC + reserved fields */ > > > + > > > + for (i = regloc; i < regloc + 0x24; i += 8) { > > > + u32 reg_lo, reg_hi; > > > + > > > + pci_read_config_dword(pdev, i, &reg_lo); > > > + pci_read_config_dword(pdev, i + 4, &reg_hi); > > > + > > > + if (CXL_REGLOG_IS_MEMDEV(reg_lo)) { > > > + cxlm = cxl_mem_create(pdev, reg_lo, reg_hi); > > > + break; > > > + } > > > + } > > > + > > > + if (IS_ERR(cxlm)) > > > + return -ENXIO; > > > > I think this would be easier to read if cxl_mem_create() returned NULL > > on failure (it prints error messages and we throw away > > -ENXIO/-ENOMEM distinction here anyway) so you could do: > > > > struct cxl_mem *cxlm = NULL; > > > > for (...) { > > if (...) { > > cxlm = cxl_mem_create(pdev, reg_lo, reg_hi); > > break; > > } > > } > > > > if (!cxlm) > > return -ENXIO; /* -ENODEV might be more natural? */ > > > > I agree on both counts. Both of these came from Dan, so I will let him explain. I'm not attached to differentiating -ENOMEM from -ENXIO and am ok to drop the ERR_PTR() return. I do tend to use -ENXIO for failure to perform an initialization action vs failure to even find the device, but if -ENODEV seems more idiomatic to Bjorn, I won't argue.
On Mon, Nov 16, 2020 at 03:19:41PM -0800, Dan Williams wrote: > On Fri, Nov 13, 2020 at 5:12 PM Ben Widawsky <ben.widawsky@intel.com> wrote: > > On 20-11-13 12:17:32, Bjorn Helgaas wrote: > > > On Tue, Nov 10, 2020 at 09:43:51PM -0800, Ben Widawsky wrote: > > > > static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id) > > > > { > > > > + struct cxl_mem *cxlm = ERR_PTR(-ENXIO); > > > > struct device *dev = &pdev->dev; > > > > - struct cxl_mem *cxlm; > > > > > > The order was better before ("dev", then "clxm"). Oh, I suppose this > > > is a "reverse Christmas tree" thing. > > > > > > > I don't actually care either way as long as it's consistent. I tend to do > > reverse Christmas tree for no particular reason. > > Yeah, reverse Christmas tree for no particular reason. FWIW, the usual drivers/pci style is to order the decls in the order the variables are used in the code. But this isn't drivers/pci, so it's up to you. I only noticed because changing the order made the diff bigger than it needed to be. > > > I think this would be easier to read if cxl_mem_create() returned NULL > > > on failure (it prints error messages and we throw away > > > -ENXIO/-ENOMEM distinction here anyway) so you could do: > > > > > > struct cxl_mem *cxlm = NULL; > > > > > > for (...) { > > > if (...) { > > > cxlm = cxl_mem_create(pdev, reg_lo, reg_hi); > > > break; > > > } > > > } > > > > > > if (!cxlm) > > > return -ENXIO; /* -ENODEV might be more natural? */ > > > > > > > I agree on both counts. Both of these came from Dan, so I will let him explain. > > I'm not attached to differentiating -ENOMEM from -ENXIO and am ok to > drop the ERR_PTR() return. I do tend to use -ENXIO for failure to > perform an initialization action vs failure to even find the device, > but if -ENODEV seems more idiomatic to Bjorn, I won't argue. -ENXIO is fine with me. I just don't see it as often so I don't really know what it is. Bjorn
On Tue, 10 Nov 2020 21:43:51 -0800 Ben Widawsky <ben.widawsky@intel.com> wrote: > All the necessary bits are initialized in order to find and map the > register space for CXL Memory Devices. This is accomplished by using the > Register Locator DVSEC (CXL 2.0 - 8.1.9.1) to determine which PCI BAR to > use, and how much of an offset from that BAR should be added. > > If the memory device registers are found and mapped a new internal data > structure tracking device state is allocated. > > Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> > --- > drivers/cxl/mem.c | 68 +++++++++++++++++++++++++++++++++++++++++++---- > drivers/cxl/pci.h | 6 +++++ > 2 files changed, 69 insertions(+), 5 deletions(-) > > diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c > index aa7d881fa47b..8d9b9ab6c5ea 100644 > --- a/drivers/cxl/mem.c > +++ b/drivers/cxl/mem.c > @@ -7,9 +7,49 @@ > #include "pci.h" > > struct cxl_mem { > + struct pci_dev *pdev; > void __iomem *regs; > }; > > +static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi) > +{ > + struct device *dev = &pdev->dev; > + struct cxl_mem *cxlm; > + void __iomem *regs; > + u64 offset; > + u8 bar; > + int rc; > + > + offset = ((u64)reg_hi << 32) | (reg_lo & 0xffff0000); > + bar = reg_lo & 0x7; > + > + /* Basic sanity check that BAR is big enough */ > + if (pci_resource_len(pdev, bar) < offset) { > + dev_err(dev, "bar%d: %pr: too small (offset: %#llx)\n", > + bar, &pdev->resource[bar], (unsigned long long) offset); > + return ERR_PTR(-ENXIO); > + } > + > + rc = pcim_iomap_regions(pdev, 1 << bar, pci_name(pdev)); > + if (rc != 0) { > + dev_err(dev, "failed to map registers\n"); > + return ERR_PTR(-ENXIO); > + } > + > + cxlm = devm_kzalloc(&pdev->dev, sizeof(*cxlm), GFP_KERNEL); > + if (!cxlm) { > + dev_err(dev, "No memory available\n"); > + return ERR_PTR(-ENOMEM); > + } > + > + regs = pcim_iomap_table(pdev)[bar]; > + cxlm->pdev = pdev; > + cxlm->regs = regs + offset; > + > + dev_dbg(dev, "Mapped CXL Memory 
Device resource\n"); > + return cxlm; > +} > + > static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec) > { > int pos; > @@ -34,9 +74,9 @@ static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec) > > static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id) > { > + struct cxl_mem *cxlm = ERR_PTR(-ENXIO); > struct device *dev = &pdev->dev; > - struct cxl_mem *cxlm; > - int rc, regloc; > + int rc, regloc, i; > > rc = cxl_bus_prepared(pdev); > if (rc != 0) { > @@ -44,15 +84,33 @@ static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id) > return rc; > } > > + rc = pcim_enable_device(pdev); > + if (rc) > + return rc; > + > regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC); > if (!regloc) { > dev_err(dev, "register location dvsec not found\n"); > return -ENXIO; > } > + regloc += 0xc; /* Skip DVSEC + reserved fields */ > + > + for (i = regloc; i < regloc + 0x24; i += 8) { > + u32 reg_lo, reg_hi; Hmm. That "register offset low" naming in the spec is just designed to confuse given lots of other things packed in the register. Perhaps a comment here to say it contains other information? Also possibly some docs for cxl_mem_create to make the same point there. > + > + pci_read_config_dword(pdev, i, &reg_lo); > + pci_read_config_dword(pdev, i + 4, &reg_hi); > + > + if (CXL_REGLOG_IS_MEMDEV(reg_lo)) { > + cxlm = cxl_mem_create(pdev, reg_lo, reg_hi); > + break; > + } > + } > + > + if (IS_ERR(cxlm)) > + return -ENXIO; > > - cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL); > - if (!cxlm) > - return -ENOMEM; > + pci_set_drvdata(pdev, cxlm); I could be wrong but don't think this is used yet. I'd prefer to see it introduced only when it is. Makes it easy to match up without having to search back in earlier patches. 
> > return 0; > } > diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h > index beb03921e6da..be87f62e9132 100644 > --- a/drivers/cxl/pci.h > +++ b/drivers/cxl/pci.h > @@ -12,4 +12,10 @@ > #define PCI_DVSEC_ID_CXL 0x0 > #define PCI_DVSEC_ID_CXL_REGLOC 0x8 > > +#define CXL_REGLOG_RBI_EMPTY 0 As in the QEMU patches, please add a comment on what RBI means here. It's non obvious even just after you've read through the spec! > +#define CXL_REGLOG_RBI_COMPONENT 1 > +#define CXL_REGLOG_RBI_VIRT 2 > +#define CXL_REGLOG_RBI_MEMDEV 3 > +#define CXL_REGLOG_IS_MEMDEV(x) ((((x) >> 8) & 0xff) == CXL_REGLOG_RBI_MEMDEV) > + > #endif /* __CXL_PCI_H__ */
On 20-11-16 18:23:21, Bjorn Helgaas wrote: > On Mon, Nov 16, 2020 at 03:19:41PM -0800, Dan Williams wrote: > > On Fri, Nov 13, 2020 at 5:12 PM Ben Widawsky <ben.widawsky@intel.com> wrote: > > > On 20-11-13 12:17:32, Bjorn Helgaas wrote: > > > > On Tue, Nov 10, 2020 at 09:43:51PM -0800, Ben Widawsky wrote: > > > > > > static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id) > > > > > { > > > > > + struct cxl_mem *cxlm = ERR_PTR(-ENXIO); > > > > > struct device *dev = &pdev->dev; > > > > > - struct cxl_mem *cxlm; > > > > > > > > The order was better before ("dev", then "clxm"). Oh, I suppose this > > > > is a "reverse Christmas tree" thing. > > > > > > > > > > I don't actually care either way as long as it's consistent. I tend to do > > > reverse Christmas tree for no particular reason. > > > > Yeah, reverse Christmas tree for no particular reason. > > FWIW, the usual drivers/pci style is to order the decls in the order > the variables are used in the code. But this isn't drivers/pci, so > it's up to you. I only noticed because changing the order made the > diff bigger than it needed to be. > > > > > I think this would be easier to read if cxl_mem_create() returned NULL > > > > on failure (it prints error messages and we throw away > > > > -ENXIO/-ENOMEM distinction here anyway) so you could do: > > > > > > > > struct cxl_mem *cxlm = NULL; > > > > > > > > for (...) { > > > > if (...) { > > > > cxlm = cxl_mem_create(pdev, reg_lo, reg_hi); > > > > break; > > > > } > > > > } > > > > > > > > if (!cxlm) > > > > return -ENXIO; /* -ENODEV might be more natural? */ > > > > > > > > > > I agree on both counts. Both of these came from Dan, so I will let him explain. > > > > I'm not attached to differentiating -ENOMEM from -ENXIO and am ok to > > drop the ERR_PTR() return. I do tend to use -ENXIO for failure to > > perform an initialization action vs failure to even find the device, > > but if -ENODEV seems more idiomatic to Bjorn, I won't argue. 
> > -ENXIO is fine with me. I just don't see it as often so I don't > really know what it is. > > Bjorn Dan, Bjorn, I did a fairly randomized look at various probe functions and ENODEV seems to be more common. My sort of historical use has been - ENODEV: General, couldn't establish device presence - ENXIO: Device was there but something is totally misconfigured - E*: A matching errno for exactly what went wrong My question though is, would it be useful to propagate the error up through probe?
On Mon, Nov 23, 2020 at 11:20 AM Ben Widawsky <ben.widawsky@intel.com> wrote: [..] > > -ENXIO is fine with me. I just don't see it as often so I don't > > really know what it is. > > > > Bjorn > > Dan, Bjorn, I did a fairly randomized look at various probe functions and ENODEV > seems to be more common. My sort of historical use has been > - ENODEV: General, couldn't establish device presence > - ENXIO: Device was there but something is totally misconfigured > - E*: A matching errno for exactly what went wrong > > My question though is, would it be useful to propagate the error up through > probe? The error from probe becomes the modprobe exit code, or the write to the 'bind' attribute errno. So, it's a choice between "No such device or address", or "No such device". The "or address" mention makes a small bit more sense to me since the device is obviously present as it is visible in lspci, but either error code clearly indicates a driver problem so ENODEV is fine. For the other error codes I think it would be confusing to return something like EINVAL from probe as that would be mistaken as an invalid argument to the modprobe without stracing to see that it came from the result of a sysfs write
On 20-11-23 11:32:33, Dan Williams wrote: > On Mon, Nov 23, 2020 at 11:20 AM Ben Widawsky <ben.widawsky@intel.com> wrote: > [..] > > > -ENXIO is fine with me. I just don't see it as often so I don't > > > really know what it is. > > > > > > Bjorn > > > > Dan, Bjorn, I did a fairly randomized look at various probe functions and ENODEV > > seems to be more common. My sort of historical use has been > > - ENODEV: General, couldn't establish device presence > > - ENXIO: Device was there but something is totally misconfigured > > - E*: A matching errno for exactly what went wrong > > > > My question though is, would it be useful to propagate the error up through > > probe? > > The error from probe becomes the modprobe exit code, or the write to > the 'bind' attribute errno. So, it's a choice between "No such device > or address", or "No such device". The "or address" mention makes a > small bit more sense to me since the device is obviously present as it > is visible in lspci, but either error code clearly indicates a driver > problem so ENODEV is fine. > > For the other error codes I think it would be confusing to return > something like EINVAL from probe as that would be mistaken as an > invalid argument to the modprobe without stracing to see that it came > from the result of a sysfs write Currently in this path there are 2 general reasons for failure: 1. Driver internal problem, ENOMEM or some such. 2. Device problem (the memory device capability isn't present). I think I'll return ENODEV for the former and ENXIO for the latter. If that sounds good to everyone else.
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index aa7d881fa47b..8d9b9ab6c5ea 100644 --- a/drivers/cxl/mem.c +++ b/drivers/cxl/mem.c @@ -7,9 +7,49 @@ #include "pci.h" struct cxl_mem { + struct pci_dev *pdev; void __iomem *regs; }; +static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev, u32 reg_lo, u32 reg_hi) +{ + struct device *dev = &pdev->dev; + struct cxl_mem *cxlm; + void __iomem *regs; + u64 offset; + u8 bar; + int rc; + + offset = ((u64)reg_hi << 32) | (reg_lo & 0xffff0000); + bar = reg_lo & 0x7; + + /* Basic sanity check that BAR is big enough */ + if (pci_resource_len(pdev, bar) < offset) { + dev_err(dev, "bar%d: %pr: too small (offset: %#llx)\n", + bar, &pdev->resource[bar], (unsigned long long) offset); + return ERR_PTR(-ENXIO); + } + + rc = pcim_iomap_regions(pdev, 1 << bar, pci_name(pdev)); + if (rc != 0) { + dev_err(dev, "failed to map registers\n"); + return ERR_PTR(-ENXIO); + } + + cxlm = devm_kzalloc(&pdev->dev, sizeof(*cxlm), GFP_KERNEL); + if (!cxlm) { + dev_err(dev, "No memory available\n"); + return ERR_PTR(-ENOMEM); + } + + regs = pcim_iomap_table(pdev)[bar]; + cxlm->pdev = pdev; + cxlm->regs = regs + offset; + + dev_dbg(dev, "Mapped CXL Memory Device resource\n"); + return cxlm; +} + static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec) { int pos; @@ -34,9 +74,9 @@ static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec) static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id) { + struct cxl_mem *cxlm = ERR_PTR(-ENXIO); struct device *dev = &pdev->dev; - struct cxl_mem *cxlm; - int rc, regloc; + int rc, regloc, i; rc = cxl_bus_prepared(pdev); if (rc != 0) { @@ -44,15 +84,33 @@ static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id) return rc; } + rc = pcim_enable_device(pdev); + if (rc) + return rc; + regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC); if (!regloc) { dev_err(dev, "register location dvsec not found\n"); return -ENXIO; } + regloc += 0xc; /* Skip DVSEC + reserved 
fields */ + + for (i = regloc; i < regloc + 0x24; i += 8) { + u32 reg_lo, reg_hi; + + pci_read_config_dword(pdev, i, &reg_lo); + pci_read_config_dword(pdev, i + 4, &reg_hi); + + if (CXL_REGLOG_IS_MEMDEV(reg_lo)) { + cxlm = cxl_mem_create(pdev, reg_lo, reg_hi); + break; + } + } + + if (IS_ERR(cxlm)) + return -ENXIO; - cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL); - if (!cxlm) - return -ENOMEM; + pci_set_drvdata(pdev, cxlm); return 0; } diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h index beb03921e6da..be87f62e9132 100644 --- a/drivers/cxl/pci.h +++ b/drivers/cxl/pci.h @@ -12,4 +12,10 @@ #define PCI_DVSEC_ID_CXL 0x0 #define PCI_DVSEC_ID_CXL_REGLOC 0x8 +#define CXL_REGLOG_RBI_EMPTY 0 +#define CXL_REGLOG_RBI_COMPONENT 1 +#define CXL_REGLOG_RBI_VIRT 2 +#define CXL_REGLOG_RBI_MEMDEV 3 +#define CXL_REGLOG_IS_MEMDEV(x) ((((x) >> 8) & 0xff) == CXL_REGLOG_RBI_MEMDEV) + #endif /* __CXL_PCI_H__ */
All the necessary bits are initialized in order to find and map the register space for CXL Memory Devices. This is accomplished by using the Register Locator DVSEC (CXL 2.0 - 8.1.9.1) to determine which PCI BAR to use, and how much of an offset from that BAR should be added. If the memory device registers are found and mapped a new internal data structure tracking device state is allocated. Signed-off-by: Ben Widawsky <ben.widawsky@intel.com> --- drivers/cxl/mem.c | 68 +++++++++++++++++++++++++++++++++++++++++++---- drivers/cxl/pci.h | 6 +++++ 2 files changed, 69 insertions(+), 5 deletions(-)