Message ID | 20210429190926.5086-11-smalin@marvell.com |
---|---|
State | Superseded |
Headers | show |
Series | NVMeTCP Offload ULP and QEDN Device Driver | expand |
On 4/29/21 9:09 PM, Shai Malin wrote: > From: Dean Balandin <dbalandin@marvell.com> > > As part of create_ctrl(), it scans the registered devices and calls > the claim_dev op on each of them, to find the first devices that matches > the connection params. Once the correct devices is found (claim_dev > returns true), we raise the refcnt of that device and return that device > as the device to be used for ctrl currently being created. > > Acked-by: Igor Russkikh <irusskikh@marvell.com> > Signed-off-by: Dean Balandin <dbalandin@marvell.com> > Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com> > Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com> > Signed-off-by: Michal Kalderon <mkalderon@marvell.com> > Signed-off-by: Ariel Elior <aelior@marvell.com> > Signed-off-by: Shai Malin <smalin@marvell.com> > --- > drivers/nvme/host/tcp-offload.c | 94 +++++++++++++++++++++++++++++++++ > 1 file changed, 94 insertions(+) > > diff --git a/drivers/nvme/host/tcp-offload.c b/drivers/nvme/host/tcp-offload.c > index 711232eba339..aa7cc239abf2 100644 > --- a/drivers/nvme/host/tcp-offload.c > +++ b/drivers/nvme/host/tcp-offload.c > @@ -13,6 +13,11 @@ > static LIST_HEAD(nvme_tcp_ofld_devices); > static DECLARE_RWSEM(nvme_tcp_ofld_devices_rwsem); > > +static inline struct nvme_tcp_ofld_ctrl *to_tcp_ofld_ctrl(struct nvme_ctrl *nctrl) > +{ > + return container_of(nctrl, struct nvme_tcp_ofld_ctrl, nctrl); > +} > + > /** > * nvme_tcp_ofld_register_dev() - NVMeTCP Offload Library registration > * function. 
> @@ -98,6 +103,94 @@ void nvme_tcp_ofld_req_done(struct nvme_tcp_ofld_req *req, > /* Placeholder - complete request with/without error */ > } > > +struct nvme_tcp_ofld_dev * > +nvme_tcp_ofld_lookup_dev(struct nvme_tcp_ofld_ctrl *ctrl) > +{ > + struct nvme_tcp_ofld_dev *dev; > + > + down_read(&nvme_tcp_ofld_devices_rwsem); > + list_for_each_entry(dev, &nvme_tcp_ofld_devices, entry) { > + if (dev->ops->claim_dev(dev, &ctrl->conn_params)) { > + /* Increase driver refcnt */ > + if (!try_module_get(dev->ops->module)) { > + pr_err("try_module_get failed\n"); > + dev = NULL; > + } > + > + goto out; > + } > + } > + > + dev = NULL; > +out: > + up_read(&nvme_tcp_ofld_devices_rwsem); > + > + return dev; > +} > + > +static int nvme_tcp_ofld_setup_ctrl(struct nvme_ctrl *nctrl, bool new) > +{ > + /* Placeholder - validates inputs and creates admin and IO queues */ > + > + return 0; > +} > + > +static struct nvme_ctrl * > +nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts) > +{ > + struct nvme_tcp_ofld_ctrl *ctrl; > + struct nvme_tcp_ofld_dev *dev; > + struct nvme_ctrl *nctrl; > + int rc = 0; > + > + ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); > + if (!ctrl) > + return ERR_PTR(-ENOMEM); > + > + nctrl = &ctrl->nctrl; > + > + /* Init nvme_tcp_ofld_ctrl and nvme_ctrl params based on received opts */ > + > + /* Find device that can reach the dest addr */ > + dev = nvme_tcp_ofld_lookup_dev(ctrl); > + if (!dev) { > + pr_info("no device found for addr %s:%s.\n", > + opts->traddr, opts->trsvcid); > + rc = -EINVAL; > + goto out_free_ctrl; > + } > + > + ctrl->dev = dev; > + > + if (ctrl->dev->ops->max_hw_sectors) > + nctrl->max_hw_sectors = ctrl->dev->ops->max_hw_sectors; > + if (ctrl->dev->ops->max_segments) > + nctrl->max_segments = ctrl->dev->ops->max_segments; > + > + /* Init queues */ > + > + /* Call nvme_init_ctrl */ > + > + rc = ctrl->dev->ops->setup_ctrl(ctrl, true); > + if (rc) > + goto out_module_put; > + > + rc = nvme_tcp_ofld_setup_ctrl(nctrl, true); 
> + if (rc) > + goto out_uninit_ctrl; > + > + return nctrl; > + > +out_uninit_ctrl: > + ctrl->dev->ops->release_ctrl(ctrl); > +out_module_put: > + module_put(dev->ops->module); > +out_free_ctrl: > + kfree(ctrl); > + > + return ERR_PTR(rc); > +} > + > static struct nvmf_transport_ops nvme_tcp_ofld_transport = { > .name = "tcp_offload", > .module = THIS_MODULE, > @@ -107,6 +200,7 @@ static struct nvmf_transport_ops nvme_tcp_ofld_transport = { > NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_HDR_DIGEST | > NVMF_OPT_DATA_DIGEST | NVMF_OPT_NR_POLL_QUEUES | > NVMF_OPT_TOS, > + .create_ctrl = nvme_tcp_ofld_create_ctrl, > }; > > static int __init nvme_tcp_ofld_init_module(void) > I wonder if we shouldn't take the approach from Martin Belanger, and introduce a new option 'host_iface' to select the interface to use. That is, _if_ the nvme-tcp offload driver would present itself as a network interface; one might argue that it would put too much restriction on the implementations. But if it does not present itself as a network interface, how do we address it? And if it does, wouldn't we be better off to specify the interface directly, and not try to imply the interface from the IP address? Cheers, Hannes -- Dr. Hannes Reinecke Kernel Storage Architect hare@suse.de +49 911 74053 688 SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
On 5/1/21 3:25 PM, Hannes Reinecke wrote: > On 4/29/21 9:09 PM, Shai Malin wrote: > > From: Dean Balandin <dbalandin@marvell.com> > > > > As part of create_ctrl(), it scans the registered devices and calls > > the claim_dev op on each of them, to find the first devices that matches > > the connection params. Once the correct devices is found (claim_dev > > returns true), we raise the refcnt of that device and return that device > > as the device to be used for ctrl currently being created. > > > > Acked-by: Igor Russkikh <irusskikh@marvell.com> > > Signed-off-by: Dean Balandin <dbalandin@marvell.com> > > Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com> > > Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com> > > Signed-off-by: Michal Kalderon <mkalderon@marvell.com> > > Signed-off-by: Ariel Elior <aelior@marvell.com> > > Signed-off-by: Shai Malin <smalin@marvell.com> > > --- > > drivers/nvme/host/tcp-offload.c | 94 +++++++++++++++++++++++++++++++++ > > 1 file changed, 94 insertions(+) > > > > diff --git a/drivers/nvme/host/tcp-offload.c b/drivers/nvme/host/tcp-offload.c > > index 711232eba339..aa7cc239abf2 100644 > > --- a/drivers/nvme/host/tcp-offload.c > > +++ b/drivers/nvme/host/tcp-offload.c > > @@ -13,6 +13,11 @@ > > static LIST_HEAD(nvme_tcp_ofld_devices); > > static DECLARE_RWSEM(nvme_tcp_ofld_devices_rwsem); > > > > +static inline struct nvme_tcp_ofld_ctrl *to_tcp_ofld_ctrl(struct nvme_ctrl *nctrl) > > +{ > > + return container_of(nctrl, struct nvme_tcp_ofld_ctrl, nctrl); > > +} > > + > > /** > > * nvme_tcp_ofld_register_dev() - NVMeTCP Offload Library registration > > * function. 
> > @@ -98,6 +103,94 @@ void nvme_tcp_ofld_req_done(struct nvme_tcp_ofld_req *req, > > /* Placeholder - complete request with/without error */ > > } > > > > +struct nvme_tcp_ofld_dev * > > +nvme_tcp_ofld_lookup_dev(struct nvme_tcp_ofld_ctrl *ctrl) > > +{ > > + struct nvme_tcp_ofld_dev *dev; > > + > > + down_read(&nvme_tcp_ofld_devices_rwsem); > > + list_for_each_entry(dev, &nvme_tcp_ofld_devices, entry) { > > + if (dev->ops->claim_dev(dev, &ctrl->conn_params)) { > > + /* Increase driver refcnt */ > > + if (!try_module_get(dev->ops->module)) { > > + pr_err("try_module_get failed\n"); > > + dev = NULL; > > + } > > + > > + goto out; > > + } > > + } > > + > > + dev = NULL; > > +out: > > + up_read(&nvme_tcp_ofld_devices_rwsem); > > + > > + return dev; > > +} > > + > > +static int nvme_tcp_ofld_setup_ctrl(struct nvme_ctrl *nctrl, bool new) > > +{ > > + /* Placeholder - validates inputs and creates admin and IO queues */ > > + > > + return 0; > > +} > > + > > +static struct nvme_ctrl * > > +nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts) > > +{ > > + struct nvme_tcp_ofld_ctrl *ctrl; > > + struct nvme_tcp_ofld_dev *dev; > > + struct nvme_ctrl *nctrl; > > + int rc = 0; > > + > > + ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); > > + if (!ctrl) > > + return ERR_PTR(-ENOMEM); > > + > > + nctrl = &ctrl->nctrl; > > + > > + /* Init nvme_tcp_ofld_ctrl and nvme_ctrl params based on received opts */ > > + > > + /* Find device that can reach the dest addr */ > > + dev = nvme_tcp_ofld_lookup_dev(ctrl); > > + if (!dev) { > > + pr_info("no device found for addr %s:%s.\n", > > + opts->traddr, opts->trsvcid); > > + rc = -EINVAL; > > + goto out_free_ctrl; > > + } > > + > > + ctrl->dev = dev; > > + > > + if (ctrl->dev->ops->max_hw_sectors) > > + nctrl->max_hw_sectors = ctrl->dev->ops->max_hw_sectors; > > + if (ctrl->dev->ops->max_segments) > > + nctrl->max_segments = ctrl->dev->ops->max_segments; > > + > > + /* Init queues */ > > + > > + /* Call nvme_init_ctrl */ 
> > + > > + rc = ctrl->dev->ops->setup_ctrl(ctrl, true); > > + if (rc) > > + goto out_module_put; > > + > > + rc = nvme_tcp_ofld_setup_ctrl(nctrl, true); > > + if (rc) > > + goto out_uninit_ctrl; > > + > > + return nctrl; > > + > > +out_uninit_ctrl: > > + ctrl->dev->ops->release_ctrl(ctrl); > > +out_module_put: > > + module_put(dev->ops->module); > > +out_free_ctrl: > > + kfree(ctrl); > > + > > + return ERR_PTR(rc); > > +} > > + > > static struct nvmf_transport_ops nvme_tcp_ofld_transport = { > > .name = "tcp_offload", > > .module = THIS_MODULE, > > @@ -107,6 +200,7 @@ static struct nvmf_transport_ops nvme_tcp_ofld_transport = { > > NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_HDR_DIGEST | > > NVMF_OPT_DATA_DIGEST | NVMF_OPT_NR_POLL_QUEUES | > > NVMF_OPT_TOS, > > + .create_ctrl = nvme_tcp_ofld_create_ctrl, > > }; > > > > static int __init nvme_tcp_ofld_init_module(void) > > > I wonder if we shouldn't take the approach from Martin Belanger, and > introduce a new option 'host_iface' to select the interface to use. > That is, _if_ the nvme-tcp offload driver would present itself as a > network interface; one might argue that it would put too much > restriction on the implementations. We should add the new option 'host_iface' also to nvme-tcp-offload and each vendor-specific driver shall register with this to nvme_tcp_ofld_ops. > But if it does not present itself as a network interface, how do we > address it? And if it does, wouldn't we be better off to specify the > interface directly, and not try to imply the interface from the IP address? Specifically for the Marvell qedn driver, we are pairing between the net-device (qede) and the offload-device (qedn) on each port. The usage for the user is: Assign IP to the net-device (from any existing linux tool): ip addr add 100.100.0.101/24 dev p1p1 This IP will be used by both net-device (qede) and offload-device (qedn). 
In order to connect from "sw" nvme-tcp through the net-device (qede): nvme connect -t tcp -s 4420 -a 100.100.0.100 -n testnqn In order to connect from "offload" nvme-tcp through the offload-device (qedn): nvme connect -t tcp_offload -s 4420 -a 100.100.0.100 -n testnqn A suggestion (by Hannes Reinecke), as a future enhancement, and in order to simplify the usage, we suggest modifying nvme-cli with a new flag that will determine if "-t tcp" should be the regular nvme-tcp (which will be the default) or nvme-tcp-offload. For example: nvme connect -t tcp -s 4420 -a 100.100.0.100 -n testnqn -[new flag] > > Cheers, > > Hannes > -- > Dr. Hannes Reinecke Kernel Storage Architect > hare@suse.de +49 911 74053 688 > SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg > HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
diff --git a/drivers/nvme/host/tcp-offload.c b/drivers/nvme/host/tcp-offload.c index 711232eba339..aa7cc239abf2 100644 --- a/drivers/nvme/host/tcp-offload.c +++ b/drivers/nvme/host/tcp-offload.c @@ -13,6 +13,11 @@ static LIST_HEAD(nvme_tcp_ofld_devices); static DECLARE_RWSEM(nvme_tcp_ofld_devices_rwsem); +static inline struct nvme_tcp_ofld_ctrl *to_tcp_ofld_ctrl(struct nvme_ctrl *nctrl) +{ + return container_of(nctrl, struct nvme_tcp_ofld_ctrl, nctrl); +} + /** * nvme_tcp_ofld_register_dev() - NVMeTCP Offload Library registration * function. @@ -98,6 +103,94 @@ void nvme_tcp_ofld_req_done(struct nvme_tcp_ofld_req *req, /* Placeholder - complete request with/without error */ } +struct nvme_tcp_ofld_dev * +nvme_tcp_ofld_lookup_dev(struct nvme_tcp_ofld_ctrl *ctrl) +{ + struct nvme_tcp_ofld_dev *dev; + + down_read(&nvme_tcp_ofld_devices_rwsem); + list_for_each_entry(dev, &nvme_tcp_ofld_devices, entry) { + if (dev->ops->claim_dev(dev, &ctrl->conn_params)) { + /* Increase driver refcnt */ + if (!try_module_get(dev->ops->module)) { + pr_err("try_module_get failed\n"); + dev = NULL; + } + + goto out; + } + } + + dev = NULL; +out: + up_read(&nvme_tcp_ofld_devices_rwsem); + + return dev; +} + +static int nvme_tcp_ofld_setup_ctrl(struct nvme_ctrl *nctrl, bool new) +{ + /* Placeholder - validates inputs and creates admin and IO queues */ + + return 0; +} + +static struct nvme_ctrl * +nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts) +{ + struct nvme_tcp_ofld_ctrl *ctrl; + struct nvme_tcp_ofld_dev *dev; + struct nvme_ctrl *nctrl; + int rc = 0; + + ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); + if (!ctrl) + return ERR_PTR(-ENOMEM); + + nctrl = &ctrl->nctrl; + + /* Init nvme_tcp_ofld_ctrl and nvme_ctrl params based on received opts */ + + /* Find device that can reach the dest addr */ + dev = nvme_tcp_ofld_lookup_dev(ctrl); + if (!dev) { + pr_info("no device found for addr %s:%s.\n", + opts->traddr, opts->trsvcid); + rc = -EINVAL; + goto out_free_ctrl; 
+ } + + ctrl->dev = dev; + + if (ctrl->dev->ops->max_hw_sectors) + nctrl->max_hw_sectors = ctrl->dev->ops->max_hw_sectors; + if (ctrl->dev->ops->max_segments) + nctrl->max_segments = ctrl->dev->ops->max_segments; + + /* Init queues */ + + /* Call nvme_init_ctrl */ + + rc = ctrl->dev->ops->setup_ctrl(ctrl, true); + if (rc) + goto out_module_put; + + rc = nvme_tcp_ofld_setup_ctrl(nctrl, true); + if (rc) + goto out_uninit_ctrl; + + return nctrl; + +out_uninit_ctrl: + ctrl->dev->ops->release_ctrl(ctrl); +out_module_put: + module_put(dev->ops->module); +out_free_ctrl: + kfree(ctrl); + + return ERR_PTR(rc); +} + static struct nvmf_transport_ops nvme_tcp_ofld_transport = { .name = "tcp_offload", .module = THIS_MODULE, @@ -107,6 +200,7 @@ static struct nvmf_transport_ops nvme_tcp_ofld_transport = { NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST | NVMF_OPT_NR_POLL_QUEUES | NVMF_OPT_TOS, + .create_ctrl = nvme_tcp_ofld_create_ctrl, }; static int __init nvme_tcp_ofld_init_module(void)