diff mbox series

[RFC,v4,10/27] nvme-tcp-offload: Add device scan implementation

Message ID 20210429190926.5086-11-smalin@marvell.com
State Superseded
Headers show
Series NVMeTCP Offload ULP and QEDN Device Driver | expand

Commit Message

Shai Malin April 29, 2021, 7:09 p.m. UTC
From: Dean Balandin <dbalandin@marvell.com>

As part of create_ctrl(), it scans the registered devices and calls
the claim_dev op on each of them, to find the first device that matches
the connection params. Once the correct device is found (claim_dev
returns true), we raise the refcnt of that device and return it as the
device to be used for the ctrl currently being created.

Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Dean Balandin <dbalandin@marvell.com>
Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>
Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>
Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Shai Malin <smalin@marvell.com>
---
 drivers/nvme/host/tcp-offload.c | 94 +++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

Comments

Hannes Reinecke May 1, 2021, 12:25 p.m. UTC | #1
On 4/29/21 9:09 PM, Shai Malin wrote:
> From: Dean Balandin <dbalandin@marvell.com>

> 

> As part of create_ctrl(), it scans the registered devices and calls

> the claim_dev op on each of them, to find the first devices that matches

> the connection params. Once the correct devices is found (claim_dev

> returns true), we raise the refcnt of that device and return that device

> as the device to be used for ctrl currently being created.

> 

> Acked-by: Igor Russkikh <irusskikh@marvell.com>

> Signed-off-by: Dean Balandin <dbalandin@marvell.com>

> Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>

> Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>

> Signed-off-by: Michal Kalderon <mkalderon@marvell.com>

> Signed-off-by: Ariel Elior <aelior@marvell.com>

> Signed-off-by: Shai Malin <smalin@marvell.com>

> ---

>   drivers/nvme/host/tcp-offload.c | 94 +++++++++++++++++++++++++++++++++

>   1 file changed, 94 insertions(+)

> 

> diff --git a/drivers/nvme/host/tcp-offload.c b/drivers/nvme/host/tcp-offload.c

> index 711232eba339..aa7cc239abf2 100644

> --- a/drivers/nvme/host/tcp-offload.c

> +++ b/drivers/nvme/host/tcp-offload.c

> @@ -13,6 +13,11 @@

>   static LIST_HEAD(nvme_tcp_ofld_devices);

>   static DECLARE_RWSEM(nvme_tcp_ofld_devices_rwsem);

>   

> +static inline struct nvme_tcp_ofld_ctrl *to_tcp_ofld_ctrl(struct nvme_ctrl *nctrl)

> +{

> +	return container_of(nctrl, struct nvme_tcp_ofld_ctrl, nctrl);

> +}

> +

>   /**

>    * nvme_tcp_ofld_register_dev() - NVMeTCP Offload Library registration

>    * function.

> @@ -98,6 +103,94 @@ void nvme_tcp_ofld_req_done(struct nvme_tcp_ofld_req *req,

>   	/* Placeholder - complete request with/without error */

>   }

>   

> +struct nvme_tcp_ofld_dev *

> +nvme_tcp_ofld_lookup_dev(struct nvme_tcp_ofld_ctrl *ctrl)

> +{

> +	struct nvme_tcp_ofld_dev *dev;

> +

> +	down_read(&nvme_tcp_ofld_devices_rwsem);

> +	list_for_each_entry(dev, &nvme_tcp_ofld_devices, entry) {

> +		if (dev->ops->claim_dev(dev, &ctrl->conn_params)) {

> +			/* Increase driver refcnt */

> +			if (!try_module_get(dev->ops->module)) {

> +				pr_err("try_module_get failed\n");

> +				dev = NULL;

> +			}

> +

> +			goto out;

> +		}

> +	}

> +

> +	dev = NULL;

> +out:

> +	up_read(&nvme_tcp_ofld_devices_rwsem);

> +

> +	return dev;

> +}

> +

> +static int nvme_tcp_ofld_setup_ctrl(struct nvme_ctrl *nctrl, bool new)

> +{

> +	/* Placeholder - validates inputs and creates admin and IO queues */

> +

> +	return 0;

> +}

> +

> +static struct nvme_ctrl *

> +nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts)

> +{

> +	struct nvme_tcp_ofld_ctrl *ctrl;

> +	struct nvme_tcp_ofld_dev *dev;

> +	struct nvme_ctrl *nctrl;

> +	int rc = 0;

> +

> +	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);

> +	if (!ctrl)

> +		return ERR_PTR(-ENOMEM);

> +

> +	nctrl = &ctrl->nctrl;

> +

> +	/* Init nvme_tcp_ofld_ctrl and nvme_ctrl params based on received opts */

> +

> +	/* Find device that can reach the dest addr */

> +	dev = nvme_tcp_ofld_lookup_dev(ctrl);

> +	if (!dev) {

> +		pr_info("no device found for addr %s:%s.\n",

> +			opts->traddr, opts->trsvcid);

> +		rc = -EINVAL;

> +		goto out_free_ctrl;

> +	}

> +

> +	ctrl->dev = dev;

> +

> +	if (ctrl->dev->ops->max_hw_sectors)

> +		nctrl->max_hw_sectors = ctrl->dev->ops->max_hw_sectors;

> +	if (ctrl->dev->ops->max_segments)

> +		nctrl->max_segments = ctrl->dev->ops->max_segments;

> +

> +	/* Init queues */

> +

> +	/* Call nvme_init_ctrl */

> +

> +	rc = ctrl->dev->ops->setup_ctrl(ctrl, true);

> +	if (rc)

> +		goto out_module_put;

> +

> +	rc = nvme_tcp_ofld_setup_ctrl(nctrl, true);

> +	if (rc)

> +		goto out_uninit_ctrl;

> +

> +	return nctrl;

> +

> +out_uninit_ctrl:

> +	ctrl->dev->ops->release_ctrl(ctrl);

> +out_module_put:

> +	module_put(dev->ops->module);

> +out_free_ctrl:

> +	kfree(ctrl);

> +

> +	return ERR_PTR(rc);

> +}

> +

>   static struct nvmf_transport_ops nvme_tcp_ofld_transport = {

>   	.name		= "tcp_offload",

>   	.module		= THIS_MODULE,

> @@ -107,6 +200,7 @@ static struct nvmf_transport_ops nvme_tcp_ofld_transport = {

>   			  NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_HDR_DIGEST |

>   			  NVMF_OPT_DATA_DIGEST | NVMF_OPT_NR_POLL_QUEUES |

>   			  NVMF_OPT_TOS,

> +	.create_ctrl	= nvme_tcp_ofld_create_ctrl,

>   };

>   

>   static int __init nvme_tcp_ofld_init_module(void)

> 

I wonder if we shouldn't take the approach from Martin Belanger, and 
introduce a new option 'host_iface' to select the interface to use.
That is, _if_ the nvme-tcp offload driver would present itself as a 
network interface; one might argue that it would put too much 
restriction on the implementations.
But if it does not present itself as a network interface, how do we 
address it? And if it does, wouldn't we be better off to specify the 
interface directly, and not try to imply the interface from the IP address?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
Shai Malin May 5, 2021, 5:52 p.m. UTC | #2
On 5/1/21 3:25 PM, Hannes Reinecke wrote:
> On 4/29/21 9:09 PM, Shai Malin wrote:

> > From: Dean Balandin <dbalandin@marvell.com>

> >

> > As part of create_ctrl(), it scans the registered devices and calls

> > the claim_dev op on each of them, to find the first devices that matches

> > the connection params. Once the correct devices is found (claim_dev

> > returns true), we raise the refcnt of that device and return that device

> > as the device to be used for ctrl currently being created.

> >

> > Acked-by: Igor Russkikh <irusskikh@marvell.com>

> > Signed-off-by: Dean Balandin <dbalandin@marvell.com>

> > Signed-off-by: Prabhakar Kushwaha <pkushwaha@marvell.com>

> > Signed-off-by: Omkar Kulkarni <okulkarni@marvell.com>

> > Signed-off-by: Michal Kalderon <mkalderon@marvell.com>

> > Signed-off-by: Ariel Elior <aelior@marvell.com>

> > Signed-off-by: Shai Malin <smalin@marvell.com>

> > ---

> >   drivers/nvme/host/tcp-offload.c | 94 +++++++++++++++++++++++++++++++++

> >   1 file changed, 94 insertions(+)

> >

> > diff --git a/drivers/nvme/host/tcp-offload.c b/drivers/nvme/host/tcp-offload.c

> > index 711232eba339..aa7cc239abf2 100644

> > --- a/drivers/nvme/host/tcp-offload.c

> > +++ b/drivers/nvme/host/tcp-offload.c

> > @@ -13,6 +13,11 @@

> >   static LIST_HEAD(nvme_tcp_ofld_devices);

> >   static DECLARE_RWSEM(nvme_tcp_ofld_devices_rwsem);

> >

> > +static inline struct nvme_tcp_ofld_ctrl *to_tcp_ofld_ctrl(struct nvme_ctrl *nctrl)

> > +{

> > +     return container_of(nctrl, struct nvme_tcp_ofld_ctrl, nctrl);

> > +}

> > +

> >   /**

> >    * nvme_tcp_ofld_register_dev() - NVMeTCP Offload Library registration

> >    * function.

> > @@ -98,6 +103,94 @@ void nvme_tcp_ofld_req_done(struct nvme_tcp_ofld_req *req,

> >       /* Placeholder - complete request with/without error */

> >   }

> >

> > +struct nvme_tcp_ofld_dev *

> > +nvme_tcp_ofld_lookup_dev(struct nvme_tcp_ofld_ctrl *ctrl)

> > +{

> > +     struct nvme_tcp_ofld_dev *dev;

> > +

> > +     down_read(&nvme_tcp_ofld_devices_rwsem);

> > +     list_for_each_entry(dev, &nvme_tcp_ofld_devices, entry) {

> > +             if (dev->ops->claim_dev(dev, &ctrl->conn_params)) {

> > +                     /* Increase driver refcnt */

> > +                     if (!try_module_get(dev->ops->module)) {

> > +                             pr_err("try_module_get failed\n");

> > +                             dev = NULL;

> > +                     }

> > +

> > +                     goto out;

> > +             }

> > +     }

> > +

> > +     dev = NULL;

> > +out:

> > +     up_read(&nvme_tcp_ofld_devices_rwsem);

> > +

> > +     return dev;

> > +}

> > +

> > +static int nvme_tcp_ofld_setup_ctrl(struct nvme_ctrl *nctrl, bool new)

> > +{

> > +     /* Placeholder - validates inputs and creates admin and IO queues */

> > +

> > +     return 0;

> > +}

> > +

> > +static struct nvme_ctrl *

> > +nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts)

> > +{

> > +     struct nvme_tcp_ofld_ctrl *ctrl;

> > +     struct nvme_tcp_ofld_dev *dev;

> > +     struct nvme_ctrl *nctrl;

> > +     int rc = 0;

> > +

> > +     ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);

> > +     if (!ctrl)

> > +             return ERR_PTR(-ENOMEM);

> > +

> > +     nctrl = &ctrl->nctrl;

> > +

> > +     /* Init nvme_tcp_ofld_ctrl and nvme_ctrl params based on received opts */

> > +

> > +     /* Find device that can reach the dest addr */

> > +     dev = nvme_tcp_ofld_lookup_dev(ctrl);

> > +     if (!dev) {

> > +             pr_info("no device found for addr %s:%s.\n",

> > +                     opts->traddr, opts->trsvcid);

> > +             rc = -EINVAL;

> > +             goto out_free_ctrl;

> > +     }

> > +

> > +     ctrl->dev = dev;

> > +

> > +     if (ctrl->dev->ops->max_hw_sectors)

> > +             nctrl->max_hw_sectors = ctrl->dev->ops->max_hw_sectors;

> > +     if (ctrl->dev->ops->max_segments)

> > +             nctrl->max_segments = ctrl->dev->ops->max_segments;

> > +

> > +     /* Init queues */

> > +

> > +     /* Call nvme_init_ctrl */

> > +

> > +     rc = ctrl->dev->ops->setup_ctrl(ctrl, true);

> > +     if (rc)

> > +             goto out_module_put;

> > +

> > +     rc = nvme_tcp_ofld_setup_ctrl(nctrl, true);

> > +     if (rc)

> > +             goto out_uninit_ctrl;

> > +

> > +     return nctrl;

> > +

> > +out_uninit_ctrl:

> > +     ctrl->dev->ops->release_ctrl(ctrl);

> > +out_module_put:

> > +     module_put(dev->ops->module);

> > +out_free_ctrl:

> > +     kfree(ctrl);

> > +

> > +     return ERR_PTR(rc);

> > +}

> > +

> >   static struct nvmf_transport_ops nvme_tcp_ofld_transport = {

> >       .name           = "tcp_offload",

> >       .module         = THIS_MODULE,

> > @@ -107,6 +200,7 @@ static struct nvmf_transport_ops nvme_tcp_ofld_transport = {

> >                         NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_HDR_DIGEST |

> >                         NVMF_OPT_DATA_DIGEST | NVMF_OPT_NR_POLL_QUEUES |

> >                         NVMF_OPT_TOS,

> > +     .create_ctrl    = nvme_tcp_ofld_create_ctrl,

> >   };

> >

> >   static int __init nvme_tcp_ofld_init_module(void)

> >

> I wonder if we shouldn't take the approach from Martin Belanger, and

> introduce a new option 'host_iface' to select the interface to use.

> That is, _if_ the nvme-tcp offload driver would present itself as a

> network interface; one might argue that it would put too much

> restriction on the implementations.


We should add the new option 'host_iface' to nvme-tcp-offload as well, and
each vendor-specific driver shall register its support for it via nvme_tcp_ofld_ops.

> But if it does not present itself as a network interface, how do we

> address it? And if it does, wouldn't we be better off to specify the

> interface directly, and not try to imply the interface from the IP address?


Specifically for the Marvell qedn driver, we are pairing between the net-device
(qede) and the offload-device (qedn) on each port.

The usage for the user is:
Assign IP to the net-device (from any existing linux tool):

    ip addr add 100.100.0.101/24 dev p1p1

This IP will be used by both net-device (qede) and offload-device (qedn).
In order to connect from "sw" nvme-tcp through the net-device (qede):

    nvme connect -t tcp -s 4420 -a 100.100.0.100 -n testnqn

In order to connect from "offload" nvme-tcp through the offload-device (qedn):

    nvme connect -t tcp_offload -s 4420 -a 100.100.0.100 -n testnqn

As a future enhancement, suggested by Hannes Reinecke, and in order
to simplify the usage, we suggest modifying nvme-cli with a new flag
that will determine whether "-t tcp" should use the regular nvme-tcp (which
will be the default) or nvme-tcp-offload.
For example:

    nvme connect -t tcp -s 4420 -a 100.100.0.100 -n testnqn -[new flag]

>

> Cheers,

>

> Hannes

> --

> Dr. Hannes Reinecke                Kernel Storage Architect

> hare@suse.de                              +49 911 74053 688

> SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg

> HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer
diff mbox series

Patch

diff --git a/drivers/nvme/host/tcp-offload.c b/drivers/nvme/host/tcp-offload.c
index 711232eba339..aa7cc239abf2 100644
--- a/drivers/nvme/host/tcp-offload.c
+++ b/drivers/nvme/host/tcp-offload.c
@@ -13,6 +13,11 @@ 
 static LIST_HEAD(nvme_tcp_ofld_devices);
 static DECLARE_RWSEM(nvme_tcp_ofld_devices_rwsem);
 
+static inline struct nvme_tcp_ofld_ctrl *to_tcp_ofld_ctrl(struct nvme_ctrl *nctrl)
+{
+	return container_of(nctrl, struct nvme_tcp_ofld_ctrl, nctrl);
+}
+
 /**
  * nvme_tcp_ofld_register_dev() - NVMeTCP Offload Library registration
  * function.
@@ -98,6 +103,94 @@  void nvme_tcp_ofld_req_done(struct nvme_tcp_ofld_req *req,
 	/* Placeholder - complete request with/without error */
 }
 
+struct nvme_tcp_ofld_dev *
+nvme_tcp_ofld_lookup_dev(struct nvme_tcp_ofld_ctrl *ctrl)
+{
+	struct nvme_tcp_ofld_dev *dev;
+
+	down_read(&nvme_tcp_ofld_devices_rwsem);
+	list_for_each_entry(dev, &nvme_tcp_ofld_devices, entry) {
+		if (dev->ops->claim_dev(dev, &ctrl->conn_params)) {
+			/* Increase driver refcnt */
+			if (!try_module_get(dev->ops->module)) {
+				pr_err("try_module_get failed\n");
+				dev = NULL;
+			}
+
+			goto out;
+		}
+	}
+
+	dev = NULL;
+out:
+	up_read(&nvme_tcp_ofld_devices_rwsem);
+
+	return dev;
+}
+
+static int nvme_tcp_ofld_setup_ctrl(struct nvme_ctrl *nctrl, bool new)
+{
+	/* Placeholder - validates inputs and creates admin and IO queues */
+
+	return 0;
+}
+
+static struct nvme_ctrl *
+nvme_tcp_ofld_create_ctrl(struct device *ndev, struct nvmf_ctrl_options *opts)
+{
+	struct nvme_tcp_ofld_ctrl *ctrl;
+	struct nvme_tcp_ofld_dev *dev;
+	struct nvme_ctrl *nctrl;
+	int rc = 0;
+
+	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
+	if (!ctrl)
+		return ERR_PTR(-ENOMEM);
+
+	nctrl = &ctrl->nctrl;
+
+	/* Init nvme_tcp_ofld_ctrl and nvme_ctrl params based on received opts */
+
+	/* Find device that can reach the dest addr */
+	dev = nvme_tcp_ofld_lookup_dev(ctrl);
+	if (!dev) {
+		pr_info("no device found for addr %s:%s.\n",
+			opts->traddr, opts->trsvcid);
+		rc = -EINVAL;
+		goto out_free_ctrl;
+	}
+
+	ctrl->dev = dev;
+
+	if (ctrl->dev->ops->max_hw_sectors)
+		nctrl->max_hw_sectors = ctrl->dev->ops->max_hw_sectors;
+	if (ctrl->dev->ops->max_segments)
+		nctrl->max_segments = ctrl->dev->ops->max_segments;
+
+	/* Init queues */
+
+	/* Call nvme_init_ctrl */
+
+	rc = ctrl->dev->ops->setup_ctrl(ctrl, true);
+	if (rc)
+		goto out_module_put;
+
+	rc = nvme_tcp_ofld_setup_ctrl(nctrl, true);
+	if (rc)
+		goto out_uninit_ctrl;
+
+	return nctrl;
+
+out_uninit_ctrl:
+	ctrl->dev->ops->release_ctrl(ctrl);
+out_module_put:
+	module_put(dev->ops->module);
+out_free_ctrl:
+	kfree(ctrl);
+
+	return ERR_PTR(rc);
+}
+
 static struct nvmf_transport_ops nvme_tcp_ofld_transport = {
 	.name		= "tcp_offload",
 	.module		= THIS_MODULE,
@@ -107,6 +200,7 @@  static struct nvmf_transport_ops nvme_tcp_ofld_transport = {
 			  NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_HDR_DIGEST |
 			  NVMF_OPT_DATA_DIGEST | NVMF_OPT_NR_POLL_QUEUES |
 			  NVMF_OPT_TOS,
+	.create_ctrl	= nvme_tcp_ofld_create_ctrl,
 };
 
 static int __init nvme_tcp_ofld_init_module(void)