Message ID: 20220413145652.112271-8-sumit.saxena@broadcom.com
State:      Superseded
Series:     mpi3mr: add BSG interface support for controller management
This patch adds support for management applications to send MPI3
Encapsulated NVMe passthru commands to the NVMe devices attached to
the Avenger controller. Since the NVMe drives are exposed as SCSI
devices by the controller, standard NVMe applications cannot be used
to interact with the drives, and the supported command set is also
limited by the controller firmware. MPI3 Encapsulated NVMe passthru
commands require special handling for PRP/SGL setup, hence the
additional changes.

Signed-off-by: Sumit Saxena <sumit.saxena@broadcom.com>
---
 drivers/scsi/mpi3mr/mpi3mr.h        |  25 ++
 drivers/scsi/mpi3mr/mpi3mr_app.c    | 348 +++++++++++++++++++++++++++-
 include/uapi/scsi/scsi_bsg_mpi3mr.h |   8 +
 3 files changed, 378 insertions(+), 3 deletions(-)
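For context, the data-format dispatch added by this patch decodes the PSDT
field (bits 15:14 of NVMe command dword 0); that is exactly what
mpi3mr_get_nvme_data_fmt() in the diff below computes, and the
MPI3MR_NVME_DATA_FORMAT_* values match the PSDT encodings. A minimal
standalone sketch of that decoding — the names here are illustrative, not
part of the patch:

/* Illustrative decoding of the PSDT field that selects PRP vs SGL;
 * mirrors mpi3mr_get_nvme_data_fmt() in the patch below. */
#include <stdint.h>
#include <stdio.h>

#define NVME_DATA_FORMAT_PRP	0	/* PRP entries in the command */
#define NVME_DATA_FORMAT_SGL1	1	/* SGL, data pointer is an SGE */
#define NVME_DATA_FORMAT_SGL2	2	/* SGL, data pointer is a segment */

static unsigned int nvme_data_fmt(uint32_t cdw0)
{
	return (cdw0 & 0xc000) >> 14;	/* PSDT: bits 15:14 of dword 0 */
}

int main(void)
{
	uint32_t cdw0 = 0x00004002;	/* opcode 0x02, PSDT = 01b */

	printf("data format: %u\n", nvme_data_fmt(cdw0));	/* -> 1 */
	return 0;
}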
diff --git a/drivers/scsi/mpi3mr/mpi3mr.h b/drivers/scsi/mpi3mr/mpi3mr.h
index 1de3b006f444..b2dbb6543a9b 100644
--- a/drivers/scsi/mpi3mr/mpi3mr.h
+++ b/drivers/scsi/mpi3mr/mpi3mr.h
@@ -193,6 +193,24 @@ extern atomic64_t event_counter;
  */
 #define MPI3MR_MAX_APP_XFER_SECTORS	(2048 + 512)
 
+/**
+ * struct mpi3mr_nvme_pt_sge - Structure to store SGEs for NVMe
+ * Encapsulated commands.
+ *
+ * @base_addr: Physical address
+ * @length: SGE length
+ * @rsvd: Reserved
+ * @rsvd1: Reserved
+ * @sgl_type: sgl type
+ */
+struct mpi3mr_nvme_pt_sge {
+	u64 base_addr;
+	u32 length;
+	u16 rsvd;
+	u8 rsvd1;
+	u8 sgl_type;
+};
+
 /**
  * struct mpi3mr_buf_map - local structure to
  * track kernel and user buffers associated with an BSG
@@ -746,6 +764,9 @@ struct scmd_priv {
  * @reset_waitq: Controller reset wait queue
  * @prepare_for_reset: Prepare for reset event received
  * @prepare_for_reset_timeout_counter: Prepare for reset timeout
+ * @prp_list_virt: NVMe encapsulated PRP list virtual base
+ * @prp_list_dma: NVMe encapsulated PRP list DMA
+ * @prp_sz: NVME encapsulated PRP list size
  * @diagsave_timeout: Diagnostic information save timeout
  * @logging_level: Controller debug logging level
  * @flush_io_count: I/O count to flush after reset
@@ -901,6 +922,10 @@ struct mpi3mr_ioc {
 	u8 prepare_for_reset;
 	u16 prepare_for_reset_timeout_counter;
 
+	void *prp_list_virt;
+	dma_addr_t prp_list_dma;
+	u32 prp_sz;
+
 	u16 diagsave_timeout;
 	int logging_level;
 	u16 flush_io_count;
diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c
index dada12216b97..428d3fcacbdb 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_app.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_app.c
@@ -621,6 +621,314 @@ static void mpi3mr_bsg_build_sgl(u8 *mpi_req, uint32_t sgl_offset,
 	}
 }
 
+/**
+ * mpi3mr_get_nvme_data_fmt - returns the NVMe data format
+ * @nvme_encap_request: NVMe encapsulated MPI request
+ *
+ * This function returns the type of the data format specified
+ * in user provided NVMe command in NVMe encapsulated request.
+ *
+ * Return: Data format of the NVMe command (PRP/SGL etc)
+ */
+static unsigned int mpi3mr_get_nvme_data_fmt(
+	struct mpi3_nvme_encapsulated_request *nvme_encap_request)
+{
+	u8 format = 0;
+
+	format = ((nvme_encap_request->command[0] & 0xc000) >> 14);
+	return format;
+
+}
+
+/**
+ * mpi3mr_build_nvme_sgl - SGL constructor for NVME
+ * encapsulated request
+ * @mrioc: Adapter instance reference
+ * @nvme_encap_request: NVMe encapsulated MPI request
+ * @drv_bufs: DMA address of the buffers to be placed in sgl
+ * @bufcnt: Number of DMA buffers
+ *
+ * This function places the DMA address of the given buffers in
+ * proper format as SGEs in the given NVMe encapsulated request.
+ *
+ * Return: 0 on success, -1 on failure
+ */
+static int mpi3mr_build_nvme_sgl(struct mpi3mr_ioc *mrioc,
+	struct mpi3_nvme_encapsulated_request *nvme_encap_request,
+	struct mpi3mr_buf_map *drv_bufs, u8 bufcnt)
+{
+	struct mpi3mr_nvme_pt_sge *nvme_sgl;
+	u64 sgl_ptr;
+	u8 count;
+	size_t length = 0;
+	struct mpi3mr_buf_map *drv_buf_iter = drv_bufs;
+	u64 sgemod_mask = ((u64)((mrioc->facts.sge_mod_mask) <<
+			    mrioc->facts.sge_mod_shift) << 32);
+	u64 sgemod_val = ((u64)(mrioc->facts.sge_mod_value) <<
+			  mrioc->facts.sge_mod_shift) << 32;
+
+	/*
+	 * Not all commands require a data transfer. If no data, just return
+	 * without constructing any sgl.
+	 */
+	for (count = 0; count < bufcnt; count++, drv_buf_iter++) {
+		if (drv_buf_iter->data_dir == DMA_NONE)
+			continue;
+		sgl_ptr = (u64)drv_buf_iter->kern_buf_dma;
+		length = drv_buf_iter->kern_buf_len;
+		break;
+	}
+	if (!length)
+		return 0;
+
+	if (sgl_ptr & sgemod_mask) {
+		dprint_bsg_err(mrioc,
+		    "%s: SGL address collides with SGE modifier\n",
+		    __func__);
+		return -1;
+	}
+
+	sgl_ptr &= ~sgemod_mask;
+	sgl_ptr |= sgemod_val;
+	nvme_sgl = (struct mpi3mr_nvme_pt_sge *)
+	    ((u8 *)(nvme_encap_request->command) + MPI3MR_NVME_CMD_SGL_OFFSET);
+	memset(nvme_sgl, 0, sizeof(struct mpi3mr_nvme_pt_sge));
+	nvme_sgl->base_addr = sgl_ptr;
+	nvme_sgl->length = length;
+	return 0;
+}
+
+/**
+ * mpi3mr_build_nvme_prp - PRP constructor for NVME
+ * encapsulated request
+ * @mrioc: Adapter instance reference
+ * @nvme_encap_request: NVMe encapsulated MPI request
+ * @drv_bufs: DMA address of the buffers to be placed in SGL
+ * @bufcnt: Number of DMA buffers
+ *
+ * This function places the DMA address of the given buffers in
+ * proper format as PRP entries in the given NVMe encapsulated
+ * request.
+ *
+ * Return: 0 on success, -1 on failure
+ */
+static int mpi3mr_build_nvme_prp(struct mpi3mr_ioc *mrioc,
+	struct mpi3_nvme_encapsulated_request *nvme_encap_request,
+	struct mpi3mr_buf_map *drv_bufs, u8 bufcnt)
+{
+	int prp_size = MPI3MR_NVME_PRP_SIZE;
+	__le64 *prp_entry, *prp1_entry, *prp2_entry;
+	__le64 *prp_page;
+	dma_addr_t prp_entry_dma, prp_page_dma, dma_addr;
+	u32 offset, entry_len, dev_pgsz;
+	u32 page_mask_result, page_mask;
+	size_t length = 0;
+	u8 count;
+	struct mpi3mr_buf_map *drv_buf_iter = drv_bufs;
+	u64 sgemod_mask = ((u64)((mrioc->facts.sge_mod_mask) <<
+			    mrioc->facts.sge_mod_shift) << 32);
+	u64 sgemod_val = ((u64)(mrioc->facts.sge_mod_value) <<
+			  mrioc->facts.sge_mod_shift) << 32;
+	u16 dev_handle = nvme_encap_request->dev_handle;
+	struct mpi3mr_tgt_dev *tgtdev;
+
+	tgtdev = mpi3mr_get_tgtdev_by_handle(mrioc, dev_handle);
+	if (!tgtdev) {
+		dprint_bsg_err(mrioc, "%s: invalid device handle 0x%04x\n",
+			__func__, dev_handle);
+		return -1;
+	}
+
+	if (tgtdev->dev_spec.pcie_inf.pgsz == 0) {
+		dprint_bsg_err(mrioc,
+		    "%s: NVMe device page size is zero for handle 0x%04x\n",
+		    __func__, dev_handle);
+		mpi3mr_tgtdev_put(tgtdev);
+		return -1;
+	}
+
+	dev_pgsz = 1 << (tgtdev->dev_spec.pcie_inf.pgsz);
+	mpi3mr_tgtdev_put(tgtdev);
+
+	/*
+	 * Not all commands require a data transfer. If no data, just return
+	 * without constructing any PRP.
+	 */
+	for (count = 0; count < bufcnt; count++, drv_buf_iter++) {
+		if (drv_buf_iter->data_dir == DMA_NONE)
+			continue;
+		dma_addr = drv_buf_iter->kern_buf_dma;
+		length = drv_buf_iter->kern_buf_len;
+		break;
+	}
+
+	if (!length)
+		return 0;
+
+	mrioc->prp_sz = 0;
+	mrioc->prp_list_virt = dma_alloc_coherent(&mrioc->pdev->dev,
+	    dev_pgsz, &mrioc->prp_list_dma, GFP_KERNEL);
+
+	if (!mrioc->prp_list_virt)
+		return -1;
+	mrioc->prp_sz = dev_pgsz;
+
+	/*
+	 * Set pointers to PRP1 and PRP2, which are in the NVMe command.
+	 * PRP1 is located at a 24 byte offset from the start of the NVMe
+	 * command. Then set the current PRP entry pointer to PRP1.
+	 */
+	prp1_entry = (__le64 *)((u8 *)(nvme_encap_request->command) +
+	    MPI3MR_NVME_CMD_PRP1_OFFSET);
+	prp2_entry = (__le64 *)((u8 *)(nvme_encap_request->command) +
+	    MPI3MR_NVME_CMD_PRP2_OFFSET);
+	prp_entry = prp1_entry;
+	/*
+	 * For the PRP entries, use the specially allocated buffer of
+	 * contiguous memory.
+	 */
+	prp_page = (__le64 *)mrioc->prp_list_virt;
+	prp_page_dma = mrioc->prp_list_dma;
+
+	/*
+	 * Check if we are within 1 entry of a page boundary we don't
+	 * want our first entry to be a PRP List entry.
+	 */
+	page_mask = dev_pgsz - 1;
+	page_mask_result = (uintptr_t)((u8 *)prp_page + prp_size) & page_mask;
+	if (!page_mask_result) {
+		dprint_bsg_err(mrioc, "%s: PRP page is not page aligned\n",
+		    __func__);
+		goto err_out;
+	}
+
+	/*
+	 * Set PRP physical pointer, which initially points to the current PRP
+	 * DMA memory page.
+	 */
+	prp_entry_dma = prp_page_dma;
+
+
+	/* Loop while the length is not zero. */
+	while (length) {
+		page_mask_result = (prp_entry_dma + prp_size) & page_mask;
+		if (!page_mask_result && (length > dev_pgsz)) {
+			dprint_bsg_err(mrioc,
+			    "%s: single PRP page is not sufficient\n",
+			    __func__);
+			goto err_out;
+		}
+
+		/* Need to handle if entry will be part of a page. */
+		offset = dma_addr & page_mask;
+		entry_len = dev_pgsz - offset;
+
+		if (prp_entry == prp1_entry) {
+			/*
+			 * Must fill in the first PRP pointer (PRP1) before
+			 * moving on.
+			 */
+			*prp1_entry = cpu_to_le64(dma_addr);
+			if (*prp1_entry & sgemod_mask) {
+				dprint_bsg_err(mrioc,
+				    "%s: PRP1 address collides with SGE modifier\n",
+				    __func__);
+				goto err_out;
+			}
+			*prp1_entry &= ~sgemod_mask;
+			*prp1_entry |= sgemod_val;
+
+			/*
+			 * Now point to the second PRP entry within the
+			 * command (PRP2).
+			 */
+			prp_entry = prp2_entry;
+		} else if (prp_entry == prp2_entry) {
+			/*
+			 * Should the PRP2 entry be a PRP List pointer or just
+			 * a regular PRP pointer? If there is more than one
+			 * more page of data, must use a PRP List pointer.
+			 */
+			if (length > dev_pgsz) {
+				/*
+				 * PRP2 will contain a PRP List pointer because
+				 * more PRP's are needed with this command. The
+				 * list will start at the beginning of the
+				 * contiguous buffer.
+				 */
+				*prp2_entry = cpu_to_le64(prp_entry_dma);
+				if (*prp2_entry & sgemod_mask) {
+					dprint_bsg_err(mrioc,
+					    "%s: PRP list address collides with SGE modifier\n",
+					    __func__);
+					goto err_out;
+				}
+				*prp2_entry &= ~sgemod_mask;
+				*prp2_entry |= sgemod_val;
+
+				/*
+				 * The next PRP Entry will be the start of the
+				 * first PRP List.
+				 */
+				prp_entry = prp_page;
+				continue;
+			} else {
+				/*
+				 * After this, the PRP Entries are complete.
+				 * This command uses 2 PRP's and no PRP list.
+				 */
+				*prp2_entry = cpu_to_le64(dma_addr);
+				if (*prp2_entry & sgemod_mask) {
+					dprint_bsg_err(mrioc,
+					    "%s: PRP2 collides with SGE modifier\n",
+					    __func__);
+					goto err_out;
+				}
+				*prp2_entry &= ~sgemod_mask;
+				*prp2_entry |= sgemod_val;
+			}
+		} else {
+			/*
+			 * Put entry in list and bump the addresses.
+			 *
+			 * After PRP1 and PRP2 are filled in, this will fill in
+			 * all remaining PRP entries in a PRP List, one per
+			 * each time through the loop.
+			 */
+			*prp_entry = cpu_to_le64(dma_addr);
+			if (*prp1_entry & sgemod_mask) {
+				dprint_bsg_err(mrioc,
+				    "%s: PRP address collides with SGE modifier\n",
+				    __func__);
+				goto err_out;
+			}
+			*prp_entry &= ~sgemod_mask;
+			*prp_entry |= sgemod_val;
+			prp_entry++;
+			prp_entry_dma++;
+		}
+
+		/*
+		 * Bump the phys address of the command's data buffer by the
+		 * entry_len.
+		 */
+		dma_addr += entry_len;
+
+		/* decrement length accounting for last partial page. */
+		if (entry_len > length)
+			length = 0;
+		else
+			length -= entry_len;
+	}
+	return 0;
+err_out:
+	if (mrioc->prp_list_virt) {
+		dma_free_coherent(&mrioc->pdev->dev, mrioc->prp_sz,
+		    mrioc->prp_list_virt, mrioc->prp_list_dma);
+		mrioc->prp_list_virt = NULL;
+	}
+	return -1;
+}
 /**
  * mpi3mr_bsg_process_mpt_cmds - MPI Pass through BSG handler
  * @job: BSG job reference
@@ -652,7 +960,7 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job, unsigned int *reply
 	struct mpi3mr_buf_map *drv_bufs = NULL, *drv_buf_iter = NULL;
 	u8 count, bufcnt = 0, is_rmcb = 0, is_rmrb = 0, din_cnt = 0, dout_cnt = 0;
 	u8 invalid_be = 0, erb_offset = 0xFF, mpirep_offset = 0xFF, sg_entries = 0;
-	u8 block_io = 0, resp_code = 0;
+	u8 block_io = 0, resp_code = 0, nvme_fmt = 0;
 	struct mpi3_request_header *mpi_header = NULL;
 	struct mpi3_status_reply_descriptor *status_desc;
 	struct mpi3_scsi_task_mgmt_request *tm_req;
@@ -892,7 +1200,34 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job, unsigned int *reply
 		goto out;
 	}
 
-	if (mpi_header->function != MPI3_BSG_FUNCTION_NVME_ENCAPSULATED) {
+	if (mpi_header->function == MPI3_BSG_FUNCTION_NVME_ENCAPSULATED) {
+		nvme_fmt = mpi3mr_get_nvme_data_fmt(
+			(struct mpi3_nvme_encapsulated_request *)mpi_req);
+		if (nvme_fmt == MPI3MR_NVME_DATA_FORMAT_PRP) {
+			if (mpi3mr_build_nvme_prp(mrioc,
+			    (struct mpi3_nvme_encapsulated_request *)mpi_req,
+			    drv_bufs, bufcnt)) {
+				rval = -ENOMEM;
+				mutex_unlock(&mrioc->bsg_cmds.mutex);
+				goto out;
+			}
+		} else if (nvme_fmt == MPI3MR_NVME_DATA_FORMAT_SGL1 ||
+			nvme_fmt == MPI3MR_NVME_DATA_FORMAT_SGL2) {
+			if (mpi3mr_build_nvme_sgl(mrioc,
+			    (struct mpi3_nvme_encapsulated_request *)mpi_req,
+			    drv_bufs, bufcnt)) {
+				rval = -EINVAL;
+				mutex_unlock(&mrioc->bsg_cmds.mutex);
+				goto out;
+			}
+		} else {
+			dprint_bsg_err(mrioc,
+			    "%s:invalid NVMe command format\n", __func__);
+			rval = -EINVAL;
+			mutex_unlock(&mrioc->bsg_cmds.mutex);
+			goto out;
+		}
+	} else {
 		mpi3mr_bsg_build_sgl(mpi_req, (mpi_msg_size),
 				     drv_bufs, bufcnt, is_rmcb, is_rmrb,
 				     (dout_cnt + din_cnt));
@@ -970,7 +1305,8 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job, unsigned int *reply
 		}
 	}
 
-	if (mpi_header->function == MPI3_BSG_FUNCTION_SCSI_IO)
+	if ((mpi_header->function == MPI3_BSG_FUNCTION_NVME_ENCAPSULATED) ||
+	    (mpi_header->function == MPI3_BSG_FUNCTION_SCSI_IO))
 		mpi3mr_issue_tm(mrioc,
 		    MPI3_SCSITASKMGMT_TASKTYPE_TARGET_RESET,
 		    mpi_header->function_dependent, 0,
@@ -984,6 +1320,12 @@ static long mpi3mr_bsg_process_mpt_cmds(struct bsg_job *job, unsigned int *reply
 	}
 	dprint_bsg_info(mrioc, "%s: bsg request is completed\n", __func__);
 
+	if (mrioc->prp_list_virt) {
+		dma_free_coherent(&mrioc->pdev->dev, mrioc->prp_sz,
+		    mrioc->prp_list_virt, mrioc->prp_list_dma);
+		mrioc->prp_list_virt = NULL;
+	}
+
 	if ((mrioc->bsg_cmds.ioc_status & MPI3_IOCSTATUS_STATUS_MASK)
 	     != MPI3_IOCSTATUS_SUCCESS) {
 		dprint_bsg_info(mrioc,
diff --git a/include/uapi/scsi/scsi_bsg_mpi3mr.h b/include/uapi/scsi/scsi_bsg_mpi3mr.h
index 870e6d87dd03..67f14c89b255 100644
--- a/include/uapi/scsi/scsi_bsg_mpi3mr.h
+++ b/include/uapi/scsi/scsi_bsg_mpi3mr.h
@@ -488,6 +488,14 @@ struct mpi3_nvme_encapsulated_error_reply {
 	__le32 nvme_completion_entry[4];
 };
 
+#define MPI3MR_NVME_PRP_SIZE		8 /* PRP size */
+#define MPI3MR_NVME_CMD_PRP1_OFFSET	24 /* PRP1 offset in NVMe cmd */
+#define MPI3MR_NVME_CMD_PRP2_OFFSET	32 /* PRP2 offset in NVMe cmd */
+#define MPI3MR_NVME_CMD_SGL_OFFSET	24 /* SGL offset in NVMe cmd */
+#define MPI3MR_NVME_DATA_FORMAT_PRP	0
+#define MPI3MR_NVME_DATA_FORMAT_SGL1	1
+#define MPI3MR_NVME_DATA_FORMAT_SGL2	2
+
 /* MPI3: task management related definitions */
 struct mpi3_scsi_task_mgmt_request {
 	__le16 host_tag;
-- 
2.27.0
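As an aside for readers, the chopping logic in mpi3mr_build_nvme_prp()
above follows the usual NVMe PRP rules: the first entry may point anywhere
inside a device page, every later entry must be page-aligned, and the last
entry may cover a partial page. A standalone sketch of that walk, using a
hypothetical prp_walk() helper and made-up addresses:

/* Sketch of the PRP walk implemented by mpi3mr_build_nvme_prp(): the
 * first entry may start at any offset within a device page; every
 * subsequent entry is page-aligned. Prints the entries it would emit. */
#include <stdint.h>
#include <stdio.h>

static void prp_walk(uint64_t dma_addr, size_t length, uint32_t dev_pgsz)
{
	uint32_t page_mask = dev_pgsz - 1;
	unsigned int n = 0;

	while (length) {
		uint32_t offset = dma_addr & page_mask;
		uint32_t entry_len = dev_pgsz - offset;
		uint32_t used = entry_len > length ?
				(uint32_t)length : entry_len;

		printf("PRP entry %u: 0x%llx (%u bytes)\n", ++n,
		       (unsigned long long)dma_addr, used);

		/* Bump the address and account for the last partial page,
		 * as the driver does at the bottom of its loop. */
		dma_addr += entry_len;
		length = entry_len > length ? 0 : length - entry_len;
	}
}

int main(void)
{
	/* 10 KiB transfer starting 512 bytes into a 4 KiB device page:
	 * one partial first entry, then page-aligned entries. */
	prp_walk(0x1000200ULL, 10240, 4096);
	return 0;
}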
On Apr 13, 2022, at 7:56 AM, Sumit Saxena <sumit.saxena@broadcom.com> wrote:
> This patch adds support for management applications to send MPI3
> Encapsulated NVMe passthru commands to the NVMe devices attached to
> the Avenger controller. [...]

Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>

--
Himanshu Madhani
Oracle Linux Engineering
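For readers following the repeated mask-and-stamp pattern in the diff: each
64-bit address handed to firmware has its upper dword checked against the
IOC's SGE modifier mask and then stamped with the modifier value. A small
standalone rendering of that bit manipulation, with arbitrary demo numbers
in place of the real mrioc->facts fields:

/* Standalone rendering of the SGE-modifier fixup used throughout the
 * patch: reject an address whose upper bits collide with the modifier
 * mask, otherwise stamp in the IOC-provided modifier value. The mask,
 * value and shift here are demo numbers, not real IOC facts. */
#include <stdint.h>
#include <stdio.h>

static int apply_sgemod(uint64_t *addr, uint8_t mod_mask,
			uint8_t mod_val, uint8_t mod_shift)
{
	uint64_t sgemod_mask = ((uint64_t)(mod_mask << mod_shift)) << 32;
	uint64_t sgemod_val  = ((uint64_t)(mod_val << mod_shift)) << 32;

	if (*addr & sgemod_mask)
		return -1;	/* collision: the driver fails the request */

	*addr = (*addr & ~sgemod_mask) | sgemod_val;
	return 0;
}

int main(void)
{
	uint64_t addr = 0x0000000012345000ULL;

	if (!apply_sgemod(&addr, 0xf0, 0x40, 0))
		printf("stamped address: 0x%016llx\n",
		       (unsigned long long)addr);
	return 0;
}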