mbox series

[0/6,V3] iscsi fixes

Message ID 1608518226-30376-1-git-send-email-michael.christie@oracle.com
Headers show
Series iscsi fixes | expand

Message

Mike Christie Dec. 21, 2020, 2:37 a.m. UTC
The following patches were made over Linus's tree. Martin's staging
branches were missing fe0a8a95e7134d0b44cd407bc0085b9ba8d8fe31.

The patches are mostly fixes. There is one cleanup to the locking
and non standard task cleanup that is used by a later patch that
fixes the refcounting during timeouts/TMFs.

V3:
- Add some patches for issues found while testing.
	- session queue depth was stuck at 128
	- cmd window could not be detected when session was relogged in.
- Patch "libiscsi: drop taskqueuelock" had a bug where we did not
disable bhs and during xmit thread suspension leaked the current task.
V2:
- Take back_lock when looping over running cmds in iscsi_eh_cmd_timed_out
in case those complete while we are accessing them.

Comments

Lee Duncan Jan. 5, 2021, 10:53 p.m. UTC | #1
On 12/20/20 6:37 PM, Mike Christie wrote:
> This patch just breaks out the code that calculates the number

> of scsi cmds that will be used for a scsi session. It also adds

> a check that we don't go over the host's can_queue value.


I'm curious. It's a "good thing" to check the command count in a better
way now, but was there any known instance of the count miscalculation in
the current code causing issues?

> 

> Signed-off-by: Mike Christie <michael.christie@oracle.com>

> ---

>  drivers/scsi/libiscsi.c | 81 ++++++++++++++++++++++++++++++-------------------

>  include/scsi/libiscsi.h |  2 ++

>  2 files changed, 51 insertions(+), 32 deletions(-)

> 

> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c

> index 796465e..f1ade91 100644

> --- a/drivers/scsi/libiscsi.c

> +++ b/drivers/scsi/libiscsi.c

> @@ -2648,6 +2648,51 @@ void iscsi_pool_free(struct iscsi_pool *q)

>  }

>  EXPORT_SYMBOL_GPL(iscsi_pool_free);

>  

> +int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost,

> +				 uint16_t requested_cmds_max)

> +{

> +	int scsi_cmds, total_cmds = requested_cmds_max;

> +

> +	if (!total_cmds)

> +		total_cmds = ISCSI_DEF_XMIT_CMDS_MAX;

> +	/*

> +	 * The iscsi layer needs some tasks for nop handling and tmfs,

> +	 * so the cmds_max must at least be greater than ISCSI_MGMT_CMDS_MAX

> +	 * + 1 command for scsi IO.

> +	 */

> +	if (total_cmds < ISCSI_TOTAL_CMDS_MIN) {

> +		printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue must be a power of two that is at least %d.\n",

> +		       total_cmds, ISCSI_TOTAL_CMDS_MIN);

> +		return -EINVAL;

> +	}

> +

> +	if (total_cmds > ISCSI_TOTAL_CMDS_MAX) {

> +		printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue must be a power of 2 less than or equal to %d.\n",

> +		       requested_cmds_max, ISCSI_TOTAL_CMDS_MAX);

> +		total_cmds = ISCSI_TOTAL_CMDS_MAX;

> +	}

> +

> +	if (!is_power_of_2(total_cmds)) {

> +		printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue must be a power of 2.\n",

> +		       total_cmds);

> +		total_cmds = rounddown_pow_of_two(total_cmds);

> +		if (total_cmds < ISCSI_TOTAL_CMDS_MIN)

> +			return -EINVAL;

> +		printk(KERN_INFO "iscsi: Rounding can_queue to %d.\n",

> +		       total_cmds);

> +	}

> +

> +	scsi_cmds = total_cmds - ISCSI_MGMT_CMDS_MAX;

> +	if (shost->can_queue && scsi_cmds > shost->can_queue) {

> +		scsi_cmds = shost->can_queue - ISCSI_MGMT_CMDS_MAX;

> +		printk(KERN_INFO "iscsi: requested cmds_max %u higher than driver limit. Using driver max %u\n",

> +		       requested_cmds_max, shost->can_queue);

> +	}


If the device can queue, what if "can_queue" is equal to or less than
ISCSI_MGMT_CMDS_MAX?

> +

> +	return scsi_cmds;

> +}

> +EXPORT_SYMBOL_GPL(iscsi_host_get_max_scsi_cmds);

> +

>  /**

>   * iscsi_host_add - add host to system

>   * @shost: scsi host

> @@ -2800,7 +2845,7 @@ struct iscsi_cls_session *

>  	struct iscsi_host *ihost = shost_priv(shost);

>  	struct iscsi_session *session;

>  	struct iscsi_cls_session *cls_session;

> -	int cmd_i, scsi_cmds, total_cmds = cmds_max;

> +	int cmd_i, scsi_cmds;

>  	unsigned long flags;

>  

>  	spin_lock_irqsave(&ihost->lock, flags);

> @@ -2811,37 +2856,9 @@ struct iscsi_cls_session *

>  	ihost->num_sessions++;

>  	spin_unlock_irqrestore(&ihost->lock, flags);

>  

> -	if (!total_cmds)

> -		total_cmds = ISCSI_DEF_XMIT_CMDS_MAX;

> -	/*

> -	 * The iscsi layer needs some tasks for nop handling and tmfs,

> -	 * so the cmds_max must at least be greater than ISCSI_MGMT_CMDS_MAX

> -	 * + 1 command for scsi IO.

> -	 */

> -	if (total_cmds < ISCSI_TOTAL_CMDS_MIN) {

> -		printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue "

> -		       "must be a power of two that is at least %d.\n",

> -		       total_cmds, ISCSI_TOTAL_CMDS_MIN);

> +	scsi_cmds = iscsi_host_get_max_scsi_cmds(shost, cmds_max);

> +	if (scsi_cmds < 0)

>  		goto dec_session_count;


Should this be "scsi_cmds <= 0" ? Having zero commands doesn't seem good.

> -	}

> -

> -	if (total_cmds > ISCSI_TOTAL_CMDS_MAX) {

> -		printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue "

> -		       "must be a power of 2 less than or equal to %d.\n",

> -		       cmds_max, ISCSI_TOTAL_CMDS_MAX);

> -		total_cmds = ISCSI_TOTAL_CMDS_MAX;

> -	}

> -

> -	if (!is_power_of_2(total_cmds)) {

> -		printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue "

> -		       "must be a power of 2.\n", total_cmds);

> -		total_cmds = rounddown_pow_of_two(total_cmds);

> -		if (total_cmds < ISCSI_TOTAL_CMDS_MIN)

> -			goto dec_session_count;

> -		printk(KERN_INFO "iscsi: Rounding can_queue to %d.\n",

> -		       total_cmds);

> -	}

> -	scsi_cmds = total_cmds - ISCSI_MGMT_CMDS_MAX;

>  

>  	cls_session = iscsi_alloc_session(shost, iscsit,

>  					  sizeof(struct iscsi_session) +

> @@ -2857,7 +2874,7 @@ struct iscsi_cls_session *

>  	session->lu_reset_timeout = 15;

>  	session->abort_timeout = 10;

>  	session->scsi_cmds_max = scsi_cmds;

> -	session->cmds_max = total_cmds;

> +	session->cmds_max = scsi_cmds + ISCSI_MGMT_CMDS_MAX;

>  	session->queued_cmdsn = session->cmdsn = initial_cmdsn;

>  	session->exp_cmdsn = initial_cmdsn + 1;

>  	session->max_cmdsn = initial_cmdsn + 1;

> diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h

> index 44a9554..02f966e 100644

> --- a/include/scsi/libiscsi.h

> +++ b/include/scsi/libiscsi.h

> @@ -395,6 +395,8 @@ extern struct Scsi_Host *iscsi_host_alloc(struct scsi_host_template *sht,

>  extern void iscsi_host_remove(struct Scsi_Host *shost);

>  extern void iscsi_host_free(struct Scsi_Host *shost);

>  extern int iscsi_target_alloc(struct scsi_target *starget);

> +extern int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost,

> +					uint16_t requested_cmds_max);

>  

>  /*

>   * session management

>
Lee Duncan Jan. 5, 2021, 10:55 p.m. UTC | #2
On 12/20/20 6:37 PM, Mike Christie wrote:
> We are setting the shost's can_queue after we add the host which is

> too late, because scsi-ml will have allocated the tag set based on

> the can_queue value at that time. This patch has us use the

> iscsi_host_get_max_scsi_cmds helper to figure out the number of

> scsi cmds, so we can set it properly. We should now not be limited

> to 128 cmds per session.

> 

> It also fixes up the template can_queue so it reflects the max scsi

> cmds we can support like how other drivers work.

> 

> Signed-off-by: Mike Christie <michael.christie@oracle.com>

> ---

>  drivers/scsi/iscsi_tcp.c | 9 +++++++--

>  1 file changed, 7 insertions(+), 2 deletions(-)

> 

> diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c

> index df47557..7a5aec7 100644

> --- a/drivers/scsi/iscsi_tcp.c

> +++ b/drivers/scsi/iscsi_tcp.c

> @@ -847,6 +847,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,

>  	struct iscsi_session *session;

>  	struct iscsi_sw_tcp_host *tcp_sw_host;

>  	struct Scsi_Host *shost;

> +	int rc;

>  

>  	if (ep) {

>  		printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep);

> @@ -864,6 +865,11 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,

>  	shost->max_channel = 0;

>  	shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE;

>  

> +	rc = iscsi_host_get_max_scsi_cmds(shost, cmds_max);

> +	if (rc < 0)

> +		goto free_host;


Same question as in Patch 4: Is having "0" max scsi commands ok?

> +	shost->can_queue = rc;

> +

>  	if (iscsi_host_add(shost, NULL))

>  		goto free_host;

>  

> @@ -878,7 +884,6 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,

>  	tcp_sw_host = iscsi_host_priv(shost);

>  	tcp_sw_host->session = session;

>  

> -	shost->can_queue = session->scsi_cmds_max;

>  	if (iscsi_tcp_r2tpool_alloc(session))

>  		goto remove_session;

>  	return cls_session;

> @@ -981,7 +986,7 @@ static int iscsi_sw_tcp_slave_configure(struct scsi_device *sdev)

>  	.name			= "iSCSI Initiator over TCP/IP",

>  	.queuecommand           = iscsi_queuecommand,

>  	.change_queue_depth	= scsi_change_queue_depth,

> -	.can_queue		= ISCSI_DEF_XMIT_CMDS_MAX - 1,

> +	.can_queue		= ISCSI_TOTAL_CMDS_MAX - ISCSI_MGMT_CMDS_MAX,

>  	.sg_tablesize		= 4096,

>  	.max_sectors		= 0xFFFF,

>  	.cmd_per_lun		= ISCSI_DEF_CMD_PER_LUN,

>
Lee Duncan Jan. 5, 2021, 10:57 p.m. UTC | #3
On 12/20/20 6:37 PM, Mike Christie wrote:
> If we lose the session then relogin, but the new cmdsn window has

> shrunk (due to something like an admin changing a setting) we will

> have the old exp/max_cmdsn values and will never be able to update

> them. For example, max_cmdsn would be 64, but if on the target the

> user set the window to be smaller then the target could try to return

> the max_cmdsn as 32. We will see that new max_cmdsn in the rsp but

> because it's lower than the old max_cmdsn when the window was larger

> we will not update it.

> 

> So this patch has us reset the windown values during session

> cleanup so they can be updated after a new login.

> 

> Signed-off-by: Mike Christie <michael.christie@oracle.com>

> ---

>  drivers/scsi/libiscsi.c | 7 +++++++

>  1 file changed, 7 insertions(+)

> 

> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c

> index f1ade91..ff6ba78 100644

> --- a/drivers/scsi/libiscsi.c

> +++ b/drivers/scsi/libiscsi.c

> @@ -3268,6 +3268,13 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session,

>  	spin_unlock_bh(&session->frwd_lock);

>  

>  	/*

> +	 * The target could have reduced it's window size between logins, so

> +	 * we have to reset max/exp cmdsn so we can see the new values.

> +	 */

> +	spin_lock_bh(&session->back_lock);

> +	session->max_cmdsn = session->exp_cmdsn = session->cmdsn + 1;

> +	spin_unlock_bh(&session->back_lock);

> +	/*

>  	 * Unblock xmitworker(), Login Phase will pass through.

>  	 */

>  	clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);

> 


Reviewed-by: Lee Duncan <lduncan@suse.com>
Mike Christie Jan. 5, 2021, 11:18 p.m. UTC | #4
On 1/5/21 4:53 PM, Lee Duncan wrote:
> On 12/20/20 6:37 PM, Mike Christie wrote:

>> This patch just breaks out the code that calculates the number

>> of scsi cmds that will be used for a scsi session. It also adds

>> a check that we don't go over the host's can_queue value.

> 

> I'm curious. It's a "good thing" to check the command count in a better

> way now, but was there any known instance of the count miscalculation in

> the current code causing issues?


No one has hit any issues. It's so userspace knows it's not going to
get the requested value.

> 

>>

>> Signed-off-by: Mike Christie <michael.christie@oracle.com>

>> ---

>>  drivers/scsi/libiscsi.c | 81 ++++++++++++++++++++++++++++++-------------------

>>  include/scsi/libiscsi.h |  2 ++

>>  2 files changed, 51 insertions(+), 32 deletions(-)

>>

>> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c

>> index 796465e..f1ade91 100644

>> --- a/drivers/scsi/libiscsi.c

>> +++ b/drivers/scsi/libiscsi.c

>> @@ -2648,6 +2648,51 @@ void iscsi_pool_free(struct iscsi_pool *q)

>>  }

>>  EXPORT_SYMBOL_GPL(iscsi_pool_free);

>>  

>> +int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost,

>> +				 uint16_t requested_cmds_max)

>> +{

>> +	int scsi_cmds, total_cmds = requested_cmds_max;

>> +

>> +	if (!total_cmds)

>> +		total_cmds = ISCSI_DEF_XMIT_CMDS_MAX;

>> +	/*

>> +	 * The iscsi layer needs some tasks for nop handling and tmfs,

>> +	 * so the cmds_max must at least be greater than ISCSI_MGMT_CMDS_MAX

>> +	 * + 1 command for scsi IO.

>> +	 */

>> +	if (total_cmds < ISCSI_TOTAL_CMDS_MIN) {

>> +		printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue must be a power of two that is at least %d.\n",

>> +		       total_cmds, ISCSI_TOTAL_CMDS_MIN);

>> +		return -EINVAL;

>> +	}

>> +

>> +	if (total_cmds > ISCSI_TOTAL_CMDS_MAX) {

>> +		printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue must be a power of 2 less than or equal to %d.\n",

>> +		       requested_cmds_max, ISCSI_TOTAL_CMDS_MAX);

>> +		total_cmds = ISCSI_TOTAL_CMDS_MAX;

>> +	}

>> +

>> +	if (!is_power_of_2(total_cmds)) {

>> +		printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue must be a power of 2.\n",

>> +		       total_cmds);

>> +		total_cmds = rounddown_pow_of_two(total_cmds);

>> +		if (total_cmds < ISCSI_TOTAL_CMDS_MIN)

>> +			return -EINVAL;

>> +		printk(KERN_INFO "iscsi: Rounding can_queue to %d.\n",

>> +		       total_cmds);

>> +	}

>> +

>> +	scsi_cmds = total_cmds - ISCSI_MGMT_CMDS_MAX;

>> +	if (shost->can_queue && scsi_cmds > shost->can_queue) {

>> +		scsi_cmds = shost->can_queue - ISCSI_MGMT_CMDS_MAX;

>> +		printk(KERN_INFO "iscsi: requested cmds_max %u higher than driver limit. Using driver max %u\n",

>> +		       requested_cmds_max, shost->can_queue);

>> +	}

> 

> If the device can queue, what if "can_queue" is equal to or less than

> ISCSI_MGMT_CMDS_MAX?

>


It wouldn't be possible, because the drivers set their can_queue a lot higher
than ISCSI_MGMT_CMDS_MAX, but for this and the other comment I'll fix up the
check/code.
Mike Christie Jan. 5, 2021, 11:22 p.m. UTC | #5
On 1/5/21 4:55 PM, Lee Duncan wrote:
> On 12/20/20 6:37 PM, Mike Christie wrote:

>> We are setting the shost's can_queue after we add the host which is

>> too late, because scsi-ml will have allocated the tag set based on

>> the can_queue value at that time. This patch has us use the

>> iscsi_host_get_max_scsi_cmds helper to figure out the number of

>> scsi cmds, so we can set it properly. We should now not be limited

>> to 128 cmds per session.

>>

>> It also fixes up the template can_queue so it reflects the max scsi

>> cmds we can support like how other drivers work.

>>

>> Signed-off-by: Mike Christie <michael.christie@oracle.com>

>> ---

>>  drivers/scsi/iscsi_tcp.c | 9 +++++++--

>>  1 file changed, 7 insertions(+), 2 deletions(-)

>>

>> diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c

>> index df47557..7a5aec7 100644

>> --- a/drivers/scsi/iscsi_tcp.c

>> +++ b/drivers/scsi/iscsi_tcp.c

>> @@ -847,6 +847,7 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,

>>  	struct iscsi_session *session;

>>  	struct iscsi_sw_tcp_host *tcp_sw_host;

>>  	struct Scsi_Host *shost;

>> +	int rc;

>>  

>>  	if (ep) {

>>  		printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep);

>> @@ -864,6 +865,11 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost,

>>  	shost->max_channel = 0;

>>  	shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE;

>>  

>> +	rc = iscsi_host_get_max_scsi_cmds(shost, cmds_max);

>> +	if (rc < 0)

>> +		goto free_host;

> 

> Same question as in Patch 4: Is having "0" max scsi commands ok?

> 


This could hit zero. I think before we would end up where no cmds
would be executed. They would just be stuck in the queues because
the target->can_queue limit would always be hit. I'll fix that up too.
Mike Christie Jan. 5, 2021, 11:24 p.m. UTC | #6
On 1/5/21 5:18 PM, Mike Christie wrote:
> On 1/5/21 4:53 PM, Lee Duncan wrote:

>> On 12/20/20 6:37 PM, Mike Christie wrote:

>>> This patch just breaks out the code that calculates the number

>>> of scsi cmds that will be used for a scsi session. It also adds

>>> a check that we don't go over the host's can_queue value.

>>

>> I'm curious. It's a "good thing" to check the command count in a better

>> way now, but was there any known instance of the count miscalculation in

>> the current code causing issues?

> 

> No one has hit any issues. It's so userspace knows it's not going to

> get the requested value.

> 

>>

>>>

>>> Signed-off-by: Mike Christie <michael.christie@oracle.com>

>>> ---

>>>  drivers/scsi/libiscsi.c | 81 ++++++++++++++++++++++++++++++-------------------

>>>  include/scsi/libiscsi.h |  2 ++

>>>  2 files changed, 51 insertions(+), 32 deletions(-)

>>>

>>> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c

>>> index 796465e..f1ade91 100644

>>> --- a/drivers/scsi/libiscsi.c

>>> +++ b/drivers/scsi/libiscsi.c

>>> @@ -2648,6 +2648,51 @@ void iscsi_pool_free(struct iscsi_pool *q)

>>>  }

>>>  EXPORT_SYMBOL_GPL(iscsi_pool_free);

>>>  

>>> +int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost,

>>> +				 uint16_t requested_cmds_max)

>>> +{

>>> +	int scsi_cmds, total_cmds = requested_cmds_max;

>>> +

>>> +	if (!total_cmds)

>>> +		total_cmds = ISCSI_DEF_XMIT_CMDS_MAX;

>>> +	/*

>>> +	 * The iscsi layer needs some tasks for nop handling and tmfs,

>>> +	 * so the cmds_max must at least be greater than ISCSI_MGMT_CMDS_MAX

>>> +	 * + 1 command for scsi IO.

>>> +	 */

>>> +	if (total_cmds < ISCSI_TOTAL_CMDS_MIN) {

>>> +		printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue must be a power of two that is at least %d.\n",

>>> +		       total_cmds, ISCSI_TOTAL_CMDS_MIN);

>>> +		return -EINVAL;

>>> +	}

>>> +

>>> +	if (total_cmds > ISCSI_TOTAL_CMDS_MAX) {

>>> +		printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue must be a power of 2 less than or equal to %d.\n",

>>> +		       requested_cmds_max, ISCSI_TOTAL_CMDS_MAX);

>>> +		total_cmds = ISCSI_TOTAL_CMDS_MAX;

>>> +	}

>>> +

>>> +	if (!is_power_of_2(total_cmds)) {

>>> +		printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue must be a power of 2.\n",

>>> +		       total_cmds);

>>> +		total_cmds = rounddown_pow_of_two(total_cmds);

>>> +		if (total_cmds < ISCSI_TOTAL_CMDS_MIN)

>>> +			return -EINVAL;

>>> +		printk(KERN_INFO "iscsi: Rounding can_queue to %d.\n",

>>> +		       total_cmds);

>>> +	}

>>> +

>>> +	scsi_cmds = total_cmds - ISCSI_MGMT_CMDS_MAX;

>>> +	if (shost->can_queue && scsi_cmds > shost->can_queue) {

>>> +		scsi_cmds = shost->can_queue - ISCSI_MGMT_CMDS_MAX;

>>> +		printk(KERN_INFO "iscsi: requested cmds_max %u higher than driver limit. Using driver max %u\n",

>>> +		       requested_cmds_max, shost->can_queue);

>>> +	}

>>

>> If the device can queue, what if "can_queue" is equal to or less than

>> ISCSI_MGMT_CMDS_MAX?

>>

> 

> It wouldn't be possible,


Correction on that. With he current code we can hit zero for iscsi_tcp
and old tools with iser. I mentioned in the other email I'll fix that
up.