Message ID | 1591810159-240929-13-git-send-email-john.garry@huawei.com |
---|---|
State | Superseded |
Headers | show |
Series | blk-mq/scsi: Provide hostwide shared tags for SCSI HBAs | expand |
On 10/06/2020 18:29, John Garry wrote: > From: Hannes Reinecke <hare@suse.de> > > The smart array HBAs can steer interrupt completion, so this > patch switches the implementation to use multiqueue and enables > 'host_tagset' as the HBA has a shared host-wide tagset. > Hi Don, I am preparing the next iteration of this series, and we're getting close to dropping the RFC tags. The series has grown a bit, and I am not sure what to do with hpsa support. The latest versions of this series have not been tested for hpsa, AFAIK. Can you let me know if you can test and review this patch? Or someone else let me know it's tested (Hannes?) Thanks > Signed-off-by: Hannes Reinecke <hare@suse.de> > --- > drivers/scsi/hpsa.c | 44 +++++++------------------------------------- > drivers/scsi/hpsa.h | 1 - > 2 files changed, 7 insertions(+), 38 deletions(-) > > diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c > index 1e9302e99d05..f807f9bdae85 100644 > --- a/drivers/scsi/hpsa.c > +++ b/drivers/scsi/hpsa.c > @@ -980,6 +980,7 @@ static struct scsi_host_template hpsa_driver_template = { > .shost_attrs = hpsa_shost_attrs, > .max_sectors = 2048, > .no_write_same = 1, > + .host_tagset = 1, > }; > > static inline u32 next_command(struct ctlr_info *h, u8 q) > @@ -1144,12 +1145,14 @@ static void dial_up_lockup_detection_on_fw_flash_complete(struct ctlr_info *h, > static void __enqueue_cmd_and_start_io(struct ctlr_info *h, > struct CommandList *c, int reply_queue) > { > + u32 blk_tag = blk_mq_unique_tag(c->scsi_cmd->request); > + > dial_down_lockup_detection_during_fw_flash(h, c); > atomic_inc(&h->commands_outstanding); > if (c->device) > atomic_inc(&c->device->commands_outstanding); > > - reply_queue = h->reply_map[raw_smp_processor_id()]; > + reply_queue = blk_mq_unique_tag_to_hwq(blk_tag); > switch (c->cmd_type) { > case CMD_IOACCEL1: > set_ioaccel1_performant_mode(h, c, reply_queue); > @@ -5653,8 +5656,6 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd) > /* Get the ptr to our adapter structure out of cmd->host. */ > h = sdev_to_hba(cmd->device); > > - BUG_ON(cmd->request->tag < 0); > - > dev = cmd->device->hostdata; > if (!dev) { > cmd->result = DID_NO_CONNECT << 16; > @@ -5830,7 +5831,7 @@ static int hpsa_scsi_host_alloc(struct ctlr_info *h) > sh->hostdata[0] = (unsigned long) h; > sh->irq = pci_irq_vector(h->pdev, 0); > sh->unique_id = sh->irq; > - > + sh->nr_hw_queues = h->msix_vectors > 0 ? h->msix_vectors : 1; > h->scsi_host = sh; > return 0; > } > @@ -5856,7 +5857,8 @@ static int hpsa_scsi_add_host(struct ctlr_info *h) > */ > static int hpsa_get_cmd_index(struct scsi_cmnd *scmd) > { > - int idx = scmd->request->tag; > + u32 blk_tag = blk_mq_unique_tag(scmd->request); > + int idx = blk_mq_unique_tag_to_tag(blk_tag); > > if (idx < 0) > return idx; > @@ -7456,26 +7458,6 @@ static void hpsa_disable_interrupt_mode(struct ctlr_info *h) > h->msix_vectors = 0; > } > > -static void hpsa_setup_reply_map(struct ctlr_info *h) > -{ > - const struct cpumask *mask; > - unsigned int queue, cpu; > - > - for (queue = 0; queue < h->msix_vectors; queue++) { > - mask = pci_irq_get_affinity(h->pdev, queue); > - if (!mask) > - goto fallback; > - > - for_each_cpu(cpu, mask) > - h->reply_map[cpu] = queue; > - } > - return; > - > -fallback: > - for_each_possible_cpu(cpu) > - h->reply_map[cpu] = 0; > -} > - > /* If MSI/MSI-X is supported by the kernel we will try to enable it on > * controllers that are capable. If not, we use legacy INTx mode. > */ > @@ -7872,9 +7854,6 @@ static int hpsa_pci_init(struct ctlr_info *h) > if (err) > goto clean1; > > - /* setup mapping between CPU and reply queue */ > - hpsa_setup_reply_map(h); > - > err = hpsa_pci_find_memory_BAR(h->pdev, &h->paddr); > if (err) > goto clean2; /* intmode+region, pci */ > @@ -8613,7 +8592,6 @@ static struct workqueue_struct *hpsa_create_controller_wq(struct ctlr_info *h, > > static void hpda_free_ctlr_info(struct ctlr_info *h) > { > - kfree(h->reply_map); > kfree(h); > } > > @@ -8622,14 +8600,6 @@ static struct ctlr_info *hpda_alloc_ctlr_info(void) > struct ctlr_info *h; > > h = kzalloc(sizeof(*h), GFP_KERNEL); > - if (!h) > - return NULL; > - > - h->reply_map = kcalloc(nr_cpu_ids, sizeof(*h->reply_map), GFP_KERNEL); > - if (!h->reply_map) { > - kfree(h); > - return NULL; > - } > return h; > } > > diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h > index f8c88fc7b80a..ea4a609e3eb7 100644 > --- a/drivers/scsi/hpsa.h > +++ b/drivers/scsi/hpsa.h > @@ -161,7 +161,6 @@ struct bmic_controller_parameters { > #pragma pack() > > struct ctlr_info { > - unsigned int *reply_map; > int ctlr; > char devname[8]; > char *product_name; >
On 7/14/20 9:37 AM, John Garry wrote: > On 10/06/2020 18:29, John Garry wrote: >> From: Hannes Reinecke <hare@suse.de> >> >> The smart array HBAs can steer interrupt completion, so this >> patch switches the implementation to use multiqueue and enables >> 'host_tagset' as the HBA has a shared host-wide tagset. >> > > Hi Don, > > I am preparing the next iteration of this series, and we're getting > close to dropping the RFC tags. The series has grown a bit, and I am not > sure what to do with hpsa support. > > The latest versions of this series have not been tested for hpsa, AFAIK. > Can you let me know if you can test and review this patch? Or someone > else let me know it's tested (Hannes?) > I'll give it a go. Which git repository should I base the tests on? Cheers, Hannes -- Dr. Hannes Reinecke Kernel Storage Architect hare@suse.de +49 911 74053 688 SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg HRB 36809 (AG Nürnberg), GF: Felix Imendörffer
On 14/07/2020 08:41, Hannes Reinecke wrote: > On 7/14/20 9:37 AM, John Garry wrote: >> On 10/06/2020 18:29, John Garry wrote: >>> From: Hannes Reinecke <hare@suse.de> >>> >>> The smart array HBAs can steer interrupt completion, so this >>> patch switches the implementation to use multiqueue and enables >>> 'host_tagset' as the HBA has a shared host-wide tagset. >>> >> >> Hi Don, >> >> I am preparing the next iteration of this series, and we're getting >> close to dropping the RFC tags. The series has grown a bit, and I am not >> sure what to do with hpsa support. >> >> The latest versions of this series have not been tested for hpsa, AFAIK. >> Can you let me know if you can test and review this patch? Or someone >> else let me know it's tested (Hannes?) >> I'll give it a go. > > Which git repository should I base the tests on? v7 is here: https://github.com/hisilicon/kernel-dev/commits/private-topic-blk-mq-shared-tags-rfc-v7 So that should be good to test with for now. And I was going to ask this same question about smartpqi, so can you please let me know about this one? Thanks, John
On Tue, Jul 14, 2020 at 08:52:36AM +0100, John Garry wrote: > On 14/07/2020 08:41, Hannes Reinecke wrote: > > On 7/14/20 9:37 AM, John Garry wrote: > > > On 10/06/2020 18:29, John Garry wrote: > > > > From: Hannes Reinecke <hare@suse.de> > > > > > > > > The smart array HBAs can steer interrupt completion, so this > > > > patch switches the implementation to use multiqueue and enables > > > > 'host_tagset' as the HBA has a shared host-wide tagset. > > > > > > > > > > Hi Don, > > > > > > I am preparing the next iteration of this series, and we're getting > > > close to dropping the RFC tags. The series has grown a bit, and I am not > > > sure what to do with hpsa support. > > > > > > The latest versions of this series have not been tested for hpsa, AFAIK. > > > Can you let me know if you can test and review this patch? Or someone > > > else let me know it's tested (Hannes?) > > > I'll give it a go. > > > > Which git repository should I base the tests on? > > v7 is here: > > https://github.com/hisilicon/kernel-dev/commits/private-topic-blk-mq-shared-tags-rfc-v7 > > So that should be good to test with for now. > > And I was going to ask this same question about smartpqi, so can you please > let me know about this one? smartpqi is real MQ HBA, do you need any change wrt. shared tags? Thanks, Ming
On 14/07/2020 09:06, Ming Lei wrote: >> v7 is here: >> >> https://github.com/hisilicon/kernel-dev/commits/private-topic-blk-mq-shared-tags-rfc-v7 >> >> So that should be good to test with for now. >> >> And I was going to ask this same question about smartpqi, so can you please >> let me know about this one? Hi Ming, > smartpqi is real MQ HBA, do you need any change wrt. shared tags? Is it really? As I see, today it maintains a single tagset per HBA. So Hannes' change in this series seems ok. However, I do worry that mainline code may be wrong, as block layer may send can_queue * nr_hw_queues requests, when it seems the HBA can only handle can_queue requests. Thanks, john
On Tue, Jul 14, 2020 at 10:53:32AM +0100, John Garry wrote: > On 14/07/2020 09:06, Ming Lei wrote: > > > v7 is here: > > > > > > https://github.com/hisilicon/kernel-dev/commits/private-topic-blk-mq-shared-tags-rfc-v7 > > > > > > So that should be good to test with for now. > > > > > > And I was going to ask this same question about smartpqi, so can you please > > > let me know about this one? > > Hi Ming, > > > smartpqi is real MQ HBA, do you need any change wrt. shared tags? > > Is it really? Yes, it is. pqi_register_scsi(): shost->nr_hw_queues = ctrl_info->num_queue_groups; pqi_enable_msix_interrupts(): num_vectors_enabled = pci_alloc_irq_vectors(ctrl_info->pci_dev, PQI_MIN_MSIX_VECTORS, ctrl_info->num_queue_groups, PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); > > As I see, today it maintains a single tagset per HBA. So Hannes' change in No, each hw queue has one independent tagset for smartpqi. > this series seems ok. However, I do worry that mainline code may be wrong, > as block layer may send can_queue * nr_hw_queues requests, when it seems the > HBA can only handle can_queue requests. I have one machine in which all system are installed on smartpqi disks, and I almost work on the system everyday, so far so good with this real MQ style. Can you explain a bit why you worry the mainline code may be wrong? Thanks, Ming
On 7/14/20 11:53 AM, John Garry wrote: > On 14/07/2020 09:06, Ming Lei wrote: >>> v7 is here: >>> >>> https://github.com/hisilicon/kernel-dev/commits/private-topic-blk-mq-shared-tags-rfc-v7 >>> >>> >>> So that should be good to test with for now. >>> >>> And I was going to ask this same question about smartpqi, so can you >>> please >>> let me know about this one? > > Hi Ming, > >> smartpqi is real MQ HBA, do you need any change wrt. shared tags? > > Is it really? > > As I see, today it maintains a single tagset per HBA. So Hannes' change > in this series seems ok. However, I do worry that mainline code may be > wrong, as block layer may send can_queue * nr_hw_queues requests, when > it seems the HBA can only handle can_queue requests. > Correct. There is only one tagset per host, even if the host supports several queues (guess why it's called smart PQI :-). And mainline code isn't really wrong, it just allocates the next free tag from the host tagset; it's not using the block-layer tags at all. Precisely because the block layer currently cannot guarantee that tags are unique per host. And the point of this patchset is exactly that the block layer will only send up to 'can_queue' requests, irrespective on how many hardware queues are present. But anyway, I'll test it on smartpqi, too. Cheers, Hannes -- Dr. Hannes Reinecke Kernel Storage Architect hare@suse.de +49 911 74053 688 SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg HRB 36809 (AG Nürnberg), GF: Felix Imendörffer
On 14/07/2020 11:19, Hannes Reinecke wrote: > Correct. There is only one tagset per host, even if the host supports > several queues (guess why it's called smart PQI:-). > And mainline code isn't really wrong, it just allocates the next free > tag from the host tagset; it's not using the block-layer tags at all. > Precisely because the block layer currently cannot guarantee that tags > are unique per host. ok, but it's not exemplary in what it does. And I suppose that's why it needs to spin indefinitely for a free tag in pqi_alloc_io_request(), instead of failing immediately when none are free. > > And the point of this patchset is exactly that the block layer will only > send up to 'can_queue' requests, irrespective on how many hardware > queues are present. > > But anyway, I'll test it on smartpqi, too. > Great, thanks.
On 7/14/20 12:14 PM, Ming Lei wrote: > On Tue, Jul 14, 2020 at 10:53:32AM +0100, John Garry wrote: >> On 14/07/2020 09:06, Ming Lei wrote: >>>> v7 is here: >>>> >>>> https://github.com/hisilicon/kernel-dev/commits/private-topic-blk-mq-shared-tags-rfc-v7 >>>> >>>> So that should be good to test with for now. >>>> >>>> And I was going to ask this same question about smartpqi, so can you please >>>> let me know about this one? >> >> Hi Ming, >> >>> smartpqi is real MQ HBA, do you need any change wrt. shared tags? >> >> Is it really? > > Yes, it is. > > pqi_register_scsi(): > shost->nr_hw_queues = ctrl_info->num_queue_groups; > > pqi_enable_msix_interrupts(): > num_vectors_enabled = pci_alloc_irq_vectors(ctrl_info->pci_dev, > PQI_MIN_MSIX_VECTORS, ctrl_info->num_queue_groups, > PCI_IRQ_MSIX | PCI_IRQ_AFFINITY); > >> >> As I see, today it maintains a single tagset per HBA. So Hannes' change in > > No, each hw queue has one independent tagset for smartpqi. > Has it really? The code has this: static struct pqi_io_request *pqi_alloc_io_request( struct pqi_ctrl_info *ctrl_info) { struct pqi_io_request *io_request; u16 i = ctrl_info->next_io_request_slot; /* benignly racy */ while (1) { io_request = &ctrl_info->io_request_pool[i]; if (atomic_inc_return(&io_request->refcount) == 1) break; atomic_dec(&io_request->refcount); i = (i + 1) % ctrl_info->max_io_slots; } which means that at least the driver assumes a host-wide tagset. Looking at the code the HW _should_ support per-queue tagsets (it's PQI, after all), but this doesn't seem to be carried over for the entire driver; possibly due to legacy concerns. Don should be able to shed some light here. Meanwhile we have to assume that the _driver_ uses a per-host tagset; we might be able to convert it to a per-queue tagset, but that looks like a major update of the driver itself. Cheers, Hannes -- Dr. Hannes Reinecke Kernel Storage Architect hare@suse.de +49 911 74053 688 SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg HRB 36809 (AG Nürnberg), GF: Felix Imendörffer
On Tue, Jul 14, 2020 at 12:19:07PM +0200, Hannes Reinecke wrote: > On 7/14/20 11:53 AM, John Garry wrote: > > On 14/07/2020 09:06, Ming Lei wrote: > >>> v7 is here: > >>> > >>> https://github.com/hisilicon/kernel-dev/commits/private-topic-blk-mq-shared-tags-rfc-v7 > >>> > >>> > >>> So that should be good to test with for now. > >>> > >>> And I was going to ask this same question about smartpqi, so can you > >>> please > >>> let me know about this one? > > > > Hi Ming, > > > >> smartpqi is real MQ HBA, do you need any change wrt. shared tags? > > > > Is it really? > > > > As I see, today it maintains a single tagset per HBA. So Hannes' change > > in this series seems ok. However, I do worry that mainline code may be > > wrong, as block layer may send can_queue * nr_hw_queues requests, when > > it seems the HBA can only handle can_queue requests. > > > Correct. There is only one tagset per host, even if the host supports > several queues (guess why it's called smart PQI :-). > And mainline code isn't really wrong, it just allocates the next free > tag from the host tagset; it's not using the block-layer tags at all. > Precisely because the block layer currently cannot guarantee that tags > are unique per host. OK, pqi_alloc_io_request() does the real tag allocation, which looks a very bad implementation, cause the real tags can be used up easily. In my machine, there are 32 queues(32 cpu cores), each queue has 1013 tags, so there can be 32*1013 requests coming from block layer, meantime smartpqi can only handles 1013 requests. I guess it isn't hard to trigger softlock by running heavy/concurrent smartpqi IO. > > And the point of this patchset is exactly that the block layer will only > send up to 'can_queue' requests, irrespective on how many hardware > queues are present. That is only true for shared tags. Thanks, Ming
> > In my machine, there are 32 queues(32 cpu cores), each queue has 1013 > tags, so there can be 32*1013 requests coming from block layer, meantime > smartpqi can only handles 1013 requests. I guess it isn't hard to > trigger softlock by running heavy/concurrent smartpqi IO. Since pqi_alloc_io_request() does not use spinlock, disable preemption, etc., so I guess that there is more of a chance of simply IO timeout. But I see in pqi_get_physical_disk_info() that there is some intelligence to set the queue depth, which may reduce chance of timeout (by reducing disk queue depth). Not sure. > >> >> And the point of this patchset is exactly that the block layer will only >> send up to 'can_queue' requests, irrespective on how many hardware >> queues are present. > > That is only true for shared tags. > Thanks, John
On Tue, Jul 14, 2020 at 11:52:52AM +0100, John Garry wrote: > > > > In my machine, there are 32 queues(32 cpu cores), each queue has 1013 > > tags, so there can be 32*1013 requests coming from block layer, meantime > > smartpqi can only handles 1013 requests. I guess it isn't hard to > > trigger softlock by running heavy/concurrent smartpqi IO. > > Since pqi_alloc_io_request() does not use spinlock, disable preemption, rcu read lock is held when calling .queue_rq(), and preempt_disable() is implied in case that CONFIG_PREEMPT_RCU is off. A CPU looping in an RCU read-side critical section may cause some related issues, cause RCU's CPU Stall Detector will warn on that. > etc., so I guess that there is more of a chance of simply IO timeout. > > But I see in pqi_get_physical_disk_info() that there is some intelligence to > set the queue depth, which may reduce chance of timeout (by reducing disk > queue depth). Not sure. It may not work, see: [root@hp-dl380g10-01 mingl]# cat /sys/block/sd[a-f]/device/queue_depth 1013 1013 1013 1013 1013 1013 All sd[a-f] are smartpqi LUNs. Thanks, Ming
Subject: Re: [PATCH RFC v7 12/12] hpsa: enable host_tagset and switch to MQ EXTERNAL EMAIL: Do not click links or open attachments unless you know the content is safe On 10/06/2020 18:29, John Garry wrote: > From: Hannes Reinecke <hare@suse.de> > > The smart array HBAs can steer interrupt completion, so this patch > switches the implementation to use multiqueue and enables > 'host_tagset' as the HBA has a shared host-wide tagset. > >>Hi Don, >>I am preparing the next iteration of this series, and >>we're getting close to dropping the RFC tags. The >>series has grown a bit, and I am not sure what to do >>with hpsa support. >>The latest versions of this series have not been tested for hpsa, AFAIK. >>Can you let me know if you can test and review this >>patch? Or someone else let me know it's tested (Hannes?) Thanks Yes, I'll run my testing soon. Don > Signed-off-by: Hannes Reinecke <hare@suse.de> > --- > drivers/scsi/hpsa.c | 44 +++++++------------------------------------- > drivers/scsi/hpsa.h | 1 - > 2 files changed, 7 insertions(+), 38 deletions(-) > > diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index > 1e9302e99d05..f807f9bdae85 100644 > --- a/drivers/scsi/hpsa.c > +++ b/drivers/scsi/hpsa.c > @@ -980,6 +980,7 @@ static struct scsi_host_template hpsa_driver_template = { > .shost_attrs = hpsa_shost_attrs, > .max_sectors = 2048, > .no_write_same = 1, > + .host_tagset = 1, > }; > > static inline u32 next_command(struct ctlr_info *h, u8 q) @@ > -1144,12 +1145,14 @@ static void dial_up_lockup_detection_on_fw_flash_complete(struct ctlr_info *h, > static void __enqueue_cmd_and_start_io(struct ctlr_info *h, > struct CommandList *c, int reply_queue) > { > + u32 blk_tag = blk_mq_unique_tag(c->scsi_cmd->request); > + > dial_down_lockup_detection_during_fw_flash(h, c); > atomic_inc(&h->commands_outstanding); > if (c->device) > atomic_inc(&c->device->commands_outstanding); > > - reply_queue = h->reply_map[raw_smp_processor_id()]; > + reply_queue = blk_mq_unique_tag_to_hwq(blk_tag); > switch (c->cmd_type) { > case CMD_IOACCEL1: > set_ioaccel1_performant_mode(h, c, reply_queue); @@ > -5653,8 +5656,6 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd) > /* Get the ptr to our adapter structure out of cmd->host. */ > h = sdev_to_hba(cmd->device); > > - BUG_ON(cmd->request->tag < 0); > - > dev = cmd->device->hostdata; > if (!dev) { > cmd->result = DID_NO_CONNECT << 16; @@ -5830,7 +5831,7 > @@ static int hpsa_scsi_host_alloc(struct ctlr_info *h) > sh->hostdata[0] = (unsigned long) h; > sh->irq = pci_irq_vector(h->pdev, 0); > sh->unique_id = sh->irq; > - > + sh->nr_hw_queues = h->msix_vectors > 0 ? h->msix_vectors : 1; > h->scsi_host = sh; > return 0; > } > @@ -5856,7 +5857,8 @@ static int hpsa_scsi_add_host(struct ctlr_info *h) > */ > static int hpsa_get_cmd_index(struct scsi_cmnd *scmd) > { > - int idx = scmd->request->tag; > + u32 blk_tag = blk_mq_unique_tag(scmd->request); > + int idx = blk_mq_unique_tag_to_tag(blk_tag); > > if (idx < 0) > return idx; > @@ -7456,26 +7458,6 @@ static void hpsa_disable_interrupt_mode(struct ctlr_info *h) > h->msix_vectors = 0; > } > > -static void hpsa_setup_reply_map(struct ctlr_info *h) -{ > - const struct cpumask *mask; > - unsigned int queue, cpu; > - > - for (queue = 0; queue < h->msix_vectors; queue++) { > - mask = pci_irq_get_affinity(h->pdev, queue); > - if (!mask) > - goto fallback; > - > - for_each_cpu(cpu, mask) > - h->reply_map[cpu] = queue; > - } > - return; > - > -fallback: > - for_each_possible_cpu(cpu) > - h->reply_map[cpu] = 0; > -} > - > /* If MSI/MSI-X is supported by the kernel we will try to enable it on > * controllers that are capable. If not, we use legacy INTx mode. > */ > @@ -7872,9 +7854,6 @@ static int hpsa_pci_init(struct ctlr_info *h) > if (err) > goto clean1; > > - /* setup mapping between CPU and reply queue */ > - hpsa_setup_reply_map(h); > - > err = hpsa_pci_find_memory_BAR(h->pdev, &h->paddr); > if (err) > goto clean2; /* intmode+region, pci */ > @@ -8613,7 +8592,6 @@ static struct workqueue_struct > *hpsa_create_controller_wq(struct ctlr_info *h, > > static void hpda_free_ctlr_info(struct ctlr_info *h) > { > - kfree(h->reply_map); > kfree(h); > } > > @@ -8622,14 +8600,6 @@ static struct ctlr_info *hpda_alloc_ctlr_info(void) > struct ctlr_info *h; > > h = kzalloc(sizeof(*h), GFP_KERNEL); > - if (!h) > - return NULL; > - > - h->reply_map = kcalloc(nr_cpu_ids, sizeof(*h->reply_map), GFP_KERNEL); > - if (!h->reply_map) { > - kfree(h); > - return NULL; > - } > return h; > } > > diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index > f8c88fc7b80a..ea4a609e3eb7 100644 > --- a/drivers/scsi/hpsa.h > +++ b/drivers/scsi/hpsa.h > @@ -161,7 +161,6 @@ struct bmic_controller_parameters { > #pragma pack() > > struct ctlr_info { > - unsigned int *reply_map; > int ctlr; > char devname[8]; > char *product_name; >
On 10/06/2020 18:29, John Garry wrote: > From: Hannes Reinecke <hare@suse.de> > > The smart array HBAs can steer interrupt completion, so this patch > switches the implementation to use multiqueue and enables > 'host_tagset' as the HBA has a shared host-wide tagset. > >>Hi Don, >>I am preparing the next iteration of this series, and >>we're getting close to dropping the RFC tags. The >>series has grown a bit, and I am not sure what to do >>with hpsa support. >>The latest versions of this series have not been tested for hpsa, AFAIK. >>Or someone else let me know it's tested (Hannes?) >>Thanks John, I cloned: https://github.com/hisilicon/kernel-dev switched to branch: origin/private-topic-blk-mq-shared-tags-rfc-v8 And built the kernel. The hpsa driver oopsed on load. It was attempting to do driver initiated commands, so there would need to be some reserved tags set aside to communicate with the controller. Was I supposed to add this patch on top of Hannes's hpsa patches? The patches in the indicated series seem to be included in the branch. ---- [ 13.340977] HP HPSA Driver (v 3.4.20-170) [ 13.374719] usbcore: registered new interface driver usb-storage [ 13.379626] hpsa 0000:0d:00.0: can't disable ASPM; OS doesn't have ASPM control [ 13.473790] scsi host0: scsi_eh_0: sleeping [ 13.475191] scsi host0: uas [ 13.487978] hpsa 0000:0d:00.0: Logical aborts not supported [ 13.498111] tg3 0000:02:00.0 eth0: Tigon3 [partno(N/A) rev 5719001] (PCI Express) MAC address d4:c9:ef:ce:0a:c4 [ 13.498113] tg3 0000:02:00.0 eth0: attached PHY is 5719C (10/100/1000Base-T Ethernet) (WireSpeed[1], EEE[1]) [ 13.498116] tg3 0000:02:00.0 eth0: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[1] TSOcap[1] [ 13.498117] tg3 0000:02:00.0 eth0: dma_rwctrl[00000001] dma_mask[64-bit] [ 13.499661] scsi host1: scsi_eh_1: sleeping [ 13.522611] tg3 0000:02:00.1 eth1: Tigon3 [partno(N/A) rev 5719001] (PCI Express) MAC address d4:c9:ef:ce:0a:c5 [ 13.541519] usbcore: registered new interface driver uas [ 13.549508] scsi 0:0:0:0: Direct-Access ASMT 2105 0 PQ: 0 ANSI: 6 [ 13.576119] tg3 0000:02:00.1 eth1: attached PHY is 5719C (10/100/1000Base-T Ethernet) (WireSpeed[1], EEE[1]) [ 14.046521] tg3 0000:02:00.1 eth1: RXcsums[1] LinkChgREG[0] MIirq[0] ASF[1] TSOcap[1] [ 14.046524] tg3 0000:02:00.1 eth1: dma_rwctrl[00000001] dma_mask[64-bit] [ 14.047114] BUG: kernel NULL pointer dereference, address: 0000000000000010 [ 14.193228] #PF: supervisor read access in kernel mode [ 14.193228] #PF: error_code(0x0000) - not-present page [ 14.193229] PGD 0 P4D 0 [ 14.193232] Oops: 0000 [#1] SMP PTI [ 14.193235] CPU: 0 PID: 495 Comm: kworker/0:8 Not tainted 5.8.0-rc1-host-tagset+ #3 [ 14.193236] Hardware name: HP ProLiant ML350 Gen9/ProLiant ML350 Gen9, BIOS P92 10/17/2018 [ 14.193243] Workqueue: events work_for_cpu_fn [ 14.470674] RIP: 0010:blk_mq_unique_tag+0x5/0x20 [ 14.470676] Code: cd 0f 1f 40 00 0f 1f 44 00 00 8b 87 cc 00 00 00 83 f8 02 75 03 83 06 01 b8 01 00 00 00 c3 0f 1f 80 00 00 00 00 0f 1f 44 00 00 <48> 8b 47 10 0f b7 57 20 8b 80 94 01 00 00 c1 e0 10 09 d0 c3 0f 1f [ 14.470677] RSP: 0000:ffff989f42893d08 EFLAGS: 00010246 [ 14.470680] RAX: ffffffffc0493f80 RBX: ffff8ab761c00000 RCX: 0000000000000000 [ 14.717021] RDX: ffff8ab9b7600000 RSI: ffff8ab761c00000 RDI: 0000000000000000 [ 14.717021] RBP: ffff8ab9a5b98000 R08: ffffffffffffffff R09: 0000000000000000 [ 14.717022] R10: ffff8ab8b5280070 R11: 0000000000000000 R12: 000000000000000a [ 14.717022] R13: 0000000000000002 R14: ffff8ab761c00000 R15: ffffffffc0493b60 [ 14.717023] FS: 0000000000000000(0000) GS:ffff8ab9b7600000(0000) knlGS:0000000000000000 [ 14.717024] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 14.717025] CR2: 0000000000000010 CR3: 000000024f33e006 CR4: 00000000001606f0 [ 14.717025] Call Trace: [ 14.717034] __enqueue_cmd_and_start_io.isra.60+0x20/0x170 [hpsa] [ 14.717039] hpsa_scsi_do_simple_cmd.isra.62+0x6b/0xd0 [hpsa] [ 14.717042] hpsa_scsi_do_simple_cmd_with_retry+0x63/0x160 [hpsa] [ 14.717045] hpsa_scsi_do_inquiry+0x62/0xc0 [hpsa] [ 14.717048] hpsa_init_one+0x1167/0x1400 [hpsa] [ 14.717052] local_pci_probe+0x42/0x80 [ 14.717054] work_for_cpu_fn+0x16/0x20 [ 14.717057] process_one_work+0x1a7/0x370 [ 14.717059] ? process_one_work+0x370/0x370 [ 14.717061] worker_thread+0x1c9/0x370 [ 14.717062] ? process_one_work+0x370/0x370 [ 14.717064] kthread+0x114/0x130 [ 14.717065] ? kthread_park+0x80/0x80 [ 14.717068] ret_from_fork+0x22/0x30 [ 14.717070] Modules linked in: crc32c_intel libahci(+) uas tg3(+) libata usb_storage i2c_algo_bit hpsa(+) scsi_transport_sas wmi dm_mirror dm_region_hash dm_log dm_mod [ 14.717077] CR2: 0000000000000010 [ 14.717099] ---[ end trace 3845f459e9223caa ]--- [ 14.724750] ERST: [Firmware Warn]: Firmware does not respond in time. [ 14.724753] RIP: 0010:blk_mq_unique_tag+0x5/0x20 [ 14.724754] Code: cd 0f 1f 40 00 0f 1f 44 00 00 8b 87 cc 00 00 00 83 f8 02 75 03 83 06 01 b8 01 00 00 00 c3 0f 1f 80 00 00 00 00 0f 1f 44 00 00 <48> 8b 47 10 0f b7 57 20 8b 80 94 01 00 00 c1 e0 10 09 d0 c3 0f 1f [ 14.724755] RSP: 0000:ffff989f42893d08 EFLAGS: 00010246 [ 14.724756] RAX: ffffffffc0493f80 RBX: ffff8ab761c00000 RCX: 0000000000000000 [ 14.724757] RDX: ffff8ab9b7600000 RSI: ffff8ab761c00000 RDI: 0000000000000000 [ 14.724757] RBP: ffff8ab9a5b98000 R08: ffffffffffffffff R09: 0000000000000000 [ 14.724758] R10: ffff8ab8b5280070 R11: 0000000000000000 R12: 000000000000000a [ 14.724758] R13: 0000000000000002 R14: ffff8ab761c00000 R15: ffffffffc0493b60 [ 14.724759] FS: 0000000000000000(0000) GS:ffff8ab9b7600000(0000) knlGS:0000000000000000 [ 14.724760] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 14.724760] CR2: 0000000000000010 CR3: 000000024f33e006 CR4: 00000000001606f0 [ 14.724761] Kernel panic - not syncing: Fatal exception [ 14.724833] Kernel Offset: 0x38400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 16.487017] ---[ end Kernel panic - not syncing: Fatal exception ]--- > Signed-off-by: Hannes Reinecke <hare@suse.de> > --- > drivers/scsi/hpsa.c | 44 +++++++------------------------------------- > drivers/scsi/hpsa.h | 1 - > 2 files changed, 7 insertions(+), 38 deletions(-) > > diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index > 1e9302e99d05..f807f9bdae85 100644 > --- a/drivers/scsi/hpsa.c > +++ b/drivers/scsi/hpsa.c > @@ -980,6 +980,7 @@ static struct scsi_host_template hpsa_driver_template = { > .shost_attrs = hpsa_shost_attrs, > .max_sectors = 2048, > .no_write_same = 1, > + .host_tagset = 1, > }; > > static inline u32 next_command(struct ctlr_info *h, u8 q) @@ > -1144,12 +1145,14 @@ static void dial_up_lockup_detection_on_fw_flash_complete(struct ctlr_info *h, > static void __enqueue_cmd_and_start_io(struct ctlr_info *h, > struct CommandList *c, int reply_queue) > { > + u32 blk_tag = blk_mq_unique_tag(c->scsi_cmd->request); > + > dial_down_lockup_detection_during_fw_flash(h, c); > atomic_inc(&h->commands_outstanding); > if (c->device) > atomic_inc(&c->device->commands_outstanding); > > - reply_queue = h->reply_map[raw_smp_processor_id()]; > + reply_queue = blk_mq_unique_tag_to_hwq(blk_tag); > switch (c->cmd_type) { > case CMD_IOACCEL1: > set_ioaccel1_performant_mode(h, c, reply_queue); @@ > -5653,8 +5656,6 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd) > /* Get the ptr to our adapter structure out of cmd->host. */ > h = sdev_to_hba(cmd->device); > > - BUG_ON(cmd->request->tag < 0); > - > dev = cmd->device->hostdata; > if (!dev) { > cmd->result = DID_NO_CONNECT << 16; @@ -5830,7 +5831,7 > @@ static int hpsa_scsi_host_alloc(struct ctlr_info *h) > sh->hostdata[0] = (unsigned long) h; > sh->irq = pci_irq_vector(h->pdev, 0); > sh->unique_id = sh->irq; > - > + sh->nr_hw_queues = h->msix_vectors > 0 ? h->msix_vectors : 1; > h->scsi_host = sh; > return 0; > } > @@ -5856,7 +5857,8 @@ static int hpsa_scsi_add_host(struct ctlr_info *h) > */ > static int hpsa_get_cmd_index(struct scsi_cmnd *scmd) > { > - int idx = scmd->request->tag; > + u32 blk_tag = blk_mq_unique_tag(scmd->request); > + int idx = blk_mq_unique_tag_to_tag(blk_tag); > > if (idx < 0) > return idx; > @@ -7456,26 +7458,6 @@ static void hpsa_disable_interrupt_mode(struct ctlr_info *h) > h->msix_vectors = 0; > } > > -static void hpsa_setup_reply_map(struct ctlr_info *h) -{ > - const struct cpumask *mask; > - unsigned int queue, cpu; > - > - for (queue = 0; queue < h->msix_vectors; queue++) { > - mask = pci_irq_get_affinity(h->pdev, queue); > - if (!mask) > - goto fallback; > - > - for_each_cpu(cpu, mask) > - h->reply_map[cpu] = queue; > - } > - return; > - > -fallback: > - for_each_possible_cpu(cpu) > - h->reply_map[cpu] = 0; > -} > - > /* If MSI/MSI-X is supported by the kernel we will try to enable it on > * controllers that are capable. If not, we use legacy INTx mode. > */ > @@ -7872,9 +7854,6 @@ static int hpsa_pci_init(struct ctlr_info *h) > if (err) > goto clean1; > > - /* setup mapping between CPU and reply queue */ > - hpsa_setup_reply_map(h); > - > err = hpsa_pci_find_memory_BAR(h->pdev, &h->paddr); > if (err) > goto clean2; /* intmode+region, pci */ > @@ -8613,7 +8592,6 @@ static struct workqueue_struct > *hpsa_create_controller_wq(struct ctlr_info *h, > > static void hpda_free_ctlr_info(struct ctlr_info *h) > { > - kfree(h->reply_map); > kfree(h); > } > > @@ -8622,14 +8600,6 @@ static struct ctlr_info *hpda_alloc_ctlr_info(void) > struct ctlr_info *h; > > h = kzalloc(sizeof(*h), GFP_KERNEL); > - if (!h) > - return NULL; > - > - h->reply_map = kcalloc(nr_cpu_ids, sizeof(*h->reply_map), GFP_KERNEL); > - if (!h->reply_map) { > - kfree(h); > - return NULL; > - } > return h; > } > > diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index > f8c88fc7b80a..ea4a609e3eb7 100644 > --- a/drivers/scsi/hpsa.h > +++ b/drivers/scsi/hpsa.h > @@ -161,7 +161,6 @@ struct bmic_controller_parameters { > #pragma pack() > > struct ctlr_info { > - unsigned int *reply_map; > int ctlr; > char devname[8]; > char *product_name; >
Hi Don, Thanks for checking this. > I cloned: > https://github.com/hisilicon/kernel-dev > switched to branch: origin/private-topic-blk-mq-shared-tags-rfc-v8 I would have suggested to use v7 for now, but does not look relevant here. > > And built the kernel. The hpsa driver oopsed on load. It was attempting to do driver initiated commands, so there would need to be some reserved tags set aside to communicate with the controller. > > Was I supposed to add this patch on top of Hannes's hpsa patches? I didn't think so, but I now realize that it may be necessary here - please see below. And since Hannes's reserved commands work is not merged, I do not include it. > [ 14.717025] Call Trace: > [ 14.717034] __enqueue_cmd_and_start_io.isra.60+0x20/0x170 [hpsa] > [ 14.717039] hpsa_scsi_do_simple_cmd.isra.62+0x6b/0xd0 [hpsa] > [ 14.717042] hpsa_scsi_do_simple_cmd_with_retry+0x63/0x160 [hpsa] > [ 14.717045] hpsa_scsi_do_inquiry+0x62/0xc0 [hpsa] > [ 14.717048] hpsa_init_one+0x1167/0x1400 [hpsa] > [ 14.717052] local_pci_probe+0x42/0x80 > [ 14.717054] work_for_cpu_fn+0x16/0x20 > [ 14.717057] process_one_work+0x1a7/0x370 > [ 14.717059] ? process_one_work+0x370/0x370 > [ 14.717061] worker_thread+0x1c9/0x370 > [ 14.717062] ? process_one_work+0x370/0x370 > [ 14.717064] kthread+0x114/0x130 > [ 14.717065] ? kthread_park+0x80/0x80 > [ 14.717068] ret_from_fork+0x22/0x30 > [ 14.717070] Modules linked in: crc32c_intel libahci(+) uas tg3(+) libata usb_storage i2c_algo_bit hpsa(+) scsi_transport_sas wmi dm_mirror dm_region_hash dm_log dm_mod > [ 14.717077] CR2: 0000000000000010 > [ 14.717099] ---[ end trace 3845f459e9223caa ]--- > [ 14.724750] ERST: [Firmware Warn]: Firmware does not respond in time. > [ 14.724753] RIP: 0010:blk_mq_unique_tag+0x5/0x20 > [ 14.724754] Code: cd 0f 1f 40 00 0f 1f 44 00 00 8b 87 cc 00 00 00 83 f8 02 75 03 83 06 01 b8 01 00 00 00 c3 0f 1f 80 00 00 00 00 0f 1f 44 00 00 <48> 8b 47 10 0f b7 57 20 8b 80 94 01 00 00 c1 e0 10 09 d0 c3 0f 1f > [ 14.724755] RSP: 0000:ffff989f42893d08 EFLAGS: 00010246 > [ 14.724756] RAX: ffffffffc0493f80 RBX: ffff8ab761c00000 RCX: 0000000000000000 > [ 14.724757] RDX: ffff8ab9b7600000 RSI: ffff8ab761c00000 RDI: 0000000000000000 > [ 14.724757] RBP: ffff8ab9a5b98000 R08: ffffffffffffffff R09: 0000000000000000 > [ 14.724758] R10: ffff8ab8b5280070 R11: 0000000000000000 R12: 000000000000000a > [ 14.724758] R13: 0000000000000002 R14: ffff8ab761c00000 R15: ffffffffc0493b60 > [ 14.724759] FS: 0000000000000000(0000) GS:ffff8ab9b7600000(0000) knlGS:0000000000000000 > [ 14.724760] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > [ 14.724760] CR2: 0000000000000010 CR3: 000000024f33e006 CR4: 00000000001606f0 > [ 14.724761] Kernel panic - not syncing: Fatal exception > [ 14.724833] Kernel Offset: 0x38400000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) > [ 16.487017] ---[ end Kernel panic - not syncing: Fatal exception ]--- > > > >> Signed-off-by: Hannes Reinecke<hare@suse.de> >> --- >> drivers/scsi/hpsa.c | 44 +++++++------------------------------------- >> drivers/scsi/hpsa.h | 1 - >> 2 files changed, 7 insertions(+), 38 deletions(-) >> >> diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index >> 1e9302e99d05..f807f9bdae85 100644 >> --- a/drivers/scsi/hpsa.c >> +++ b/drivers/scsi/hpsa.c >> @@ -980,6 +980,7 @@ static struct scsi_host_template hpsa_driver_template = { >> .shost_attrs = hpsa_shost_attrs, >> .max_sectors = 2048, >> .no_write_same = 1, >> + .host_tagset = 1, >> }; >> >> static inline u32 next_command(struct ctlr_info *h, u8 q) @@ >> -1144,12 +1145,14 @@ static void dial_up_lockup_detection_on_fw_flash_complete(struct ctlr_info *h, >> static void __enqueue_cmd_and_start_io(struct ctlr_info *h, >> struct CommandList *c, int reply_queue) >> { >> + u32 blk_tag = blk_mq_unique_tag(c->scsi_cmd->request); For the hpsa_scsi_do_inquiry() -> fill_cmd(HPSA_INQUIRY) call, c->scsi_cmd = SCSI_CMD_BUSY, which just seems to be a pointer flag. And so I guess that c->scsi_cmd->request == NULL, and we deference this in blk_mq_unique_tag() -> oops. I figure that the code should look like this for now: static void __enqueue_cmd_and_start_io(struct ctlr_info *h, struct CommandList *c, int reply_queue) { if (c->scsi_cmd->request) { u32 blk_tag = blk_mq_unique_tag(c->scsi_cmd->request); reply_queue = blk_mq_unique_tag_to_hwq(blk_tag); } dial_down_lockup_detection_during_fw_flash(h, c); atomic_inc(&h->commands_outstanding); if (c->device) atomic_inc(&c->device->commands_outstanding); switch (c->cmd_type) { But then reply_queue may be = DEFAULT_REPLY_QUEUE (=1), so I am not sure if that is a problem. However this issue should go away with Hannes's reserved command work, as we allocate a "real" SCSI cmd there. @Hannes, any suggestion what to do here? >> + >> dial_down_lockup_detection_during_fw_flash(h, c); >> atomic_inc(&h->commands_outstanding); >> if (c->device) >> atomic_inc(&c->device->commands_outstanding); >> >> - reply_queue = h->reply_map[raw_smp_processor_id()]; >> + reply_queue = blk_mq_unique_tag_to_hwq(blk_tag); >> switch (c->cmd_type) { >> case CMD_IOACCEL1: >> set_ioaccel1_performant_mode(h, c, reply_queue); @@ >> -5653,8 +5656,6 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd) >> /* Get the ptr to our adapter structure out of cmd->host. */ >> h = sdev_to_hba(cmd->device); >> >> - BUG_ON(cmd->request->tag < 0); >> - >> dev = cmd->device->hostdata; >> if (!dev) { >> cmd->result = DID_NO_CONNECT << 16; @@ -5830,7 +5831,7 >> @@ static int hpsa_scsi_host_alloc(struct ctlr_info *h) >> sh->hostdata[0] = (unsigned long) h; >> sh->irq = pci_irq_vector(h->pdev, 0); >> sh->unique_id = sh->irq; >> - >> + sh->nr_hw_queues = h->msix_vectors > 0 ? h->msix_vectors : 1; >> h->scsi_host = sh; >> return 0; >> } >> @@ -5856,7 +5857,8 @@ static int hpsa_scsi_add_host(struct ctlr_info *h) >> */ >> static int hpsa_get_cmd_index(struct scsi_cmnd *scmd) >> { >> - int idx = scmd->request->tag; >> + u32 blk_tag = blk_mq_unique_tag(scmd->request); >> + int idx = blk_mq_unique_tag_to_tag(blk_tag); @Hannes, This looks like a pointless change - we make a 32b unique tag, including the request->tag, and then convert back to the request->tag. >> >> if (idx < 0) >> return idx; >> @@ -7456,26 +7458,6 @@ static void hpsa_disable_interrupt_mode(struct ctlr_info *h) >> h->msix_vectors = 0; >> } Thanks, John
Subject: Re: [PATCH RFC v7 12/12] hpsa: enable host_tagset and switch to MQ >> >> Hi Don, >> >> I am preparing the next iteration of this series, and we're getting >> close to dropping the RFC tags. The series has grown a bit, and I am >> not sure what to do with hpsa support. >> >> The latest versions of this series have not been tested for hpsa, AFAIK. >>v7 is here: >>https://github.com/hisilicon/kernel-dev/commits/private-topic-blk-mq-shared-tags-rfc-v7 >>So that should be good to test with for now. cloned https://github.com/hisilicon/kernel-dev branch origin/private-topic-blk-mq-shared-tags-rfc-v7 The driver did not load, so I cherry-picked from git://git.kernel.org/pub/scm/linux/kernel/git/hare/scsi-devel.git branch origin/reserved-tags.v6 the following patches: 6a9d1a96ea41 hpsa: move hpsa_hba_inquiry after scsi_add_host() eeb5cd1fca58 hpsa: use reserved commands 7df7d8382902 hpsa: use scsi_host_busy_iter() to traverse outstanding commands 485881d6d8dc hpsa: drop refcount field from CommandList c4980ad5e5cb scsi: implement reserved command handling 34d03fa945c0 scsi: add scsi_{get,put}_internal_cmd() helper 4556e50450c8 block: add flag for internal commands 138125f74b25 scsi: hpsa: Lift {BIG_,}IOCTL_Command_struct copy{in,out} into hpsa_ioctl() cb17c1b69b17 scsi: hpsa: Don't bother with vmalloc for BIG_IOCTL_Command_struct 10100ffd5f65 scsi: hpsa: Get rid of compat_alloc_user_space() 06b43f968db5 scsi: hpsa: hpsa_ioctl(): Tidy up a bit The driver loads and I ran some mke2fs, mount/umount tests, but I am getting an extra devices in the list which does not seem to be coming from hpsa driver. I have not yet had time to diagnose this issue. lsscsi [1:0:0:0] disk ASMT 2105 0 /dev/sdi [14:0:-1:0] type??? nullnull nullnullnullnull null - [14:0:0:0] storage HP H240 7.19 - [14:0:1:0] disk ATA MB002000GWFGH HPG0 /dev/sda [14:0:2:0] disk ATA MB002000GWFGH HPG0 /dev/sdb [14:0:3:0] disk HP EF0450FARMV HPD5 /dev/sdc [14:0:4:0] disk ATA MB002000GWFGH HPG0 /dev/sdd [14:0:5:0] disk ATA MB002000GWFGH HPG0 /dev/sde [14:0:6:0] disk HP EF0450FARMV HPD5 /dev/sdf [14:0:7:0] disk ATA VB0250EAVER HPG7 /dev/sdg [14:0:8:0] disk ATA MB0500GCEHF HPGC /dev/sdh [14:0:9:0] enclosu HP H240 7.19 - [15:0:-1:0] type??? nullnull nullnullnullnull null - [15:0:0:0] storage HP P440 7.19 - [15:1:0:0] disk HP LOGICAL VOLUME 7.19 /dev/sdj [15:1:0:1] disk HP LOGICAL VOLUME 7.19 /dev/sdk [15:1:0:2] disk HP LOGICAL VOLUME 7.19 /dev/sdl [15:1:0:3] disk HP LOGICAL VOLUME 7.19 /dev/sdm [16:0:-1:0] type??? nullnull nullnullnullnull null - [16:0:0:0] storage HP P441 7.19 -
On 03/08/2020 21:39, Don.Brace@microchip.com wrote: Hi Don, >>> at should be good to test with for now. > clonedhttps://github.com/hisilicon/kernel-dev > branch origin/private-topic-blk-mq-shared-tags-rfc-v7 > > The driver did not load, so I cherry-picked from > > git://git.kernel.org/pub/scm/linux/kernel/git/hare/scsi-devel.git > branch origin/reserved-tags.v6 > > the following patches: > 6a9d1a96ea41 hpsa: move hpsa_hba_inquiry after scsi_add_host() > eeb5cd1fca58 hpsa: use reserved commands > 7df7d8382902 hpsa: use scsi_host_busy_iter() to traverse outstanding commands > 485881d6d8dc hpsa: drop refcount field from CommandList > c4980ad5e5cb scsi: implement reserved command handling > 34d03fa945c0 scsi: add scsi_{get,put}_internal_cmd() helper > 4556e50450c8 block: add flag for internal commands > 138125f74b25 scsi: hpsa: Lift {BIG_,}IOCTL_Command_struct copy{in,out} into hpsa_ioctl() > cb17c1b69b17 scsi: hpsa: Don't bother with vmalloc for BIG_IOCTL_Command_struct > 10100ffd5f65 scsi: hpsa: Get rid of compat_alloc_user_space() > 06b43f968db5 scsi: hpsa: hpsa_ioctl(): Tidy up a bit > > The driver loads and I ran some mke2fs, mount/umount tests, ok, great > but I am getting an extra devices in the list which does not > seem to be coming from hpsa driver. > > I have not yet had time to diagnose this issue. > > lsscsi > [1:0:0:0] disk ASMT 2105 0 /dev/sdi > [14:0:-1:0] type??? nullnull nullnullnullnull null - > [14:0:0:0] storage HP H240 7.19 - > [14:0:1:0] disk ATA MB002000GWFGH HPG0 /dev/sda > [14:0:2:0] disk ATA MB002000GWFGH HPG0 /dev/sdb > [14:0:3:0] disk HP EF0450FARMV HPD5 /dev/sdc > [14:0:4:0] disk ATA MB002000GWFGH HPG0 /dev/sdd > [14:0:5:0] disk ATA MB002000GWFGH HPG0 /dev/sde > [14:0:6:0] disk HP EF0450FARMV HPD5 /dev/sdf > [14:0:7:0] disk ATA VB0250EAVER HPG7 /dev/sdg > [14:0:8:0] disk ATA MB0500GCEHF HPGC /dev/sdh > [14:0:9:0] enclosu HP H240 7.19 - > [15:0:-1:0] type??? nullnull nullnullnullnull null - > [15:0:0:0] storage HP P440 7.19 - > [15:1:0:0] disk HP LOGICAL VOLUME 7.19 /dev/sdj > [15:1:0:1] disk HP LOGICAL VOLUME 7.19 /dev/sdk > [15:1:0:2] disk HP LOGICAL VOLUME 7.19 /dev/sdl > [15:1:0:3] disk HP LOGICAL VOLUME 7.19 /dev/sdm > [16:0:-1:0] type??? nullnull nullnullnullnull null - > [16:0:0:0] storage HP P441 7.19 - > > I assume that you are missing some other patches from that branch, like these: 77dcb92c31ae scsi: revamp host device handling 6e9884aefe66 scsi: Use dummy inquiry data for the host device a381637f8a6e scsi: use real inquiry data when initialising devices @Hannes, Any plans to get this series going again? Thanks, John
Subject: Re: [PATCH RFC v7 12/12] hpsa: enable host_tagset and switch to MQ Hi Don, >>> at should be good to test with for now. > clonedhttps://github.com/hisilicon/kernel-dev > branch origin/private-topic-blk-mq-shared-tags-rfc-v7 > > The driver did not load, so I cherry-picked from > > git://git.kernel.org/pub/scm/linux/kernel/git/hare/scsi-devel.git > branch origin/reserved-tags.v6 ok, great > but I am getting an extra devices in the list which does not seem to > be coming from hpsa driver. > >>I assume that you are missing some other patches from >>that branch, like >>these: >>77dcb92c31ae scsi: revamp host device handling >>6e9884aefe66 scsi: Use dummy inquiry data for the host >>device a381637f8a6e scsi: use real inquiry data when >>initialising devices >>@Hannes, Any plans to get this series going again? I cherry-picked the following and this resolves the issue. 77dcb92c31ae scsi: revamp host device handling 6e9884aefe66 scsi: Use dummy inquiry data for the host device a381637f8a6e scsi: use real inquiry data when initialising devices I'll continue with more I/O stress testing. Thanks for the patch suggestions, Don
On 04/08/2020 16:18, Don.Brace@microchip.com wrote: >> git://git.kernel.org/pub/scm/linux/kernel/git/hare/scsi-devel.git >> branch origin/reserved-tags.v6 > ok, great > >> but I am getting an extra devices in the list which does not seem to >> be coming from hpsa driver. >> >>> I assume that you are missing some other patches from >>that branch, like >>> these: >>> 77dcb92c31ae scsi: revamp host device handling >>> 6e9884aefe66 scsi: Use dummy inquiry data for the host >>device a381637f8a6e scsi: use real inquiry data when >>initialising devices >>> @Hannes, Any plans to get this series going again? > I cherry-picked the following and this resolves the issue. > 77dcb92c31ae scsi: revamp host device handling > 6e9884aefe66 scsi: Use dummy inquiry data for the host device > a381637f8a6e scsi: use real inquiry data when initialising devices > I'll continue with more I/O stress testing. ok, great. Please let me know about your testing, as I might just add that series to the v8 branch. Cheers, John
Subject: Re: [PATCH RFC v7 12/12] hpsa: enable host_tagset and switch to MQ > I'll continue with more I/O stress testing. >>ok, great. Please let me know about your testing, as I might just add that series to the v8 branch. >>Cheers, >>John I cloned your branch from https://github.com/hisilicon/kernel-dev and checkout branch: origin/private-topic-blk-mq-shared-tags-rfc-v7 By themselves, the branch compiled but the driver did not load. I cherry-picked the following patches from Hannes: git://git.kernel.org/pub/scm/linux/kernel/git/hare/scsi-devel.git branch: reserved-tags.v6 6a9d1a96ea41 hpsa: move hpsa_hba_inquiry after scsi_add_host() eeb5cd1fca58 hpsa: use reserved commands confict: removal of function hpsa_get_cmd_index, non-functional issue. 7df7d8382902 hpsa: use scsi_host_busy_iter() to traverse outstanding commands 485881d6d8dc hpsa: drop refcount field from CommandList c4980ad5e5cb scsi: implement reserved command handling conflict: drivers/scsi/scsi_lib.c minor context issue adding comment, non-functional issue. 34d03fa945c0 scsi: add scsi_{get,put}_internal_cmd() helper conflict: drivers/scsi/scsi_lib.c minor context issue around scsi_get_internal_cmd when adding new comment, non-functional issue 4556e50450c8 block: add flag for internal commands 138125f74b25 scsi: hpsa: Lift {BIG_,}IOCTL_Command_struct copy{in,out} into hpsa_ioctl() cb17c1b69b17 scsi: hpsa: Don't bother with vmalloc for BIG_IOCTL_Command_struct 10100ffd5f65 scsi: hpsa: Get rid of compat_alloc_user_space() 06b43f968db5 scsi: hpsa: hpsa_ioctl(): Tidy up a bit a381637f8a6e scsi: use real inquiry data when initialising devices 6e9884aefe66 scsi: Use dummy inquiry data for the host device 77dcb92c31ae scsi: revamp host device handling After the above patches were applied, the driver loaded and I ran the following tests: insmod/rmmod reboot Ran an I/O stress test consisting of: 6 SATA HBA disks 2 SAS HBA disks 2 RAID 5 volumes using 3 SAS HDDs 2 RAID 5 volumes using 3 SAS SSDs (ioaccel enabled) 1) fio tests to raw disks. 2) mke2fs tests 3) mount 4) fio to file systems 5) umount 6) fsck And running reset tests in parallel to the above 6 tests using sg_reset I ran some performance tests to HBA and LOGICAL VOLUMES and did not find a performance regression. We are also reconsidering changing smartpqi over to use host tags but in some preliminary performance tests, I found a performance regression. Note: I only used your V7 patches for smartpqi. I have not had time to determine what is causing this, but wanted to make note of this. For hpsa: With all of the patches noted above, Tested-by: Don Brace <don.brace@microsemi.com> For hpsa specific patches: Reviewed-by: Don Brace <don.brace@microsemi.com> Thanks for your input and your patches, Don
On 14/08/2020 22:04, Don.Brace@microchip.com wrote: Hi Don, > I cloned your branch fromhttps://github.com/hisilicon/kernel-dev > and checkout branch: origin/private-topic-blk-mq-shared-tags-rfc-v7 > > By themselves, the branch compiled but the driver did not load. > > I cherry-picked the following patches from Hannes: > git://git.kernel.org/pub/scm/linux/kernel/git/hare/scsi-devel.git > branch: reserved-tags.v6 > > 6a9d1a96ea41 hpsa: move hpsa_hba_inquiry after scsi_add_host() > eeb5cd1fca58 hpsa: use reserved commands > confict: removal of function hpsa_get_cmd_index, > non-functional issue. > 7df7d8382902 hpsa: use scsi_host_busy_iter() to traverse outstanding commands > 485881d6d8dc hpsa: drop refcount field from CommandList > c4980ad5e5cb scsi: implement reserved command handling > conflict: drivers/scsi/scsi_lib.c > minor context issue adding comment, > non-functional issue. > 34d03fa945c0 scsi: add scsi_{get,put}_internal_cmd() helper > conflict: drivers/scsi/scsi_lib.c > minor context issue around scsi_get_internal_cmd > when adding new comment, > non-functional issue > 4556e50450c8 block: add flag for internal commands > 138125f74b25 scsi: hpsa: Lift {BIG_,}IOCTL_Command_struct copy{in,out} into hpsa_ioctl() > cb17c1b69b17 scsi: hpsa: Don't bother with vmalloc for BIG_IOCTL_Command_struct > 10100ffd5f65 scsi: hpsa: Get rid of compat_alloc_user_space() > 06b43f968db5 scsi: hpsa: hpsa_ioctl(): Tidy up a bit > a381637f8a6e scsi: use real inquiry data when initialising devices > 6e9884aefe66 scsi: Use dummy inquiry data for the host device > 77dcb92c31ae scsi: revamp host device handling > > After the above patches were applied, the driver loaded and I ran the following tests: > insmod/rmmod > reboot > Ran an I/O stress test consisting of: > 6 SATA HBA disks > 2 SAS HBA disks > 2 RAID 5 volumes using 3 SAS HDDs > 2 RAID 5 volumes using 3 SAS SSDs (ioaccel enabled) > > 1) fio tests to raw disks. > 2) mke2fs tests > 3) mount > 4) fio to file systems > 5) umount > 6) fsck > > And running reset tests in parallel to the above 6 tests using sg_reset > > I ran some performance tests to HBA and LOGICAL VOLUMES and did not find a performance regression. > ok, thanks for this info. I appreciate it. > We are also reconsidering changing smartpqi over to use host tags but in some preliminary performance tests, I found a performance regression. > Note: I only used your V7 patches for smartpqi. > I have not had time to determine what is causing this, but wanted to make note of this. Thanks. Please note that we have been looking at many performances improvements since v7, and these will be included in v8, so maybe I can still include smartpqi in the v8 series and you can retest if you want. > > For hpsa: > > With all of the patches noted above, > Tested-by: Don Brace<don.brace@microsemi.com> > > For hpsa specific patches: > Reviewed-by: Don Brace<don.brace@microsemi.com> Thanks. Please also note that I want to drop the RFC tag for v8 series, so I will just have to note that we still depend on Hannes' work for hpsa. We could also change the patch, but let's see how we go. Cheers, John
Subject: Re: [PATCH RFC v7 12/12] hpsa: enable host_tagset and switch to MQ > We are also reconsidering changing smartpqi over to use host tags but in some preliminary performance tests, I found a performance regression. > Note: I only used your V7 patches for smartpqi. > I have not had time to determine what is causing this, but wanted to make note of this. >>Thanks. Please note that we have been looking at many >>performances improvements since v7, and these will be >>included in v8, so maybe I can still include smartpqi in >>the v8 series and you can retest if you want. Sure, Thanks for your patches Don > > For hpsa: > > With all of the patches noted above, > Tested-by: Don Brace<don.brace@microsemi.com> > > For hpsa specific patches: > Reviewed-by: Don Brace<don.brace@microsemi.com> Thanks. Please also note that I want to drop the RFC tag for v8 series, so I will just have to note that we still depend on Hannes' work for hpsa. We could also change the patch, but let's see how we go. Cheers, John
On 8/17/20 8:39 PM, Don.Brace@microchip.com wrote: > Subject: Re: [PATCH RFC v7 12/12] hpsa: enable host_tagset and switch to MQ > >> We are also reconsidering changing smartpqi over to use host tags but in some preliminary performance tests, >> I found a performance regression. >> Note: I only used your V7 patches for smartpqi. >> I have not had time to determine what is causing this, but wanted to make note of this. > >> Thanks. Please note that we have been looking at many >>performances improvements since v7, and these will >> be included in v8, so maybe I can still include smartpqi in >>the v8 series and you can retest if you want. > > Sure, > Thanks for your patches > Don > Well, I had been looking at smartpqi and its tag handling, and found it no easy match with the blk-mq implementation we have in linux. (Which is actually quite curious, as both had been developed around the same time, so I would've thought that they would be similar ...) Anyhow: for smartpqi each sgl element use up one slot in the submission queue, so the total number of SQEs for one command is 1 (for the command itself) + number of sgl elements. With that the queue size is actually dynamic, and depends on the size of the commands being sent. This doesn't map easily onto blk-mq concepts, where we assume that each command consumes one SQE. So currently the smartpqi driver has its own heuristics for determining the queue depth, but I fear that this also will eat up quite some improvements we might be getting from using host_tagset. (Especially as the smartpqi driver doesn't actually _has_ a host_tagset, but rather the mapping withing the driver exposes something which looks like a host tagset ...) What I really would like to see is to update blk-mq to handle smartpqi properly; this might even be beneficial to other drivers like mpt3sas which have a similar concept (called 'chain_tracker' there). One idea would be to allocate additional tags (one for each sgl) such that the tag bitmap reflects the status of the submission queue. Maybe we can update my reserved tags patchset for that ... hmmm. Cheers, Hannes -- Dr. Hannes Reinecke Kernel Storage Architect hare@suse.de +49 911 74053 688 SUSE Software Solutions Germany GmbH, Maxfeldstr. 5, 90409 Nürnberg HRB 36809 (AG Nürnberg), GF: Felix Imendörffer
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index 1e9302e99d05..f807f9bdae85 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -980,6 +980,7 @@ static struct scsi_host_template hpsa_driver_template = { .shost_attrs = hpsa_shost_attrs, .max_sectors = 2048, .no_write_same = 1, + .host_tagset = 1, }; static inline u32 next_command(struct ctlr_info *h, u8 q) @@ -1144,12 +1145,14 @@ static void dial_up_lockup_detection_on_fw_flash_complete(struct ctlr_info *h, static void __enqueue_cmd_and_start_io(struct ctlr_info *h, struct CommandList *c, int reply_queue) { + u32 blk_tag = blk_mq_unique_tag(c->scsi_cmd->request); + dial_down_lockup_detection_during_fw_flash(h, c); atomic_inc(&h->commands_outstanding); if (c->device) atomic_inc(&c->device->commands_outstanding); - reply_queue = h->reply_map[raw_smp_processor_id()]; + reply_queue = blk_mq_unique_tag_to_hwq(blk_tag); switch (c->cmd_type) { case CMD_IOACCEL1: set_ioaccel1_performant_mode(h, c, reply_queue); @@ -5653,8 +5656,6 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd) /* Get the ptr to our adapter structure out of cmd->host. */ h = sdev_to_hba(cmd->device); - BUG_ON(cmd->request->tag < 0); - dev = cmd->device->hostdata; if (!dev) { cmd->result = DID_NO_CONNECT << 16; @@ -5830,7 +5831,7 @@ static int hpsa_scsi_host_alloc(struct ctlr_info *h) sh->hostdata[0] = (unsigned long) h; sh->irq = pci_irq_vector(h->pdev, 0); sh->unique_id = sh->irq; - + sh->nr_hw_queues = h->msix_vectors > 0 ? h->msix_vectors : 1; h->scsi_host = sh; return 0; } @@ -5856,7 +5857,8 @@ static int hpsa_scsi_add_host(struct ctlr_info *h) */ static int hpsa_get_cmd_index(struct scsi_cmnd *scmd) { - int idx = scmd->request->tag; + u32 blk_tag = blk_mq_unique_tag(scmd->request); + int idx = blk_mq_unique_tag_to_tag(blk_tag); if (idx < 0) return idx; @@ -7456,26 +7458,6 @@ static void hpsa_disable_interrupt_mode(struct ctlr_info *h) h->msix_vectors = 0; } -static void hpsa_setup_reply_map(struct ctlr_info *h) -{ - const struct cpumask *mask; - unsigned int queue, cpu; - - for (queue = 0; queue < h->msix_vectors; queue++) { - mask = pci_irq_get_affinity(h->pdev, queue); - if (!mask) - goto fallback; - - for_each_cpu(cpu, mask) - h->reply_map[cpu] = queue; - } - return; - -fallback: - for_each_possible_cpu(cpu) - h->reply_map[cpu] = 0; -} - /* If MSI/MSI-X is supported by the kernel we will try to enable it on * controllers that are capable. If not, we use legacy INTx mode. */ @@ -7872,9 +7854,6 @@ static int hpsa_pci_init(struct ctlr_info *h) if (err) goto clean1; - /* setup mapping between CPU and reply queue */ - hpsa_setup_reply_map(h); - err = hpsa_pci_find_memory_BAR(h->pdev, &h->paddr); if (err) goto clean2; /* intmode+region, pci */ @@ -8613,7 +8592,6 @@ static struct workqueue_struct *hpsa_create_controller_wq(struct ctlr_info *h, static void hpda_free_ctlr_info(struct ctlr_info *h) { - kfree(h->reply_map); kfree(h); } @@ -8622,14 +8600,6 @@ static struct ctlr_info *hpda_alloc_ctlr_info(void) struct ctlr_info *h; h = kzalloc(sizeof(*h), GFP_KERNEL); - if (!h) - return NULL; - - h->reply_map = kcalloc(nr_cpu_ids, sizeof(*h->reply_map), GFP_KERNEL); - if (!h->reply_map) { - kfree(h); - return NULL; - } return h; } diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h index f8c88fc7b80a..ea4a609e3eb7 100644 --- a/drivers/scsi/hpsa.h +++ b/drivers/scsi/hpsa.h @@ -161,7 +161,6 @@ struct bmic_controller_parameters { #pragma pack() struct ctlr_info { - unsigned int *reply_map; int ctlr; char devname[8]; char *product_name;