Message ID | 20201019065416.34638-1-its@irrelevant.dk |
---|---|
State | New |
Headers | show |
Series | hw/block/nvme: fix aer logic | expand |
On Mon, Oct 19, 2020 at 08:54:16AM +0200, Klaus Jensen wrote: > @@ -844,6 +838,12 @@ static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type, > return; > } > > + /* ignore if masked (cqe posted, but event not cleared) */ > + if (n->aer_mask & (1 << event_type)) { > + trace_pci_nvme_aer_masked(event_type, n->aer_mask); > + return; > + } The 'mask' means the host hasn't yet acknowledged the AER with the appropriate log. The controller should continue to internally enqueue subsequent events of this type, but suppress sending the notification for them until the host unlatches the event type. > event = g_new(NvmeAsyncEvent, 1); > event->result = (NvmeAerResult) { > .event_type = event_type, > @@ -859,9 +859,15 @@ static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type, > > static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type) > { > + NvmeAsyncEvent *event, *next; > + > n->aer_mask &= ~(1 << event_type); > - if (!QTAILQ_EMPTY(&n->aer_queue)) { > - nvme_process_aers(n); > + > + QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) { > + if (event->result.event_type == event_type) { > + QTAILQ_REMOVE(&n->aer_queue, event, entry); Memory leaking the 'event'? > + n->aer_queued--; > + } > } > }
On Oct 19 09:43, Keith Busch wrote: > On Mon, Oct 19, 2020 at 08:54:16AM +0200, Klaus Jensen wrote: > > @@ -844,6 +838,12 @@ static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type, > > return; > > } > > > > + /* ignore if masked (cqe posted, but event not cleared) */ > > + if (n->aer_mask & (1 << event_type)) { > > + trace_pci_nvme_aer_masked(event_type, n->aer_mask); > > + return; > > + } > > The 'mask' means the host hasn't yet acknowledged the AER with the > appropriate log. The controller should continue to internally enqueue > subsequent events of this type, but suppress sending the notification > for them until the host unlatches the event type. > Ugh. Looks like you are right. Again. Notice events are definitely a good case for when we want to queue up the events internally since the information correspond to different log pages but use the same type. > > event = g_new(NvmeAsyncEvent, 1); > > event->result = (NvmeAerResult) { > > .event_type = event_type, > > @@ -859,9 +859,15 @@ static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type, > > > > static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type) > > { > > + NvmeAsyncEvent *event, *next; > > + > > n->aer_mask &= ~(1 << event_type); > > - if (!QTAILQ_EMPTY(&n->aer_queue)) { > > - nvme_process_aers(n); > > + > > + QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) { > > + if (event->result.event_type == event_type) { > > + QTAILQ_REMOVE(&n->aer_queue, event, entry); > > Memory leaking the 'event'? > Thanks, good catch, but this change is also irrelevant now. > > + n->aer_queued--; > > + } > > } > > } >
diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 9d30ca69dcf1..b18a310d9271 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -805,12 +805,6 @@ static void nvme_process_aers(void *opaque) break; } - /* ignore if masked (cqe posted, but event not cleared) */ - if (n->aer_mask & (1 << event->result.event_type)) { - trace_pci_nvme_aer_masked(event->result.event_type, n->aer_mask); - continue; - } - QTAILQ_REMOVE(&n->aer_queue, event, entry); n->aer_queued--; @@ -844,6 +838,12 @@ static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type, return; } + /* ignore if masked (cqe posted, but event not cleared) */ + if (n->aer_mask & (1 << event_type)) { + trace_pci_nvme_aer_masked(event_type, n->aer_mask); + return; + } + event = g_new(NvmeAsyncEvent, 1); event->result = (NvmeAerResult) { .event_type = event_type, @@ -859,9 +859,15 @@ static void nvme_enqueue_event(NvmeCtrl *n, uint8_t event_type, static void nvme_clear_events(NvmeCtrl *n, uint8_t event_type) { + NvmeAsyncEvent *event, *next; + n->aer_mask &= ~(1 << event_type); - if (!QTAILQ_EMPTY(&n->aer_queue)) { - nvme_process_aers(n); + + QTAILQ_FOREACH_SAFE(event, &n->aer_queue, entry, next) { + if (event->result.event_type == event_type) { + QTAILQ_REMOVE(&n->aer_queue, event, entry); + n->aer_queued--; + } } } diff --git a/hw/block/trace-events b/hw/block/trace-events index cab9913b1f2d..11bad6ae6a11 100644 --- a/hw/block/trace-events +++ b/hw/block/trace-events @@ -67,7 +67,7 @@ pci_nvme_aer_post_cqe(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRI pci_nvme_enqueue_event(uint8_t typ, uint8_t info, uint8_t log_page) "type 0x%"PRIx8" info 0x%"PRIx8" lid 0x%"PRIx8"" pci_nvme_enqueue_event_noqueue(int queued) "queued %d" pci_nvme_enqueue_event_masked(uint8_t typ) "type 0x%"PRIx8"" -pci_nvme_no_outstanding_aers(void) "ignoring event; no outstanding AERs" +pci_nvme_no_outstanding_aers(void) "no outstanding aers" pci_nvme_enqueue_req_completion(uint16_t cid, uint16_t cqid, uint16_t status) "cid %"PRIu16" cqid %"PRIu16" status 0x%"PRIx16"" pci_nvme_mmio_read(uint64_t addr) "addr 0x%"PRIx64"" pci_nvme_mmio_write(uint64_t addr, uint64_t data) "addr 0x%"PRIx64" data 0x%"PRIx64""