Message ID | 20201218033131.2624065-1-jaegeuk@kernel.org |
---|---|
State | Superseded |
Headers | show |
Series | scsi: ufs: fix livelock of ufshcd_clear_ua_wluns | expand |
Hi J, > > When gate_work/ungate_work gets an error during hibern8_enter or exit, > ufshcd_err_handler() > ufshcd_scsi_block_requests() > ufshcd_reset_and_restore() > ufshcd_clear_ua_wluns() -> stuck > ufshcd_scsi_unblock_requests() > > In order to avoid it, ufshcd_clear_ua_wluns() can be called per recovery > flows > such as suspend/resume, link_recovery, and error_handler. Not sure that suspend/resume are UAC events? Also the 'fixes' tag is missing. Thanks, Avri
On 2020-12-18 11:31, Jaegeuk Kim wrote: > When gate_work/ungate_work gets an error during hibern8_enter or exit, > ufshcd_err_handler() > ufshcd_scsi_block_requests() > ufshcd_reset_and_restore() > ufshcd_clear_ua_wluns() -> stuck > ufshcd_scsi_unblock_requests() > > In order to avoid it, ufshcd_clear_ua_wluns() can be called per > recovery flows > such as suspend/resume, link_recovery, and error_handler. > > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> > --- > drivers/scsi/ufs/ufshcd.c | 14 +++++++++----- > 1 file changed, 9 insertions(+), 5 deletions(-) > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c > index e221add25a7e..e711def829cd 100644 > --- a/drivers/scsi/ufs/ufshcd.c > +++ b/drivers/scsi/ufs/ufshcd.c > @@ -3963,6 +3963,8 @@ int ufshcd_link_recovery(struct ufs_hba *hba) > if (ret) > dev_err(hba->dev, "%s: link recovery failed, err %d", > __func__, ret); > + else > + ufshcd_clear_ua_wluns(hba); > > return ret; > } > @@ -5968,6 +5970,8 @@ static void ufshcd_err_handler(struct work_struct > *work) > ufshcd_scsi_unblock_requests(hba); > ufshcd_err_handling_unprepare(hba); > up(&hba->eh_sem); > + Maybe add a check like if (!err && needs_reset) as error handler also handles non-fatal errors which do not require a full reset and restore? > + ufshcd_clear_ua_wluns(hba); > } > > /** > @@ -6908,14 +6912,11 @@ static int > ufshcd_host_reset_and_restore(struct ufs_hba *hba) > ufshcd_set_clk_freq(hba, true); > > err = ufshcd_hba_enable(hba); > - if (err) > - goto out; > > /* Establish the link again and restore the device */ > - err = ufshcd_probe_hba(hba, false); > if (!err) > - ufshcd_clear_ua_wluns(hba); > -out: > + err = ufshcd_probe_hba(hba, false); > + > if (err) > dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err); > ufshcd_update_evt_hist(hba, UFS_EVT_HOST_RESET, (u32)err); > @@ -8745,6 +8746,7 @@ static int ufshcd_suspend(struct ufs_hba *hba, > enum ufs_pm_op pm_op) > ufshcd_resume_clkscaling(hba); > hba->clk_gating.is_suspended = false; > hba->dev_info.b_rpm_dev_flush_capable = false; > + ufshcd_clear_ua_wluns(hba); > ufshcd_release(hba); > out: > if (hba->dev_info.b_rpm_dev_flush_capable) { > @@ -8855,6 +8857,8 @@ static int ufshcd_resume(struct ufs_hba *hba, > enum ufs_pm_op pm_op) > cancel_delayed_work(&hba->rpm_dev_flush_recheck_work); > } > > + ufshcd_clear_ua_wluns(hba); > + > /* Schedule clock gating in case of no access to UFS device yet */ > ufshcd_release(hba);
On 12/21, Can Guo wrote: > On 2020-12-18 11:31, Jaegeuk Kim wrote: > > When gate_work/ungate_work gets an error during hibern8_enter or exit, > > ufshcd_err_handler() > > ufshcd_scsi_block_requests() > > ufshcd_reset_and_restore() > > ufshcd_clear_ua_wluns() -> stuck > > ufshcd_scsi_unblock_requests() > > > > In order to avoid it, ufshcd_clear_ua_wluns() can be called per recovery > > flows > > such as suspend/resume, link_recovery, and error_handler. > > > > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> > > --- > > drivers/scsi/ufs/ufshcd.c | 14 +++++++++----- > > 1 file changed, 9 insertions(+), 5 deletions(-) > > > > diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c > > index e221add25a7e..e711def829cd 100644 > > --- a/drivers/scsi/ufs/ufshcd.c > > +++ b/drivers/scsi/ufs/ufshcd.c > > @@ -3963,6 +3963,8 @@ int ufshcd_link_recovery(struct ufs_hba *hba) > > if (ret) > > dev_err(hba->dev, "%s: link recovery failed, err %d", > > __func__, ret); > > + else > > + ufshcd_clear_ua_wluns(hba); > > > > return ret; > > } > > @@ -5968,6 +5970,8 @@ static void ufshcd_err_handler(struct work_struct > > *work) > > ufshcd_scsi_unblock_requests(hba); > > ufshcd_err_handling_unprepare(hba); > > up(&hba->eh_sem); > > + > > Maybe add a check like if (!err && needs_reset) as error handler > also handles non-fatal errors which do not require a full reset > and restore? I see. Let me add it in v2. > > > + ufshcd_clear_ua_wluns(hba); > > } > > > > /** > > @@ -6908,14 +6912,11 @@ static int > > ufshcd_host_reset_and_restore(struct ufs_hba *hba) > > ufshcd_set_clk_freq(hba, true); > > > > err = ufshcd_hba_enable(hba); > > - if (err) > > - goto out; > > > > /* Establish the link again and restore the device */ > > - err = ufshcd_probe_hba(hba, false); > > if (!err) > > - ufshcd_clear_ua_wluns(hba); > > -out: > > + err = ufshcd_probe_hba(hba, false); > > + > > if (err) > > dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err); > > ufshcd_update_evt_hist(hba, UFS_EVT_HOST_RESET, (u32)err); > > @@ -8745,6 +8746,7 @@ static int ufshcd_suspend(struct ufs_hba *hba, > > enum ufs_pm_op pm_op) > > ufshcd_resume_clkscaling(hba); > > hba->clk_gating.is_suspended = false; > > hba->dev_info.b_rpm_dev_flush_capable = false; > > + ufshcd_clear_ua_wluns(hba); > > ufshcd_release(hba); > > out: > > if (hba->dev_info.b_rpm_dev_flush_capable) { > > @@ -8855,6 +8857,8 @@ static int ufshcd_resume(struct ufs_hba *hba, > > enum ufs_pm_op pm_op) > > cancel_delayed_work(&hba->rpm_dev_flush_recheck_work); > > } > > > > + ufshcd_clear_ua_wluns(hba); > > + > > /* Schedule clock gating in case of no access to UFS device yet */ > > ufshcd_release(hba);
Hi, On 12/20, Avri Altman wrote: > Hi J, > > > > > When gate_work/ungate_work gets an error during hibern8_enter or exit, > > ufshcd_err_handler() > > ufshcd_scsi_block_requests() > > ufshcd_reset_and_restore() > > ufshcd_clear_ua_wluns() -> stuck > > ufshcd_scsi_unblock_requests() > > > > In order to avoid it, ufshcd_clear_ua_wluns() can be called per recovery > > flows > > such as suspend/resume, link_recovery, and error_handler. > Not sure that suspend/resume are UAC events? Could you elaborate a bit? The goal is to clear UAC after UFS reset happens. > > Also the 'fixes' tag is missing. Added. Thanks, > > Thanks, > Avri
When gate_work/ungate_work gets an error during hibern8_enter or exit,
ufshcd_err_handler()
ufshcd_scsi_block_requests()
ufshcd_reset_and_restore()
ufshcd_clear_ua_wluns() -> stuck
ufshcd_scsi_unblock_requests()
In order to avoid it, ufshcd_clear_ua_wluns() can be called per recovery flows
such as suspend/resume, link_recovery, and error_handler.
Fixes: 1918651f2d7e ("scsi: ufs: Clear UAC for RPMB after ufshcd resets")
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
Change log from v1:
- add condition check to call
- add Fixes tag
drivers/scsi/ufs/ufshcd.c | 15 ++++++++++-----
1 file changed, 10 insertions(+), 5 deletions(-)
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index e221add25a7e..29a62552f6f1 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -3963,6 +3963,8 @@ int ufshcd_link_recovery(struct ufs_hba *hba)
if (ret)
dev_err(hba->dev, "%s: link recovery failed, err %d",
__func__, ret);
+ else
+ ufshcd_clear_ua_wluns(hba);
return ret;
}
@@ -5968,6 +5970,9 @@ static void ufshcd_err_handler(struct work_struct *work)
ufshcd_scsi_unblock_requests(hba);
ufshcd_err_handling_unprepare(hba);
up(&hba->eh_sem);
+
+ if (!err && needs_reset)
+ ufshcd_clear_ua_wluns(hba);
}
/**
@@ -6908,14 +6913,11 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
ufshcd_set_clk_freq(hba, true);
err = ufshcd_hba_enable(hba);
- if (err)
- goto out;
/* Establish the link again and restore the device */
- err = ufshcd_probe_hba(hba, false);
if (!err)
- ufshcd_clear_ua_wluns(hba);
-out:
+ err = ufshcd_probe_hba(hba, false);
+
if (err)
dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err);
ufshcd_update_evt_hist(hba, UFS_EVT_HOST_RESET, (u32)err);
@@ -8745,6 +8747,7 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
ufshcd_resume_clkscaling(hba);
hba->clk_gating.is_suspended = false;
hba->dev_info.b_rpm_dev_flush_capable = false;
+ ufshcd_clear_ua_wluns(hba);
ufshcd_release(hba);
out:
if (hba->dev_info.b_rpm_dev_flush_capable) {
@@ -8855,6 +8858,8 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op)
cancel_delayed_work(&hba->rpm_dev_flush_recheck_work);
}
+ ufshcd_clear_ua_wluns(hba);
+
/* Schedule clock gating in case of no access to UFS device yet */
ufshcd_release(hba);
--
2.29.2.729.g45daf8777d-goog
> > > When gate_work/ungate_work gets an error during hibern8_enter or > exit, > > > ufshcd_err_handler() > > > ufshcd_scsi_block_requests() > > > ufshcd_reset_and_restore() > > > ufshcd_clear_ua_wluns() -> stuck > > > ufshcd_scsi_unblock_requests() > > > > > > In order to avoid it, ufshcd_clear_ua_wluns() can be called per recovery > > > flows > > > such as suspend/resume, link_recovery, and error_handler. > > Not sure that suspend/resume are UAC events? > > Could you elaborate a bit? The goal is to clear UAC after UFS reset happens. So why calling it on every suspend and resume? > > > > > Also the 'fixes' tag is missing. > > Added. Thanks, > > > > > Thanks, > > Avri
On 12/21, Avri Altman wrote: > > > > When gate_work/ungate_work gets an error during hibern8_enter or > > exit, > > > > ufshcd_err_handler() > > > > ufshcd_scsi_block_requests() > > > > ufshcd_reset_and_restore() > > > > ufshcd_clear_ua_wluns() -> stuck > > > > ufshcd_scsi_unblock_requests() > > > > > > > > In order to avoid it, ufshcd_clear_ua_wluns() can be called per recovery > > > > flows > > > > such as suspend/resume, link_recovery, and error_handler. > > > Not sure that suspend/resume are UAC events? > > > > Could you elaborate a bit? The goal is to clear UAC after UFS reset happens. > So why calling it on every suspend and resume? 1. If UAC was cleared, there's no impact. 2. ufshcd_link_recovery() can reset UFS directly by ufs_mtk_resume(). 3. ufshcd_suspend can call ufshcd_host_reset_and_restore() as well. > > > > > > > > > Also the 'fixes' tag is missing. > > > > Added. Thanks, > > > > > > > > Thanks, > > > Avri
> > > On 12/21, Avri Altman wrote: > > > > > When gate_work/ungate_work gets an error during hibern8_enter or > > > exit, > > > > > ufshcd_err_handler() > > > > > ufshcd_scsi_block_requests() > > > > > ufshcd_reset_and_restore() > > > > > ufshcd_clear_ua_wluns() -> stuck > > > > > ufshcd_scsi_unblock_requests() > > > > > > > > > > In order to avoid it, ufshcd_clear_ua_wluns() can be called per > recovery > > > > > flows > > > > > such as suspend/resume, link_recovery, and error_handler. > > > > Not sure that suspend/resume are UAC events? > > > > > > Could you elaborate a bit? The goal is to clear UAC after UFS reset > happens. > > So why calling it on every suspend and resume? > > 1. If UAC was cleared, there's no impact. But the command is still sent. > 2. ufshcd_link_recovery() can reset UFS directly by ufs_mtk_resume(). > 3. ufshcd_suspend can call ufshcd_host_reset_and_restore() as well. Seems excessive IMO. Why not selectively send when indeed required, e.g. on reset?
On 12/21, Avri Altman wrote: > > > > > > On 12/21, Avri Altman wrote: > > > > > > When gate_work/ungate_work gets an error during hibern8_enter or > > > > exit, > > > > > > ufshcd_err_handler() > > > > > > ufshcd_scsi_block_requests() > > > > > > ufshcd_reset_and_restore() > > > > > > ufshcd_clear_ua_wluns() -> stuck > > > > > > ufshcd_scsi_unblock_requests() > > > > > > > > > > > > In order to avoid it, ufshcd_clear_ua_wluns() can be called per > > recovery > > > > > > flows > > > > > > such as suspend/resume, link_recovery, and error_handler. > > > > > Not sure that suspend/resume are UAC events? > > > > > > > > Could you elaborate a bit? The goal is to clear UAC after UFS reset > > happens. > > > So why calling it on every suspend and resume? > > > > 1. If UAC was cleared, there's no impact. > But the command is still sent. No, ufshcd_clear_ua_wluns() will return by hba->wlun_dev_clr_ua. > > > 2. ufshcd_link_recovery() can reset UFS directly by ufs_mtk_resume(). > > 3. ufshcd_suspend can call ufshcd_host_reset_and_restore() as well. > Seems excessive IMO. > Why not selectively send when indeed required, e.g. on reset? I think hba->wlun_dev_clr_ua is the indicator whether there was a reset or not.
> > > > > > > In order to avoid it, ufshcd_clear_ua_wluns() can be called per > > > recovery > > > > > > > flows > > > > > > > such as suspend/resume, link_recovery, and error_handler. > > > > > > Not sure that suspend/resume are UAC events? > > > > > > > > > > Could you elaborate a bit? The goal is to clear UAC after UFS reset > > > happens. > > > > So why calling it on every suspend and resume? > > > > > > 1. If UAC was cleared, there's no impact. > > But the command is still sent. > > No, ufshcd_clear_ua_wluns() will return by hba->wlun_dev_clr_ua. > > > > > > 2. ufshcd_link_recovery() can reset UFS directly by ufs_mtk_resume(). > > > 3. ufshcd_suspend can call ufshcd_host_reset_and_restore() as well. > > Seems excessive IMO. > > Why not selectively send when indeed required, e.g. on reset? > > I think hba->wlun_dev_clr_ua is the indicator whether there was a reset or > not. Ahha - I missed that. Thanks for clarifying it. OK Then. Thanks, Avri
Jaegeuk, > When gate_work/ungate_work gets an error during hibern8_enter or exit, > ufshcd_err_handler() > ufshcd_scsi_block_requests() > ufshcd_reset_and_restore() > ufshcd_clear_ua_wluns() -> stuck > ufshcd_scsi_unblock_requests() > > In order to avoid it, ufshcd_clear_ua_wluns() can be called per recovery flows > such as suspend/resume, link_recovery, and error_handler. > > Fixes: 1918651f2d7e ("scsi: ufs: Clear UAC for RPMB after ufshcd resets") > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> Please resubmit instead of replying to an existing patch. Both b4 and patchwork get confused. Thanks! -- Martin K. Petersen Oracle Linux Engineering
On 01/05, Martin K. Petersen wrote: > > Jaegeuk, > > > When gate_work/ungate_work gets an error during hibern8_enter or exit, > > ufshcd_err_handler() > > ufshcd_scsi_block_requests() > > ufshcd_reset_and_restore() > > ufshcd_clear_ua_wluns() -> stuck > > ufshcd_scsi_unblock_requests() > > > > In order to avoid it, ufshcd_clear_ua_wluns() can be called per recovery flows > > such as suspend/resume, link_recovery, and error_handler. > > > > Fixes: 1918651f2d7e ("scsi: ufs: Clear UAC for RPMB after ufshcd resets") > > Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> > > Please resubmit instead of replying to an existing patch. Both b4 and > patchwork get confused. Ok, I posted a new one. Thanks, > > Thanks! > > -- > Martin K. Petersen Oracle Linux Engineering
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index e221add25a7e..e711def829cd 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -3963,6 +3963,8 @@ int ufshcd_link_recovery(struct ufs_hba *hba) if (ret) dev_err(hba->dev, "%s: link recovery failed, err %d", __func__, ret); + else + ufshcd_clear_ua_wluns(hba); return ret; } @@ -5968,6 +5970,8 @@ static void ufshcd_err_handler(struct work_struct *work) ufshcd_scsi_unblock_requests(hba); ufshcd_err_handling_unprepare(hba); up(&hba->eh_sem); + + ufshcd_clear_ua_wluns(hba); } /** @@ -6908,14 +6912,11 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba) ufshcd_set_clk_freq(hba, true); err = ufshcd_hba_enable(hba); - if (err) - goto out; /* Establish the link again and restore the device */ - err = ufshcd_probe_hba(hba, false); if (!err) - ufshcd_clear_ua_wluns(hba); -out: + err = ufshcd_probe_hba(hba, false); + if (err) dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err); ufshcd_update_evt_hist(hba, UFS_EVT_HOST_RESET, (u32)err); @@ -8745,6 +8746,7 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) ufshcd_resume_clkscaling(hba); hba->clk_gating.is_suspended = false; hba->dev_info.b_rpm_dev_flush_capable = false; + ufshcd_clear_ua_wluns(hba); ufshcd_release(hba); out: if (hba->dev_info.b_rpm_dev_flush_capable) { @@ -8855,6 +8857,8 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) cancel_delayed_work(&hba->rpm_dev_flush_recheck_work); } + ufshcd_clear_ua_wluns(hba); + /* Schedule clock gating in case of no access to UFS device yet */ ufshcd_release(hba);
When gate_work/ungate_work gets an error during hibern8_enter or exit, ufshcd_err_handler() ufshcd_scsi_block_requests() ufshcd_reset_and_restore() ufshcd_clear_ua_wluns() -> stuck ufshcd_scsi_unblock_requests() In order to avoid it, ufshcd_clear_ua_wluns() can be called per recovery flows such as suspend/resume, link_recovery, and error_handler. Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org> --- drivers/scsi/ufs/ufshcd.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-)