Message ID | 20210322081044.62003-1-avri.altman@wdc.com |
---|---|
Headers | show |
Series | Add Host control mode to HPB | expand |
On 2021-03-22 16:10, Avri Altman wrote: > In host control mode, reads are the major source of activation trials. > Keep track of those reads counters, for both active as well inactive > regions. > > We reset the read counter upon write - we are only interested in > "clean" > reads. > > Keep those counters normalized, as we are using those reads as a > comparative score, to make various decisions. > If during consecutive normalizations an active region has exhaust its > reads - inactivate it. > > while at it, protect the {active,inactive}_count stats by adding them > into the applicable handler. > > Signed-off-by: Avri Altman <avri.altman@wdc.com> > --- > drivers/scsi/ufs/ufshpb.c | 100 +++++++++++++++++++++++++++++++------- > drivers/scsi/ufs/ufshpb.h | 5 ++ > 2 files changed, 88 insertions(+), 17 deletions(-) > > diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c > index d4f0bb6d8fa1..a1519cbb4ce0 100644 > --- a/drivers/scsi/ufs/ufshpb.c > +++ b/drivers/scsi/ufs/ufshpb.c > @@ -16,6 +16,8 @@ > #include "ufshpb.h" > #include "../sd.h" > > +#define ACTIVATION_THRESHOLD 8 /* 8 IOs */ > + > /* memory management */ > static struct kmem_cache *ufshpb_mctx_cache; > static mempool_t *ufshpb_mctx_pool; > @@ -546,6 +548,23 @@ static int ufshpb_issue_pre_req(struct ufshpb_lu > *hpb, struct scsi_cmnd *cmd, > return ret; > } > > +static void ufshpb_update_active_info(struct ufshpb_lu *hpb, int > rgn_idx, > + int srgn_idx) > +{ > + struct ufshpb_region *rgn; > + struct ufshpb_subregion *srgn; > + > + rgn = hpb->rgn_tbl + rgn_idx; > + srgn = rgn->srgn_tbl + srgn_idx; > + > + list_del_init(&rgn->list_inact_rgn); > + > + if (list_empty(&srgn->list_act_srgn)) > + list_add_tail(&srgn->list_act_srgn, &hpb->lh_act_srgn); > + > + hpb->stats.rb_active_cnt++; > +} > + > /* > * This function will set up HPB read command using host-side L2P map > data. 
> */ > @@ -596,12 +615,43 @@ int ufshpb_prep(struct ufs_hba *hba, struct > ufshcd_lrb *lrbp) > ufshpb_set_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset, > transfer_len); > spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); > + > + if (hpb->is_hcm) { > + spin_lock(&rgn->rgn_lock); > + rgn->reads = 0; > + spin_unlock(&rgn->rgn_lock); > + } > + > return 0; > } > > if (!ufshpb_is_support_chunk(hpb, transfer_len)) > return 0; > > + if (hpb->is_hcm) { > + bool activate = false; > + /* > + * in host control mode, reads are the main source for > + * activation trials. > + */ > + spin_lock(&rgn->rgn_lock); > + rgn->reads++; > + if (rgn->reads == ACTIVATION_THRESHOLD) > + activate = true; > + spin_unlock(&rgn->rgn_lock); > + if (activate) { > + spin_lock_irqsave(&hpb->rsp_list_lock, flags); > + ufshpb_update_active_info(hpb, rgn_idx, srgn_idx); If a transfer_len (possible with HPB2.0) sits accross two regions/sub-regions, here it only updates active info of the first region/sub-region. Thanks, Can Guo. 
> + spin_unlock_irqrestore(&hpb->rsp_list_lock, flags); > + dev_dbg(&hpb->sdev_ufs_lu->sdev_dev, > + "activate region %d-%d\n", rgn_idx, srgn_idx); > + } > + > + /* keep those counters normalized */ > + if (rgn->reads > hpb->entries_per_srgn) > + schedule_work(&hpb->ufshpb_normalization_work); > + } > + > spin_lock_irqsave(&hpb->rgn_state_lock, flags); > if (ufshpb_test_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset, > transfer_len)) { > @@ -741,21 +791,6 @@ static int ufshpb_clear_dirty_bitmap(struct > ufshpb_lu *hpb, > return 0; > } > > -static void ufshpb_update_active_info(struct ufshpb_lu *hpb, int > rgn_idx, > - int srgn_idx) > -{ > - struct ufshpb_region *rgn; > - struct ufshpb_subregion *srgn; > - > - rgn = hpb->rgn_tbl + rgn_idx; > - srgn = rgn->srgn_tbl + srgn_idx; > - > - list_del_init(&rgn->list_inact_rgn); > - > - if (list_empty(&srgn->list_act_srgn)) > - list_add_tail(&srgn->list_act_srgn, &hpb->lh_act_srgn); > -} > - > static void ufshpb_update_inactive_info(struct ufshpb_lu *hpb, int > rgn_idx) > { > struct ufshpb_region *rgn; > @@ -769,6 +804,8 @@ static void ufshpb_update_inactive_info(struct > ufshpb_lu *hpb, int rgn_idx) > > if (list_empty(&rgn->list_inact_rgn)) > list_add_tail(&rgn->list_inact_rgn, &hpb->lh_inact_rgn); > + > + hpb->stats.rb_inactive_cnt++; > } > > static void ufshpb_activate_subregion(struct ufshpb_lu *hpb, > @@ -1089,6 +1126,7 @@ static int ufshpb_evict_region(struct ufshpb_lu > *hpb, struct ufshpb_region *rgn) > rgn->rgn_idx); > goto out; > } > + > if (!list_empty(&rgn->list_lru_rgn)) { > if (ufshpb_check_srgns_issue_state(hpb, rgn)) { > ret = -EBUSY; > @@ -1283,7 +1321,6 @@ static void ufshpb_rsp_req_region_update(struct > ufshpb_lu *hpb, > if (srgn->srgn_state == HPB_SRGN_VALID) > srgn->srgn_state = HPB_SRGN_INVALID; > spin_unlock(&hpb->rgn_state_lock); > - hpb->stats.rb_active_cnt++; > } > > if (hpb->is_hcm) { > @@ -1315,7 +1352,6 @@ static void ufshpb_rsp_req_region_update(struct > ufshpb_lu *hpb, > } > 
spin_unlock(&hpb->rgn_state_lock); > > - hpb->stats.rb_inactive_cnt++; > } > > out: > @@ -1514,6 +1550,29 @@ static void > ufshpb_run_inactive_region_list(struct ufshpb_lu *hpb) > spin_unlock_irqrestore(&hpb->rsp_list_lock, flags); > } > > +static void ufshpb_normalization_work_handler(struct work_struct > *work) > +{ > + struct ufshpb_lu *hpb = container_of(work, struct ufshpb_lu, > + ufshpb_normalization_work); > + int rgn_idx; > + > + for (rgn_idx = 0; rgn_idx < hpb->rgns_per_lu; rgn_idx++) { > + struct ufshpb_region *rgn = hpb->rgn_tbl + rgn_idx; > + > + spin_lock(&rgn->rgn_lock); > + rgn->reads = (rgn->reads >> 1); > + spin_unlock(&rgn->rgn_lock); > + > + if (rgn->rgn_state != HPB_RGN_ACTIVE || rgn->reads) > + continue; > + > + /* if region is active but has no reads - inactivate it */ > + spin_lock(&hpb->rsp_list_lock); > + ufshpb_update_inactive_info(hpb, rgn->rgn_idx); > + spin_unlock(&hpb->rsp_list_lock); > + } > +} > + > static void ufshpb_map_work_handler(struct work_struct *work) > { > struct ufshpb_lu *hpb = container_of(work, struct ufshpb_lu, > map_work); > @@ -1673,6 +1732,8 @@ static int ufshpb_alloc_region_tbl(struct > ufs_hba *hba, struct ufshpb_lu *hpb) > rgn = rgn_table + rgn_idx; > rgn->rgn_idx = rgn_idx; > > + spin_lock_init(&rgn->rgn_lock); > + > INIT_LIST_HEAD(&rgn->list_inact_rgn); > INIT_LIST_HEAD(&rgn->list_lru_rgn); > > @@ -1914,6 +1975,9 @@ static int ufshpb_lu_hpb_init(struct ufs_hba > *hba, struct ufshpb_lu *hpb) > INIT_LIST_HEAD(&hpb->list_hpb_lu); > > INIT_WORK(&hpb->map_work, ufshpb_map_work_handler); > + if (hpb->is_hcm) > + INIT_WORK(&hpb->ufshpb_normalization_work, > + ufshpb_normalization_work_handler); > > hpb->map_req_cache = kmem_cache_create("ufshpb_req_cache", > sizeof(struct ufshpb_req), 0, 0, NULL); > @@ -2013,6 +2077,8 @@ static void ufshpb_discard_rsp_lists(struct > ufshpb_lu *hpb) > > static void ufshpb_cancel_jobs(struct ufshpb_lu *hpb) > { > + if (hpb->is_hcm) > + cancel_work_sync(&hpb->ufshpb_normalization_work); 
> cancel_work_sync(&hpb->map_work); > } > > diff --git a/drivers/scsi/ufs/ufshpb.h b/drivers/scsi/ufs/ufshpb.h > index 032672114881..32d72c46c57a 100644 > --- a/drivers/scsi/ufs/ufshpb.h > +++ b/drivers/scsi/ufs/ufshpb.h > @@ -123,6 +123,10 @@ struct ufshpb_region { > struct list_head list_lru_rgn; > unsigned long rgn_flags; > #define RGN_FLAG_DIRTY 0 > + > + /* region reads - for host mode */ > + spinlock_t rgn_lock; > + unsigned int reads; > }; > > #define for_each_sub_region(rgn, i, srgn) \ > @@ -212,6 +216,7 @@ struct ufshpb_lu { > > /* for selecting victim */ > struct victim_select_info lru_info; > + struct work_struct ufshpb_normalization_work; > > /* pinned region information */ > u32 lu_pinned_start;
> > @@ -596,12 +615,43 @@ int ufshpb_prep(struct ufs_hba *hba, struct > > ufshcd_lrb *lrbp) > > ufshpb_set_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset, > > transfer_len); > > spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); > > + > > + if (hpb->is_hcm) { > > + spin_lock(&rgn->rgn_lock); > > + rgn->reads = 0; > > + spin_unlock(&rgn->rgn_lock); Here also. > > + } > > + > > return 0; > > } > > > > if (!ufshpb_is_support_chunk(hpb, transfer_len)) > > return 0; > > > > + if (hpb->is_hcm) { > > + bool activate = false; > > + /* > > + * in host control mode, reads are the main source for > > + * activation trials. > > + */ > > + spin_lock(&rgn->rgn_lock); > > + rgn->reads++; > > + if (rgn->reads == ACTIVATION_THRESHOLD) > > + activate = true; > > + spin_unlock(&rgn->rgn_lock); > > + if (activate) { > > + spin_lock_irqsave(&hpb->rsp_list_lock, flags); > > + ufshpb_update_active_info(hpb, rgn_idx, srgn_idx); > > If a transfer_len (possible with HPB2.0) sits accross two > regions/sub-regions, > here it only updates active info of the first region/sub-region. Yes. Will fix. Thanks, Avri > > Thanks, > Can Guo. >
On 2021-03-22 16:10, Avri Altman wrote: > In host mode, eviction is considered an extreme measure. > verify that the entering region has enough reads, and the exiting > region has much less reads. > > Signed-off-by: Avri Altman <avri.altman@wdc.com> > --- > drivers/scsi/ufs/ufshpb.c | 18 +++++++++++++++++- > 1 file changed, 17 insertions(+), 1 deletion(-) > > diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c > index a1519cbb4ce0..5e757220d66a 100644 > --- a/drivers/scsi/ufs/ufshpb.c > +++ b/drivers/scsi/ufs/ufshpb.c > @@ -17,6 +17,7 @@ > #include "../sd.h" > > #define ACTIVATION_THRESHOLD 8 /* 8 IOs */ > +#define EVICTION_THRESHOLD (ACTIVATION_THRESHOLD << 5) /* 256 IOs */ > > /* memory management */ > static struct kmem_cache *ufshpb_mctx_cache; > @@ -1047,6 +1048,13 @@ static struct ufshpb_region > *ufshpb_victim_lru_info(struct ufshpb_lu *hpb) > if (ufshpb_check_srgns_issue_state(hpb, rgn)) > continue; > > + /* > + * in host control mode, verify that the exiting region > + * has less reads > + */ > + if (hpb->is_hcm && rgn->reads > (EVICTION_THRESHOLD >> 1)) > + continue; > + > victim_rgn = rgn; > break; > } > @@ -1219,7 +1227,7 @@ static int ufshpb_issue_map_req(struct ufshpb_lu > *hpb, > > static int ufshpb_add_region(struct ufshpb_lu *hpb, struct > ufshpb_region *rgn) > { > - struct ufshpb_region *victim_rgn; > + struct ufshpb_region *victim_rgn = NULL; > struct victim_select_info *lru_info = &hpb->lru_info; > unsigned long flags; > int ret = 0; > @@ -1246,7 +1254,15 @@ static int ufshpb_add_region(struct ufshpb_lu > *hpb, struct ufshpb_region *rgn) > * It is okay to evict the least recently used region, > * because the device could detect this region > * by not issuing HPB_READ > + * > + * in host control mode, verify that the entering > + * region has enough reads > */ > + if (hpb->is_hcm && rgn->reads < EVICTION_THRESHOLD) { > + ret = -EACCES; > + goto out; > + } > + I cannot understand the logic behind this. 
A region that the host chooses to activate is currently in the INACTIVE state; if its rgn->reads < 256, it is not activated. Could you please elaborate? Thanks, Can Guo. > victim_rgn = ufshpb_victim_lru_info(hpb); > if (!victim_rgn) { > dev_warn(&hpb->sdev_ufs_lu->sdev_dev,
> > On 2021-03-22 16:10, Avri Altman wrote: > > In host mode, eviction is considered an extreme measure. > > verify that the entering region has enough reads, and the exiting > > region has much less reads. > > > > Signed-off-by: Avri Altman <avri.altman@wdc.com> > > --- > > drivers/scsi/ufs/ufshpb.c | 18 +++++++++++++++++- > > 1 file changed, 17 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/scsi/ufs/ufshpb.c b/drivers/scsi/ufs/ufshpb.c > > index a1519cbb4ce0..5e757220d66a 100644 > > --- a/drivers/scsi/ufs/ufshpb.c > > +++ b/drivers/scsi/ufs/ufshpb.c > > @@ -17,6 +17,7 @@ > > #include "../sd.h" > > > > #define ACTIVATION_THRESHOLD 8 /* 8 IOs */ > > +#define EVICTION_THRESHOLD (ACTIVATION_THRESHOLD << 5) /* 256 IOs > */ > > > > /* memory management */ > > static struct kmem_cache *ufshpb_mctx_cache; > > @@ -1047,6 +1048,13 @@ static struct ufshpb_region > > *ufshpb_victim_lru_info(struct ufshpb_lu *hpb) > > if (ufshpb_check_srgns_issue_state(hpb, rgn)) > > continue; > > > > + /* > > + * in host control mode, verify that the exiting region > > + * has less reads > > + */ > > + if (hpb->is_hcm && rgn->reads > (EVICTION_THRESHOLD >> 1)) > > + continue; > > + > > victim_rgn = rgn; > > break; > > } > > @@ -1219,7 +1227,7 @@ static int ufshpb_issue_map_req(struct ufshpb_lu > > *hpb, > > > > static int ufshpb_add_region(struct ufshpb_lu *hpb, struct > > ufshpb_region *rgn) > > { > > - struct ufshpb_region *victim_rgn; > > + struct ufshpb_region *victim_rgn = NULL; > > struct victim_select_info *lru_info = &hpb->lru_info; > > unsigned long flags; > > int ret = 0; > > @@ -1246,7 +1254,15 @@ static int ufshpb_add_region(struct ufshpb_lu > > *hpb, struct ufshpb_region *rgn) > > * It is okay to evict the least recently used region, > > * because the device could detect this region > > * by not issuing HPB_READ > > + * > > + * in host control mode, verify that the entering > > + * region has enough reads > > */ > > + if (hpb->is_hcm && rgn->reads < 
EVICTION_THRESHOLD) { > > + ret = -EACCES; > > + goto out; > > + } > > + > > I cannot understand the logic behind this. A rgn which host chooses to > activate, > is in INACTIVE state now, if its rgn->reads < 256, then don't activate > it. > Could you please elaborate? I am re-citing the commit log: "In host mode, eviction is considered an extreme measure. verify that the entering region has enough reads, and the exiting region has much less reads." This is where the reads counter comes into play as a comparative index. The maximum number of active regions has been reached, so to activate a region you need to evict another one. But the activation threshold is relatively low, so how do you know that you will benefit more from the new region than from the one you choose to evict? Rather than arbitrarily evicting the "first" (LRU) region, as in device mode, we are looking for a solid reason for the new region to enter and for the existing region to leave. Otherwise, you will find yourself entering and exiting the same region over and over, just thrashing the active list and creating unnecessary overhead by repeatedly sending map requests. For example, say the entering region has 4 reads, but the LRU region has 200, and its reads keep coming. Is it the "correct" decision to evict a 200-reads region for a 4-reads region? If you do evict this 200-reads region, you will then evict another one to put it right back, over and over. On the other hand, we are not hanging on to "cold" regions: we inactivate them if there are no recent reads to that region - see the patch with the "Cold" timeout. I agree that this can be elaborated into more sophisticated policies - which we tried. For now, let's go with the simplest one - use thresholds for both the entering and exiting regions. Thanks, Avri > > Thanks, > Can Guo. > > > victim_rgn = ufshpb_victim_lru_info(hpb); > > if (!victim_rgn) { > > dev_warn(&hpb->sdev_ufs_lu->sdev_dev,
> > > @@ -596,12 +615,43 @@ int ufshpb_prep(struct ufs_hba *hba, struct > > > ufshcd_lrb *lrbp) > > > ufshpb_set_ppn_dirty(hpb, rgn_idx, srgn_idx, srgn_offset, > > > transfer_len); > > > spin_unlock_irqrestore(&hpb->rgn_state_lock, flags); > > > + > > > + if (hpb->is_hcm) { > > > + spin_lock(&rgn->rgn_lock); > > > + rgn->reads = 0; > > > + spin_unlock(&rgn->rgn_lock); > Here also. > > > > + } > > > + > > > return 0; > > > } > > > > > > if (!ufshpb_is_support_chunk(hpb, transfer_len)) > > > return 0; > > > > > > + if (hpb->is_hcm) { > > > + bool activate = false; > > > + /* > > > + * in host control mode, reads are the main source for > > > + * activation trials. > > > + */ > > > + spin_lock(&rgn->rgn_lock); > > > + rgn->reads++; > > > + if (rgn->reads == ACTIVATION_THRESHOLD) > > > + activate = true; > > > + spin_unlock(&rgn->rgn_lock); > > > + if (activate) { > > > + spin_lock_irqsave(&hpb->rsp_list_lock, flags); > > > + ufshpb_update_active_info(hpb, rgn_idx, srgn_idx); > > > > If a transfer_len (possible with HPB2.0) sits accross two > > regions/sub-regions, > > here it only updates active info of the first region/sub-region. > Yes. Will fix. Giving it another look, I noticed that the current design only supports a single subregion per region. As activation is done per subregion, we need to count reads per subregion and make those activation decisions accordingly. Still, the read counter per region needs to stay, as well as its spinlock for the inactivation decision. Will fix it in my next version. Waiting for v32. Thanks, Avri > > Thanks, > Avri > > > > > Thanks, > > Can Guo. > >