diff mbox series

[1/2] scsi: ufs: Introduce hba performance monitor sysfs nodes

Message ID 1617257704-1154-2-git-send-email-cang@codeaurora.org
State Superseded
Headers show
Series [1/2] scsi: ufs: Introduce hba performance monitor sysfs nodes | expand

Commit Message

Can Guo April 1, 2021, 6:15 a.m. UTC
Add a new sysfs group which has nodes to monitor data/request transfer
performance. This sysfs group has nodes showing total sectors/requests
transferred, total busy time spent and max/min/avg/sum latencies. This
group can be enhanced later to show more UFS driver layer performance
statistics data during runtime.

Signed-off-by: Can Guo <cang@codeaurora.org>

Comments

Daejun Park April 6, 2021, 4:11 a.m. UTC | #1
Hi Can Guo,

> +static ssize_t monitor_enable_store(struct device *dev,

> +                                    struct device_attribute *attr,

> +                                    const char *buf, size_t count)

> +{

> +        struct ufs_hba *hba = dev_get_drvdata(dev);

> +        unsigned long value, flags;

> +

> +        if (kstrtoul(buf, 0, &value))

> +                return -EINVAL;

> +

> +        value = !!value;

> +        spin_lock_irqsave(hba->host->host_lock, flags);

> +        if (value == hba->monitor.enabled)

> +                goto out_unlock;

> +

> +        if (!value) {

> +                memset(&hba->monitor, 0, sizeof(hba->monitor));

> +        } else {

> +                hba->monitor.enabled = true;

> +                hba->monitor.enabled_ts = ktime_get();


How about setting lat_max and lat_min to KTIME_MAX and 0, respectively?
I think lat_sum should be 0 at this point.

> +        }

> +

> +out_unlock:

> +        spin_unlock_irqrestore(hba->host->host_lock, flags);

> +        return count;

> +}



> +static void ufshcd_update_monitor(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)

> +{

> +        int dir = ufshcd_monitor_opcode2dir(*lrbp->cmd->cmnd);

> +

> +        if (dir >= 0 && hba->monitor.nr_queued[dir] > 0) {

> +                struct request *req = lrbp->cmd->request;

> +                struct ufs_hba_monitor *m = &hba->monitor;

> +                ktime_t now, inc, lat;

> +

> +                now = ktime_get();


How about using lrbp->compl_time_stamp instead of getting new value?

> +                inc = ktime_sub(now, m->busy_start_ts[dir]);

> +                m->total_busy[dir] = ktime_add(m->total_busy[dir], inc);

> +                m->nr_sec_rw[dir] += blk_rq_sectors(req);

> +

> +                /* Update latencies */

> +                m->nr_req[dir]++;

> +                lat = ktime_sub(now, lrbp->issue_time_stamp);

> +                m->lat_sum[dir] += lat;

> +                if (m->lat_max[dir] < lat || !m->lat_max[dir])

> +                        m->lat_max[dir] = lat;

> +                if (m->lat_min[dir] > lat || !m->lat_min[dir])

> +                        m->lat_min[dir] = lat;


These if statements can be shortened by initializing lat_max / lat_min to default values.

> +

> +                m->nr_queued[dir]--;

> +                /* Push forward the busy start of monitor */

> +                m->busy_start_ts[dir] = now;

> +        }

> +}


Thanks,
Daejun
Can Guo April 6, 2021, 5:37 a.m. UTC | #2
Hi Daejun,

On 2021-04-06 12:11, Daejun Park wrote:
> Hi Can Guo,

> 

>> +static ssize_t monitor_enable_store(struct device *dev,

>> +                                    struct device_attribute *attr,

>> +                                    const char *buf, size_t count)

>> +{

>> +        struct ufs_hba *hba = dev_get_drvdata(dev);

>> +        unsigned long value, flags;

>> +

>> +        if (kstrtoul(buf, 0, &value))

>> +                return -EINVAL;

>> +

>> +        value = !!value;

>> +        spin_lock_irqsave(hba->host->host_lock, flags);

>> +        if (value == hba->monitor.enabled)

>> +                goto out_unlock;

>> +

>> +        if (!value) {

>> +                memset(&hba->monitor, 0, sizeof(hba->monitor));

>> +        } else {

>> +                hba->monitor.enabled = true;

>> +                hba->monitor.enabled_ts = ktime_get();

> 

> How about setting lat_max to and lat_min to KTIME_MAX and 0?


lat_min is already 0. What is the benefit of setting lat_max to 
KTIME_MAX?

> I think lat_sum should be 0 at this point.


lat_sum is already 0 at this point, what is the problem?

> 

>> +        }

>> +

>> +out_unlock:

>> +        spin_unlock_irqrestore(hba->host->host_lock, flags);

>> +        return count;

>> +}

> 

> 

>> +static void ufshcd_update_monitor(struct ufs_hba *hba, struct 

>> ufshcd_lrb *lrbp)

>> +{

>> +        int dir = ufshcd_monitor_opcode2dir(*lrbp->cmd->cmnd);

>> +

>> +        if (dir >= 0 && hba->monitor.nr_queued[dir] > 0) {

>> +                struct request *req = lrbp->cmd->request;

>> +                struct ufs_hba_monitor *m = &hba->monitor;

>> +                ktime_t now, inc, lat;

>> +

>> +                now = ktime_get();

> 

> How about using lrbp->compl_time_stamp instead of getting new value?


I expect "now" to keep increasing, and I use it to update
m->busy_start_ts. If I used lrbp->compl_time_stamp for that instead, the
ktime_sub() below could give an unexpected value, because
lrbp->compl_time_stamp may be smaller than m->busy_start_ts: the device
does not complete requests in the same order as the bits set in
hba->outstanding_tasks, but the driver completes them from bit 0 to
bit 31 in ascending order.

> 

>> +                inc = ktime_sub(now, m->busy_start_ts[dir]);

>> +                m->total_busy[dir] = ktime_add(m->total_busy[dir], 

>> inc);

>> +                m->nr_sec_rw[dir] += blk_rq_sectors(req);

>> +

>> +                /* Update latencies */

>> +                m->nr_req[dir]++;

>> +                lat = ktime_sub(now, lrbp->issue_time_stamp);

>> +                m->lat_sum[dir] += lat;

>> +                if (m->lat_max[dir] < lat || !m->lat_max[dir])

>> +                        m->lat_max[dir] = lat;

>> +                if (m->lat_min[dir] > lat || !m->lat_min[dir])

>> +                        m->lat_min[dir] = lat;

> 

> This if statement can be shorted, by setting lat_max / lat_min as 

> default value.


I don't quite get it, can you show me the code sample?

Thanks,
Can Guo

> 

>> +

>> +                m->nr_queued[dir]--;

>> +                /* Push forward the busy start of monitor */

>> +                m->busy_start_ts[dir] = now;

>> +        }

>> +}

> 

> Thanks,

> Daejun
Can Guo April 6, 2021, 5:43 a.m. UTC | #3
On 2021-04-06 13:37, Can Guo wrote:
> Hi Daejun,

> 

> On 2021-04-06 12:11, Daejun Park wrote:

>> Hi Can Guo,

>> 

>>> +static ssize_t monitor_enable_store(struct device *dev,

>>> +                                    struct device_attribute *attr,

>>> +                                    const char *buf, size_t count)

>>> +{

>>> +        struct ufs_hba *hba = dev_get_drvdata(dev);

>>> +        unsigned long value, flags;

>>> +

>>> +        if (kstrtoul(buf, 0, &value))

>>> +                return -EINVAL;

>>> +

>>> +        value = !!value;

>>> +        spin_lock_irqsave(hba->host->host_lock, flags);

>>> +        if (value == hba->monitor.enabled)

>>> +                goto out_unlock;

>>> +

>>> +        if (!value) {

>>> +                memset(&hba->monitor, 0, sizeof(hba->monitor));

>>> +        } else {

>>> +                hba->monitor.enabled = true;

>>> +                hba->monitor.enabled_ts = ktime_get();

>> 

>> How about setting lat_max to and lat_min to KTIME_MAX and 0?

> 

> lat_min is already 0. What is the benefit of setting lat_max to 

> KTIME_MAX?

> 

>> I think lat_sum should be 0 at this point.

> 

> lat_sum is already 0 at this point, what is the problem?

> 

>> 

>>> +        }

>>> +

>>> +out_unlock:

>>> +        spin_unlock_irqrestore(hba->host->host_lock, flags);

>>> +        return count;

>>> +}

>> 

>> 

>>> +static void ufshcd_update_monitor(struct ufs_hba *hba, struct 

>>> ufshcd_lrb *lrbp)

>>> +{

>>> +        int dir = ufshcd_monitor_opcode2dir(*lrbp->cmd->cmnd);

>>> +

>>> +        if (dir >= 0 && hba->monitor.nr_queued[dir] > 0) {

>>> +                struct request *req = lrbp->cmd->request;

>>> +                struct ufs_hba_monitor *m = &hba->monitor;

>>> +                ktime_t now, inc, lat;

>>> +

>>> +                now = ktime_get();

>> 

>> How about using lrbp->compl_time_stamp instead of getting new value?

> 

> I am expecting "now" keeps increasing and use it to update 

> m->busy_start_s,

> but if I use lrbp->compl_time_stamp to do that, below line ktime_sub() 

> may

> give me an unexpected value as lrbp->compl_time_stamp may be smaller 

> than

> m->busy_start_ts, because the actual requests are not completed by the 

> device

> in the exact same ordering as the bits set in hba->outstanding_tasks, 

> but driver

> is completing them from bit 0 to bit 31 in ascending order.


Sorry, I misunderstood your point... Yes, we can use
lrbp->compl_time_stamp.

Thanks,
Can Guo.

> 

>> 

>>> +                inc = ktime_sub(now, m->busy_start_ts[dir]);

>>> +                m->total_busy[dir] = ktime_add(m->total_busy[dir], 

>>> inc);

>>> +                m->nr_sec_rw[dir] += blk_rq_sectors(req);

>>> +

>>> +                /* Update latencies */

>>> +                m->nr_req[dir]++;

>>> +                lat = ktime_sub(now, lrbp->issue_time_stamp);

>>> +                m->lat_sum[dir] += lat;

>>> +                if (m->lat_max[dir] < lat || !m->lat_max[dir])

>>> +                        m->lat_max[dir] = lat;

>>> +                if (m->lat_min[dir] > lat || !m->lat_min[dir])

>>> +                        m->lat_min[dir] = lat;

>> 

>> This if statement can be shorted, by setting lat_max / lat_min as 

>> default value.

> 

> I don't quite get it, can you show me the code sample?

> 

> Thanks,

> Can Guo

> 

>> 

>>> +

>>> +                m->nr_queued[dir]--;

>>> +                /* Push forward the busy start of monitor */

>>> +                m->busy_start_ts[dir] = now;

>>> +        }

>>> +}

>> 

>> Thanks,

>> Daejun
Daejun Park April 6, 2021, 5:58 a.m. UTC | #4
Hi Can Guo,
> 

>Hi Daejun,

> 

>On 2021-04-06 12:11, Daejun Park wrote:

>> Hi Can Guo,

>> 

>>> +static ssize_t monitor_enable_store(struct device *dev,

>>> +                                    struct device_attribute *attr,

>>> +                                    const char *buf, size_t count)

>>> +{

>>> +        struct ufs_hba *hba = dev_get_drvdata(dev);

>>> +        unsigned long value, flags;

>>> +

>>> +        if (kstrtoul(buf, 0, &value))

>>> +                return -EINVAL;

>>> +

>>> +        value = !!value;

>>> +        spin_lock_irqsave(hba->host->host_lock, flags);

>>> +        if (value == hba->monitor.enabled)

>>> +                goto out_unlock;

>>> +

>>> +        if (!value) {

>>> +                memset(&hba->monitor, 0, sizeof(hba->monitor));

>>> +        } else {

>>> +                hba->monitor.enabled = true;

>>> +                hba->monitor.enabled_ts = ktime_get();

>> 

>> How about setting lat_max to and lat_min to KTIME_MAX and 0?

> 

>lat_min is already 0. What is the benefit of setting lat_max to 

>KTIME_MAX?

> 

>> I think lat_sum should be 0 at this point.

> 

>lat_sum is already 0 at this point, what is the problem?


Sorry, I misunderstood how the monitor values are reset.

> 

>> 

>>> +        }

>>> +

>>> +out_unlock:

>>> +        spin_unlock_irqrestore(hba->host->host_lock, flags);

>>> +        return count;

>>> +}

>> 

>> 

>>> +static void ufshcd_update_monitor(struct ufs_hba *hba, struct 

>>> ufshcd_lrb *lrbp)

>>> +{

>>> +        int dir = ufshcd_monitor_opcode2dir(*lrbp->cmd->cmnd);

>>> +

>>> +        if (dir >= 0 && hba->monitor.nr_queued[dir] > 0) {

>>> +                struct request *req = lrbp->cmd->request;

>>> +                struct ufs_hba_monitor *m = &hba->monitor;

>>> +                ktime_t now, inc, lat;

>>> +

>>> +                now = ktime_get();

>> 

>> How about using lrbp->compl_time_stamp instead of getting new value?

> 

>I am expecting "now" keeps increasing and use it to update 

>m->busy_start_s,

>but if I use lrbp->compl_time_stamp to do that, below line ktime_sub() 

>may

>give me an unexpected value as lrbp->compl_time_stamp may be smaller 

>than

>m->busy_start_ts, because the actual requests are not completed by the 

>device

>in the exact same ordering as the bits set in hba->outstanding_tasks, 

>but driver

>is completing them from bit 0 to bit 31 in ascending order.


lrbp->compl_time_stamp is set just before calling ufshcd_update_monitor().
And I don't think the difference can be a negative value, because
ufshcd_send_command() and __ufshcd_transfer_req_compl() are both protected
by the host lock.

> 

>> 

>>> +                inc = ktime_sub(now, m->busy_start_ts[dir]);

>>> +                m->total_busy[dir] = ktime_add(m->total_busy[dir], 

>>> inc);

>>> +                m->nr_sec_rw[dir] += blk_rq_sectors(req);

>>> +

>>> +                /* Update latencies */

>>> +                m->nr_req[dir]++;

>>> +                lat = ktime_sub(now, lrbp->issue_time_stamp);

>>> +                m->lat_sum[dir] += lat;

>>> +                if (m->lat_max[dir] < lat || !m->lat_max[dir])

>>> +                        m->lat_max[dir] = lat;

>>> +                if (m->lat_min[dir] > lat || !m->lat_min[dir])

>>> +                        m->lat_min[dir] = lat;

>> 

>> This if statement can be shorted, by setting lat_max / lat_min as 

>> default value.

> 

>I don't quite get it, can you show me the code sample?


I think " || !m->lat_max[dir]" can be removed.

                if (m->lat_max[dir] < lat)
                        m->lat_max[dir] = lat;
                if (m->lat_min[dir] > lat)
                        m->lat_min[dir] = lat;
						
Thanks,
Daejun

> 

>Thanks,

>Can Guo

> 

>> 

>>> +

>>> +                m->nr_queued[dir]--;

>>> +                /* Push forward the busy start of monitor */

>>> +                m->busy_start_ts[dir] = now;

>>> +        }

>>> +}

>> 

>> Thanks,

>> Daejun
Can Guo April 6, 2021, 6:11 a.m. UTC | #5
On 2021-04-06 13:58, Daejun Park wrote:
> Hi Can Guo,

>> 

>> Hi Daejun,

>> 

>> On 2021-04-06 12:11, Daejun Park wrote:

>>> Hi Can Guo,

>>> 

>>>> +static ssize_t monitor_enable_store(struct device *dev,

>>>> +                                    struct device_attribute *attr,

>>>> +                                    const char *buf, size_t count)

>>>> +{

>>>> +        struct ufs_hba *hba = dev_get_drvdata(dev);

>>>> +        unsigned long value, flags;

>>>> +

>>>> +        if (kstrtoul(buf, 0, &value))

>>>> +                return -EINVAL;

>>>> +

>>>> +        value = !!value;

>>>> +        spin_lock_irqsave(hba->host->host_lock, flags);

>>>> +        if (value == hba->monitor.enabled)

>>>> +                goto out_unlock;

>>>> +

>>>> +        if (!value) {

>>>> +                memset(&hba->monitor, 0, sizeof(hba->monitor));

>>>> +        } else {

>>>> +                hba->monitor.enabled = true;

>>>> +                hba->monitor.enabled_ts = ktime_get();

>>> 

>>> How about setting lat_max to and lat_min to KTIME_MAX and 0?

>> 

>> lat_min is already 0. What is the benefit of setting lat_max to

>> KTIME_MAX?

>> 

>>> I think lat_sum should be 0 at this point.

>> 

>> lat_sum is already 0 at this point, what is the problem?

> 

> Sorry. I misunderstood about resetting monitor values.

> 

>> 

>>> 

>>>> +        }

>>>> +

>>>> +out_unlock:

>>>> +        spin_unlock_irqrestore(hba->host->host_lock, flags);

>>>> +        return count;

>>>> +}

>>> 

>>> 

>>>> +static void ufshcd_update_monitor(struct ufs_hba *hba, struct

>>>> ufshcd_lrb *lrbp)

>>>> +{

>>>> +        int dir = ufshcd_monitor_opcode2dir(*lrbp->cmd->cmnd);

>>>> +

>>>> +        if (dir >= 0 && hba->monitor.nr_queued[dir] > 0) {

>>>> +                struct request *req = lrbp->cmd->request;

>>>> +                struct ufs_hba_monitor *m = &hba->monitor;

>>>> +                ktime_t now, inc, lat;

>>>> +

>>>> +                now = ktime_get();

>>> 

>>> How about using lrbp->compl_time_stamp instead of getting new value?

>> 

>> I am expecting "now" keeps increasing and use it to update

>> m->busy_start_s,

>> but if I use lrbp->compl_time_stamp to do that, below line ktime_sub()

>> may

>> give me an unexpected value as lrbp->compl_time_stamp may be smaller

>> than

>> m->busy_start_ts, because the actual requests are not completed by the

>> device

>> in the exact same ordering as the bits set in hba->outstanding_tasks,

>> but driver

>> is completing them from bit 0 to bit 31 in ascending order.

> 

> lrbp->compl_time_stamp is set just before calling 

> ufshcd_update_monitor().

> And I don't think it can be negative value, because 

> ufshcd_send_command()

> and __ufshcd_transfer_req_compl() are protected by host lock.

> 


Yes, I replied to you in another mail... I will use compl_time_stamp in
the next version. Later I will also add alloc_time_stamp and
release_time_stamp to lrbp so that we can monitor the overall send/compl
path, including hpb_prep() and hpb_rsp().

>> 

>>> 

>>>> +                inc = ktime_sub(now, m->busy_start_ts[dir]);

>>>> +                m->total_busy[dir] = ktime_add(m->total_busy[dir],

>>>> inc);

>>>> +                m->nr_sec_rw[dir] += blk_rq_sectors(req);

>>>> +

>>>> +                /* Update latencies */

>>>> +                m->nr_req[dir]++;

>>>> +                lat = ktime_sub(now, lrbp->issue_time_stamp);

>>>> +                m->lat_sum[dir] += lat;

>>>> +                if (m->lat_max[dir] < lat || !m->lat_max[dir])

>>>> +                        m->lat_max[dir] = lat;

>>>> +                if (m->lat_min[dir] > lat || !m->lat_min[dir])

>>>> +                        m->lat_min[dir] = lat;

>>> 

>>> This if statement can be shorted, by setting lat_max / lat_min as

>>> default value.

>> 

>> I don't quite get it, can you show me the code sample?

> 

> I think " || !m->lat_max[dir]" can be removed.

> 

>                 if (m->lat_max[dir] < lat)

>                         m->lat_max[dir] = lat;

>                 if (m->lat_min[dir] > lat)

>                         m->lat_min[dir] = lat;

> 


lat_min is 0 from the beginning, so without "!m->lat_min[dir]" m->lat_min
would never be updated. The same goes for lat_max. Meanwhile, the
!m->lat_min/max check is hit only once in each round, which does not hurt.

Thanks,
Can Guo.

> Thanks,

> Daejun

> 

>> 

>> Thanks,

>> Can Guo

>> 

>>> 

>>>> +

>>>> +                m->nr_queued[dir]--;

>>>> +                /* Push forward the busy start of monitor */

>>>> +                m->busy_start_ts[dir] = now;

>>>> +        }

>>>> +}

>>> 

>>> Thanks,

>>> Daejun
diff mbox series

Patch

diff --git a/drivers/scsi/ufs/ufs-sysfs.c b/drivers/scsi/ufs/ufs-sysfs.c
index acc54f5..348df0e 100644
--- a/drivers/scsi/ufs/ufs-sysfs.c
+++ b/drivers/scsi/ufs/ufs-sysfs.c
@@ -278,6 +278,242 @@  static const struct attribute_group ufs_sysfs_default_group = {
 	.attrs = ufs_sysfs_ufshcd_attrs,
 };
 
+/*
+ * sysfs "monitor_enable" read handler: prints hba->monitor.enabled as a
+ * decimal integer followed by a newline.
+ */
+static ssize_t monitor_enable_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	const struct ufs_hba_monitor *mon = &hba->monitor;
+
+	return sysfs_emit(buf, "%d\n", mon->enabled);
+}
+
+/*
+ * sysfs "monitor_enable" write handler.
+ *
+ * Parses @buf with kstrtoul() (base auto-detected) and treats any non-zero
+ * value as a request to enable. With the host lock held:
+ *  - if the requested state equals the current one, nothing is touched;
+ *  - when disabling, the whole hba->monitor structure is zeroed;
+ *  - when enabling, the enabled flag is set and enabled_ts is taken from
+ *    ktime_get().
+ *
+ * Returns @count on success, or -EINVAL if @buf cannot be parsed.
+ */
+static ssize_t monitor_enable_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	unsigned long enable, flags;
+
+	if (kstrtoul(buf, 0, &enable))
+		return -EINVAL;
+
+	/* Collapse any non-zero input to 1 so it compares with the flag */
+	enable = !!enable;
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	if (enable != hba->monitor.enabled) {
+		if (enable) {
+			hba->monitor.enabled = true;
+			hba->monitor.enabled_ts = ktime_get();
+		} else {
+			/* Disabling wipes every field, not only the flag */
+			memset(&hba->monitor, 0, sizeof(hba->monitor));
+		}
+	}
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	return count;
+}
+
+/*
+ * sysfs "monitor_chunk_size" read handler: prints hba->monitor.chunk_size
+ * as an unsigned decimal followed by a newline.
+ */
+static ssize_t monitor_chunk_size_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	const struct ufs_hba_monitor *mon = &hba->monitor;
+
+	return sysfs_emit(buf, "%lu\n", mon->chunk_size);
+}
+
+/*
+ * sysfs "monitor_chunk_size" write handler.
+ *
+ * Parses @buf with kstrtoul() (base auto-detected) and, with the host lock
+ * held, stores the value in hba->monitor.chunk_size only if the monitor is
+ * currently disabled. A write while the monitor is enabled is ignored but
+ * still reported as successful.
+ *
+ * Returns @count on success, or -EINVAL if @buf cannot be parsed.
+ */
+static ssize_t monitor_chunk_size_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	struct ufs_hba_monitor *mon = &hba->monitor;
+	unsigned long chunk, flags;
+
+	if (kstrtoul(buf, 0, &chunk))
+		return -EINVAL;
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	/* The chunk size may only change while the monitor is off */
+	if (!mon->enabled)
+		mon->chunk_size = chunk;
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	return count;
+}
+
+/*
+ * sysfs "read_total_sectors" read handler: prints the READ sector counter
+ * hba->monitor.nr_sec_rw[READ].
+ */
+static ssize_t read_total_sectors_show(struct device *dev,
+				       struct device_attribute *attr, char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	const struct ufs_hba_monitor *mon = &hba->monitor;
+
+	return sysfs_emit(buf, "%lu\n", mon->nr_sec_rw[READ]);
+}
+
+/*
+ * sysfs "read_total_busy" read handler: prints the accumulated READ busy
+ * time, hba->monitor.total_busy[READ], converted to microseconds.
+ */
+static ssize_t read_total_busy_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	const struct ufs_hba_monitor *mon = &hba->monitor;
+
+	return sysfs_emit(buf, "%llu\n", ktime_to_us(mon->total_busy[READ]));
+}
+
+/*
+ * sysfs "read_nr_requests" read handler: prints the READ request counter
+ * hba->monitor.nr_req[READ].
+ */
+static ssize_t read_nr_requests_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	const struct ufs_hba_monitor *mon = &hba->monitor;
+
+	return sysfs_emit(buf, "%lu\n", mon->nr_req[READ]);
+}
+
+/*
+ * sysfs "read_req_latency_avg" read handler: prints the mean READ request
+ * latency in microseconds, i.e. lat_sum[READ] / nr_req[READ].
+ *
+ * Prints 0 when no READ request has been accounted yet (the counter is 0
+ * until the first completion and again after the monitor is disabled)
+ * instead of dividing by zero. The counter is sampled once so that the zero
+ * check and the division use the same value. div64_ul() is used because
+ * nr_req is an unsigned long, which div_u64() would truncate to 32 bits.
+ */
+static ssize_t read_req_latency_avg_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	struct ufs_hba_monitor *m = &hba->monitor;
+	unsigned long nr_req = READ_ONCE(m->nr_req[READ]);
+	u64 avg_us = 0;
+
+	if (nr_req)
+		avg_us = div64_ul(ktime_to_us(m->lat_sum[READ]), nr_req);
+
+	return sysfs_emit(buf, "%llu\n", avg_us);
+}
+
+/*
+ * sysfs "read_req_latency_max" read handler: prints
+ * hba->monitor.lat_max[READ] converted to microseconds.
+ */
+static ssize_t read_req_latency_max_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	const struct ufs_hba_monitor *mon = &hba->monitor;
+
+	return sysfs_emit(buf, "%llu\n", ktime_to_us(mon->lat_max[READ]));
+}
+
+/*
+ * sysfs "read_req_latency_min" read handler: prints
+ * hba->monitor.lat_min[READ] converted to microseconds.
+ */
+static ssize_t read_req_latency_min_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	const struct ufs_hba_monitor *mon = &hba->monitor;
+
+	return sysfs_emit(buf, "%llu\n", ktime_to_us(mon->lat_min[READ]));
+}
+
+/*
+ * sysfs "read_req_latency_sum" read handler: prints
+ * hba->monitor.lat_sum[READ] converted to microseconds.
+ */
+static ssize_t read_req_latency_sum_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	const struct ufs_hba_monitor *mon = &hba->monitor;
+
+	return sysfs_emit(buf, "%llu\n", ktime_to_us(mon->lat_sum[READ]));
+}
+
+/*
+ * sysfs "write_total_sectors" read handler: prints the WRITE sector counter
+ * hba->monitor.nr_sec_rw[WRITE].
+ */
+static ssize_t write_total_sectors_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	const struct ufs_hba_monitor *mon = &hba->monitor;
+
+	return sysfs_emit(buf, "%lu\n", mon->nr_sec_rw[WRITE]);
+}
+
+/*
+ * sysfs "write_total_busy" read handler: prints the accumulated WRITE busy
+ * time, hba->monitor.total_busy[WRITE], converted to microseconds.
+ */
+static ssize_t write_total_busy_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	const struct ufs_hba_monitor *mon = &hba->monitor;
+
+	return sysfs_emit(buf, "%llu\n", ktime_to_us(mon->total_busy[WRITE]));
+}
+
+/*
+ * sysfs "write_nr_requests" read handler: prints the WRITE request counter
+ * hba->monitor.nr_req[WRITE].
+ */
+static ssize_t write_nr_requests_show(struct device *dev,
+				      struct device_attribute *attr, char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	const struct ufs_hba_monitor *mon = &hba->monitor;
+
+	return sysfs_emit(buf, "%lu\n", mon->nr_req[WRITE]);
+}
+
+/*
+ * sysfs "write_req_latency_avg" read handler: prints the mean WRITE request
+ * latency in microseconds, i.e. lat_sum[WRITE] / nr_req[WRITE].
+ *
+ * Prints 0 when no WRITE request has been accounted yet (the counter is 0
+ * until the first completion and again after the monitor is disabled)
+ * instead of dividing by zero. The counter is sampled once so that the zero
+ * check and the division use the same value. div64_ul() is used because
+ * nr_req is an unsigned long, which div_u64() would truncate to 32 bits.
+ */
+static ssize_t write_req_latency_avg_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	struct ufs_hba_monitor *m = &hba->monitor;
+	unsigned long nr_req = READ_ONCE(m->nr_req[WRITE]);
+	u64 avg_us = 0;
+
+	if (nr_req)
+		avg_us = div64_ul(ktime_to_us(m->lat_sum[WRITE]), nr_req);
+
+	return sysfs_emit(buf, "%llu\n", avg_us);
+}
+
+/*
+ * sysfs "write_req_latency_max" read handler: prints
+ * hba->monitor.lat_max[WRITE] converted to microseconds.
+ */
+static ssize_t write_req_latency_max_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	const struct ufs_hba_monitor *mon = &hba->monitor;
+
+	return sysfs_emit(buf, "%llu\n", ktime_to_us(mon->lat_max[WRITE]));
+}
+
+/*
+ * sysfs "write_req_latency_min" read handler: prints
+ * hba->monitor.lat_min[WRITE] converted to microseconds.
+ */
+static ssize_t write_req_latency_min_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	const struct ufs_hba_monitor *mon = &hba->monitor;
+
+	return sysfs_emit(buf, "%llu\n", ktime_to_us(mon->lat_min[WRITE]));
+}
+
+/*
+ * sysfs "write_req_latency_sum" read handler: prints
+ * hba->monitor.lat_sum[WRITE] converted to microseconds.
+ */
+static ssize_t write_req_latency_sum_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct ufs_hba *hba = dev_get_drvdata(dev);
+	const struct ufs_hba_monitor *mon = &hba->monitor;
+
+	return sysfs_emit(buf, "%llu\n", ktime_to_us(mon->lat_sum[WRITE]));
+}
+
+/*
+ * Device attributes of the performance monitor. The two control nodes
+ * (monitor_enable, monitor_chunk_size) are read-write; every statistics
+ * node is read-only and backed by the matching <name>_show() handler.
+ */
+static DEVICE_ATTR_RW(monitor_enable);
+static DEVICE_ATTR_RW(monitor_chunk_size);
+static DEVICE_ATTR_RO(read_total_sectors);
+static DEVICE_ATTR_RO(read_total_busy);
+static DEVICE_ATTR_RO(read_nr_requests);
+static DEVICE_ATTR_RO(read_req_latency_avg);
+static DEVICE_ATTR_RO(read_req_latency_max);
+static DEVICE_ATTR_RO(read_req_latency_min);
+static DEVICE_ATTR_RO(read_req_latency_sum);
+static DEVICE_ATTR_RO(write_total_sectors);
+static DEVICE_ATTR_RO(write_total_busy);
+static DEVICE_ATTR_RO(write_nr_requests);
+static DEVICE_ATTR_RO(write_req_latency_avg);
+static DEVICE_ATTR_RO(write_req_latency_max);
+static DEVICE_ATTR_RO(write_req_latency_min);
+static DEVICE_ATTR_RO(write_req_latency_sum);
+
+/* NULL-terminated list of all monitor attributes, control nodes first */
+static struct attribute *ufs_sysfs_monitor_attrs[] = {
+	&dev_attr_monitor_enable.attr,
+	&dev_attr_monitor_chunk_size.attr,
+	&dev_attr_read_total_sectors.attr,
+	&dev_attr_read_total_busy.attr,
+	&dev_attr_read_nr_requests.attr,
+	&dev_attr_read_req_latency_avg.attr,
+	&dev_attr_read_req_latency_max.attr,
+	&dev_attr_read_req_latency_min.attr,
+	&dev_attr_read_req_latency_sum.attr,
+	&dev_attr_write_total_sectors.attr,
+	&dev_attr_write_total_busy.attr,
+	&dev_attr_write_nr_requests.attr,
+	&dev_attr_write_req_latency_avg.attr,
+	&dev_attr_write_req_latency_max.attr,
+	&dev_attr_write_req_latency_min.attr,
+	&dev_attr_write_req_latency_sum.attr,
+	NULL
+};
+
+/* Named attribute group "monitor" bundling the attributes listed above */
+static const struct attribute_group ufs_sysfs_monitor_group = {
+	.name = "monitor",
+	.attrs = ufs_sysfs_monitor_attrs,
+};
+
 static ssize_t ufs_sysfs_read_desc_param(struct ufs_hba *hba,
 				  enum desc_idn desc_id,
 				  u8 desc_index,
@@ -881,6 +1117,7 @@  static const struct attribute_group ufs_sysfs_attributes_group = {
 
 static const struct attribute_group *ufs_sysfs_groups[] = {
 	&ufs_sysfs_default_group,
+	&ufs_sysfs_monitor_group,
 	&ufs_sysfs_device_descriptor_group,
 	&ufs_sysfs_interconnect_descriptor_group,
 	&ufs_sysfs_geometry_descriptor_group,
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 80620c8..b49555fa 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2028,6 +2028,64 @@  static void ufshcd_clk_scaling_update_busy(struct ufs_hba *hba)
 		scaling->is_busy_started = false;
 	}
 }
+
+/*
+ * Map a SCSI opcode to a monitor direction.
+ *
+ * Returns READ for READ_6/READ_10/READ_16, WRITE for
+ * WRITE_6/WRITE_10/WRITE_16, and -EINVAL for any other opcode.
+ */
+static inline int ufshcd_monitor_opcode2dir(u8 opcode)
+{
+	switch (opcode) {
+	case READ_6:
+	case READ_10:
+	case READ_16:
+		return READ;
+	case WRITE_6:
+	case WRITE_10:
+	case WRITE_16:
+		return WRITE;
+	default:
+		return -EINVAL;
+	}
+}
+
+/*
+ * Decide whether the command described by @lrbp is to be accounted by the
+ * monitor. All of the following must hold:
+ *  - the monitor is enabled and @lrbp carries a SCSI command;
+ *  - chunk_size is 0, or equals the command's data buffer length;
+ *  - the command was issued after the monitor was enabled.
+ */
+static inline bool ufshcd_should_inform_monitor(struct ufs_hba *hba,
+						struct ufshcd_lrb *lrbp)
+{
+	struct ufs_hba_monitor *m = &hba->monitor;
+
+	if (!m->enabled || !lrbp || !lrbp->cmd)
+		return false;
+
+	/* A non-zero chunk_size restricts monitoring to that exact length */
+	if (m->chunk_size && m->chunk_size != lrbp->cmd->sdb.length)
+		return false;
+
+	return ktime_before(hba->monitor.enabled_ts, lrbp->issue_time_stamp);
+}
+
+/*
+ * Account a command that is about to be issued. Commands that are neither
+ * READ nor WRITE are ignored. The first outstanding command in a direction
+ * records the start of a busy period for that direction.
+ */
+static void ufshcd_start_monitor(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
+{
+	struct ufs_hba_monitor *m = &hba->monitor;
+	int dir = ufshcd_monitor_opcode2dir(*lrbp->cmd->cmnd);
+
+	if (dir < 0)
+		return;
+
+	/* Nothing was queued in this direction: a busy period begins now */
+	if (m->nr_queued[dir] == 0)
+		m->busy_start_ts[dir] = ktime_get();
+	m->nr_queued[dir]++;
+}
+
+/*
+ * Account a completed command in the monitor statistics.
+ *
+ * Does nothing unless the command is a READ or WRITE and at least one
+ * command of that direction is still counted as queued. Otherwise it adds
+ * the time since busy_start_ts to total_busy, adds the request's sectors to
+ * nr_sec_rw, bumps nr_req, and updates the sum/max/min latencies, where the
+ * latency is the completion time minus lrbp->issue_time_stamp.
+ *
+ * The completion time is lrbp->compl_time_stamp, which the completion path
+ * stores right before calling this function. Reusing it avoids a second
+ * clock read and keeps the monitor's latencies consistent with the
+ * timestamps recorded in the lrb.
+ */
+static void ufshcd_update_monitor(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
+{
+	int dir = ufshcd_monitor_opcode2dir(*lrbp->cmd->cmnd);
+
+	if (dir >= 0 && hba->monitor.nr_queued[dir] > 0) {
+		struct request *req = lrbp->cmd->request;
+		struct ufs_hba_monitor *m = &hba->monitor;
+		ktime_t now, inc, lat;
+
+		now = lrbp->compl_time_stamp;
+		inc = ktime_sub(now, m->busy_start_ts[dir]);
+		m->total_busy[dir] = ktime_add(m->total_busy[dir], inc);
+		m->nr_sec_rw[dir] += blk_rq_sectors(req);
+
+		/* Update latencies */
+		m->nr_req[dir]++;
+		lat = ktime_sub(now, lrbp->issue_time_stamp);
+		m->lat_sum[dir] += lat;
+		/*
+		 * lat_max/lat_min start at 0 after a reset, so a zero value
+		 * means "not set yet" and is always overwritten.
+		 */
+		if (m->lat_max[dir] < lat || !m->lat_max[dir])
+			m->lat_max[dir] = lat;
+		if (m->lat_min[dir] > lat || !m->lat_min[dir])
+			m->lat_min[dir] = lat;
+
+		m->nr_queued[dir]--;
+		/* Push forward the busy start of monitor */
+		m->busy_start_ts[dir] = now;
+	}
+}
+
 /**
  * ufshcd_send_command - Send SCSI or device management commands
  * @hba: per adapter instance
@@ -2044,6 +2102,8 @@  void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
 	ufshcd_add_command_trace(hba, task_tag, UFS_CMD_SEND);
 	ufshcd_clk_scaling_start_busy(hba);
 	__set_bit(task_tag, &hba->outstanding_reqs);
+	if (unlikely(ufshcd_should_inform_monitor(hba, lrbp)))
+		ufshcd_start_monitor(hba, lrbp);
 	ufshcd_writel(hba, 1 << task_tag, REG_UTP_TRANSFER_REQ_DOOR_BELL);
 	/* Make sure that doorbell is committed immediately */
 	wmb();
@@ -5098,6 +5158,8 @@  static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
 		lrbp->compl_time_stamp = ktime_get();
 		cmd = lrbp->cmd;
 		if (cmd) {
+			if (unlikely(ufshcd_should_inform_monitor(hba, lrbp)))
+				ufshcd_update_monitor(hba, lrbp);
 			ufshcd_add_command_trace(hba, index, UFS_CMD_COMP);
 			result = ufshcd_transfer_rsp_status(hba, lrbp);
 			scsi_dma_unmap(cmd);
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 18e56c1..2bfe20e 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -645,6 +645,25 @@  struct ufs_hba_variant_params {
 	u32 wb_flush_threshold;
 };
 
+/*
+ * Per-HBA performance monitor state. Every two-element array is indexed by
+ * transfer direction (READ or WRITE).
+ */
+struct ufs_hba_monitor {
+	/* if non-zero, only commands with this data length are monitored */
+	unsigned long chunk_size;
+
+	/* sectors transferred by completed monitored requests */
+	unsigned long nr_sec_rw[2];
+	/* accumulated busy time */
+	ktime_t total_busy[2];
+
+	/* number of completed monitored requests */
+	unsigned long nr_req[2];
+	/* latencies (issue to completion): sum, maximum and minimum */
+	ktime_t lat_sum[2];
+	ktime_t lat_max[2];
+	ktime_t lat_min[2];
+
+	/* monitored requests issued but not yet completed */
+	u32 nr_queued[2];
+	/* start of the current busy interval; advanced on every completion */
+	ktime_t busy_start_ts[2];
+
+	/* time the monitor was enabled; earlier commands are not accounted */
+	ktime_t enabled_ts;
+	/* true while the monitor is collecting statistics */
+	bool enabled;
+};
+
 /**
  * struct ufs_hba - per adapter private structure
  * @mmio_base: UFSHCI base register address
@@ -832,6 +851,8 @@  struct ufs_hba {
 	struct request_queue	*bsg_queue;
 	struct delayed_work rpm_dev_flush_recheck_work;
 
+	struct ufs_hba_monitor	monitor;
+
 #ifdef CONFIG_SCSI_UFS_CRYPTO
 	union ufs_crypto_capabilities crypto_capabilities;
 	union ufs_crypto_cap_entry *crypto_cap_array;