Message ID | 20220214080002.18381-3-nj.shetty@samsung.com |
---|---|
State | New |
Headers | show |
Series | Add Copy offload support | expand |
The subject says limits for copy-offload... On Mon, Feb 14, 2022 at 01:29:52PM +0530, Nitesh Shetty wrote: > Add device limits as sysfs entries, > - copy_offload (RW) > - copy_max_bytes (RW) > - copy_max_hw_bytes (RO) > - copy_max_range_bytes (RW) > - copy_max_range_hw_bytes (RO) > - copy_max_nr_ranges (RW) > - copy_max_nr_ranges_hw (RO) Some of these seem like generic... and also I see a few more max_hw ones not listed above... > --- a/block/blk-settings.c > +++ b/block/blk-settings.c > +/** > + * blk_queue_max_copy_sectors - set max sectors for a single copy payload > + * @q: the request queue for the device > + * @max_copy_sectors: maximum number of sectors to copy > + **/ > +void blk_queue_max_copy_sectors(struct request_queue *q, > + unsigned int max_copy_sectors) > +{ > + q->limits.max_hw_copy_sectors = max_copy_sectors; > + q->limits.max_copy_sectors = max_copy_sectors; > +} > +EXPORT_SYMBOL(blk_queue_max_copy_sectors); Please use EXPORT_SYMBOL_GPL() for all new things. Why is this setting both? The documentation does't seem to say. What's the point? > + > +/** > + * blk_queue_max_copy_range_sectors - set max sectors for a single range, in a copy payload > + * @q: the request queue for the device > + * @max_copy_range_sectors: maximum number of sectors to copy in a single range > + **/ > +void blk_queue_max_copy_range_sectors(struct request_queue *q, > + unsigned int max_copy_range_sectors) > +{ > + q->limits.max_hw_copy_range_sectors = max_copy_range_sectors; > + q->limits.max_copy_range_sectors = max_copy_range_sectors; > +} > +EXPORT_SYMBOL(blk_queue_max_copy_range_sectors); Same here. 
> +/** > + * blk_queue_max_copy_nr_ranges - set max number of ranges, in a copy payload > + * @q: the request queue for the device > + * @max_copy_nr_ranges: maximum number of ranges > + **/ > +void blk_queue_max_copy_nr_ranges(struct request_queue *q, > + unsigned int max_copy_nr_ranges) > +{ > + q->limits.max_hw_copy_nr_ranges = max_copy_nr_ranges; > + q->limits.max_copy_nr_ranges = max_copy_nr_ranges; > +} > +EXPORT_SYMBOL(blk_queue_max_copy_nr_ranges); Same. > + > /** > * blk_queue_max_write_same_sectors - set max sectors for a single write same > * @q: the request queue for the device > @@ -541,6 +592,14 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, > t->max_segment_size = min_not_zero(t->max_segment_size, > b->max_segment_size); > > + t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors); > + t->max_hw_copy_sectors = min(t->max_hw_copy_sectors, b->max_hw_copy_sectors); > + t->max_copy_range_sectors = min(t->max_copy_range_sectors, b->max_copy_range_sectors); > + t->max_hw_copy_range_sectors = min(t->max_hw_copy_range_sectors, > + b->max_hw_copy_range_sectors); > + t->max_copy_nr_ranges = min(t->max_copy_nr_ranges, b->max_copy_nr_ranges); > + t->max_hw_copy_nr_ranges = min(t->max_hw_copy_nr_ranges, b->max_hw_copy_nr_ranges); > + > t->misaligned |= b->misaligned; > > alignment = queue_limit_alignment_offset(b, start); > diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c > index 9f32882ceb2f..9ddd07f142d9 100644 > --- a/block/blk-sysfs.c > +++ b/block/blk-sysfs.c > @@ -212,6 +212,129 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag > return queue_var_show(0, page); > } > > +static ssize_t queue_copy_offload_show(struct request_queue *q, char *page) > +{ > + return queue_var_show(blk_queue_copy(q), page); > +} > + > +static ssize_t queue_copy_offload_store(struct request_queue *q, > + const char *page, size_t count) > +{ > + unsigned long copy_offload; > + ssize_t ret = 
queue_var_store(&copy_offload, page, count); > + > + if (ret < 0) > + return ret; > + > + if (copy_offload && !q->limits.max_hw_copy_sectors) > + return -EINVAL; If the kernel schedules, copy_offload may still be true and max_hw_copy_sectors may be set to 0. Is that an issue? > + > + if (copy_offload) > + blk_queue_flag_set(QUEUE_FLAG_COPY, q); > + else > + blk_queue_flag_clear(QUEUE_FLAG_COPY, q); The flag may be set but the queue flag could be set. Is that an issue? > @@ -597,6 +720,14 @@ QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones"); > QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones"); > QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones"); > > +QUEUE_RW_ENTRY(queue_copy_offload, "copy_offload"); > +QUEUE_RO_ENTRY(queue_copy_max_hw, "copy_max_hw_bytes"); > +QUEUE_RW_ENTRY(queue_copy_max, "copy_max_bytes"); > +QUEUE_RO_ENTRY(queue_copy_range_max_hw, "copy_max_range_hw_bytes"); > +QUEUE_RW_ENTRY(queue_copy_range_max, "copy_max_range_bytes"); > +QUEUE_RO_ENTRY(queue_copy_nr_ranges_max_hw, "copy_max_nr_ranges_hw"); > +QUEUE_RW_ENTRY(queue_copy_nr_ranges_max, "copy_max_nr_ranges"); Seems like you need to update Documentation/ABI/stable/sysfs-block. > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h > index efed3820cbf7..792e6d556589 100644 > --- a/include/linux/blkdev.h > +++ b/include/linux/blkdev.h > @@ -254,6 +254,13 @@ struct queue_limits { > unsigned int discard_alignment; > unsigned int zone_write_granularity; > > + unsigned long max_hw_copy_sectors; > + unsigned long max_copy_sectors; > + unsigned int max_hw_copy_range_sectors; > + unsigned int max_copy_range_sectors; > + unsigned short max_hw_copy_nr_ranges; > + unsigned short max_copy_nr_ranges; Before limits start growing more.. I wonder if we should just stuff hw offload stuff to its own struct within queue_limits. Christoph? Luis
On 2/17/22 1:07 AM, Luis Chamberlain wrote: > The subject says limits for copy-offload... > > On Mon, Feb 14, 2022 at 01:29:52PM +0530, Nitesh Shetty wrote: >> Add device limits as sysfs entries, >> - copy_offload (RW) >> - copy_max_bytes (RW) >> - copy_max_hw_bytes (RO) >> - copy_max_range_bytes (RW) >> - copy_max_range_hw_bytes (RO) >> - copy_max_nr_ranges (RW) >> - copy_max_nr_ranges_hw (RO) > > Some of these seem like generic... and also I see a few more max_hw ones > not listed above... > >> --- a/block/blk-settings.c >> +++ b/block/blk-settings.c >> +/** >> + * blk_queue_max_copy_sectors - set max sectors for a single copy payload >> + * @q: the request queue for the device >> + * @max_copy_sectors: maximum number of sectors to copy >> + **/ >> +void blk_queue_max_copy_sectors(struct request_queue *q, >> + unsigned int max_copy_sectors) >> +{ >> + q->limits.max_hw_copy_sectors = max_copy_sectors; >> + q->limits.max_copy_sectors = max_copy_sectors; >> +} >> +EXPORT_SYMBOL(blk_queue_max_copy_sectors); > > Please use EXPORT_SYMBOL_GPL() for all new things. > > Why is this setting both? The documentation does't seem to say. > What's the point? > >> + >> +/** >> + * blk_queue_max_copy_range_sectors - set max sectors for a single range, in a copy payload >> + * @q: the request queue for the device >> + * @max_copy_range_sectors: maximum number of sectors to copy in a single range >> + **/ >> +void blk_queue_max_copy_range_sectors(struct request_queue *q, >> + unsigned int max_copy_range_sectors) >> +{ >> + q->limits.max_hw_copy_range_sectors = max_copy_range_sectors; >> + q->limits.max_copy_range_sectors = max_copy_range_sectors; >> +} >> +EXPORT_SYMBOL(blk_queue_max_copy_range_sectors); > > Same here. 
> >> +/** >> + * blk_queue_max_copy_nr_ranges - set max number of ranges, in a copy payload >> + * @q: the request queue for the device >> + * @max_copy_nr_ranges: maximum number of ranges >> + **/ >> +void blk_queue_max_copy_nr_ranges(struct request_queue *q, >> + unsigned int max_copy_nr_ranges) >> +{ >> + q->limits.max_hw_copy_nr_ranges = max_copy_nr_ranges; >> + q->limits.max_copy_nr_ranges = max_copy_nr_ranges; >> +} >> +EXPORT_SYMBOL(blk_queue_max_copy_nr_ranges); > > Same. > >> + >> /** >> * blk_queue_max_write_same_sectors - set max sectors for a single write same >> * @q: the request queue for the device >> @@ -541,6 +592,14 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, >> t->max_segment_size = min_not_zero(t->max_segment_size, >> b->max_segment_size); >> >> + t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors); >> + t->max_hw_copy_sectors = min(t->max_hw_copy_sectors, b->max_hw_copy_sectors); >> + t->max_copy_range_sectors = min(t->max_copy_range_sectors, b->max_copy_range_sectors); >> + t->max_hw_copy_range_sectors = min(t->max_hw_copy_range_sectors, >> + b->max_hw_copy_range_sectors); >> + t->max_copy_nr_ranges = min(t->max_copy_nr_ranges, b->max_copy_nr_ranges); >> + t->max_hw_copy_nr_ranges = min(t->max_hw_copy_nr_ranges, b->max_hw_copy_nr_ranges); >> + >> t->misaligned |= b->misaligned; >> >> alignment = queue_limit_alignment_offset(b, start); >> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c >> index 9f32882ceb2f..9ddd07f142d9 100644 >> --- a/block/blk-sysfs.c >> +++ b/block/blk-sysfs.c >> @@ -212,6 +212,129 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag >> return queue_var_show(0, page); >> } >> >> +static ssize_t queue_copy_offload_show(struct request_queue *q, char *page) >> +{ >> + return queue_var_show(blk_queue_copy(q), page); >> +} >> + >> +static ssize_t queue_copy_offload_store(struct request_queue *q, >> + const char *page, size_t count) >> +{ >> + unsigned 
long copy_offload; >> + ssize_t ret = queue_var_store(&copy_offload, page, count); >> + >> + if (ret < 0) >> + return ret; >> + >> + if (copy_offload && !q->limits.max_hw_copy_sectors) >> + return -EINVAL; > > > If the kernel schedules, copy_offload may still be true and > max_hw_copy_sectors may be set to 0. Is that an issue? > >> + >> + if (copy_offload) >> + blk_queue_flag_set(QUEUE_FLAG_COPY, q); >> + else >> + blk_queue_flag_clear(QUEUE_FLAG_COPY, q); > > The flag may be set but the queue flag could be set. Is that an issue? > >> @@ -597,6 +720,14 @@ QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones"); >> QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones"); >> QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones"); >> >> +QUEUE_RW_ENTRY(queue_copy_offload, "copy_offload"); >> +QUEUE_RO_ENTRY(queue_copy_max_hw, "copy_max_hw_bytes"); >> +QUEUE_RW_ENTRY(queue_copy_max, "copy_max_bytes"); >> +QUEUE_RO_ENTRY(queue_copy_range_max_hw, "copy_max_range_hw_bytes"); >> +QUEUE_RW_ENTRY(queue_copy_range_max, "copy_max_range_bytes"); >> +QUEUE_RO_ENTRY(queue_copy_nr_ranges_max_hw, "copy_max_nr_ranges_hw"); >> +QUEUE_RW_ENTRY(queue_copy_nr_ranges_max, "copy_max_nr_ranges"); > > Seems like you need to update Documentation/ABI/stable/sysfs-block. > >> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h >> index efed3820cbf7..792e6d556589 100644 >> --- a/include/linux/blkdev.h >> +++ b/include/linux/blkdev.h >> @@ -254,6 +254,13 @@ struct queue_limits { >> unsigned int discard_alignment; >> unsigned int zone_write_granularity; >> >> + unsigned long max_hw_copy_sectors; >> + unsigned long max_copy_sectors; >> + unsigned int max_hw_copy_range_sectors; >> + unsigned int max_copy_range_sectors; >> + unsigned short max_hw_copy_nr_ranges; >> + unsigned short max_copy_nr_ranges; > > Before limits start growing more.. I wonder if we should just > stuff hw offload stuff to its own struct within queue_limits. > > Christoph?
> Potentially use a pointer to structure and maybe make it configurable, although I'm not sure about the later part, I'll let Christoph decide that. > Luis > -ck
Thu, Feb 17, 2022 at 01:07:00AM -0800, Luis Chamberlain wrote: > The subject says limits for copy-offload... > > On Mon, Feb 14, 2022 at 01:29:52PM +0530, Nitesh Shetty wrote: > > Add device limits as sysfs entries, > > - copy_offload (RW) > > - copy_max_bytes (RW) > > - copy_max_hw_bytes (RO) > > - copy_max_range_bytes (RW) > > - copy_max_range_hw_bytes (RO) > > - copy_max_nr_ranges (RW) > > - copy_max_nr_ranges_hw (RO) > > Some of these seem like generic... and also I see a few more max_hw ones > not listed above... > queue_limits and sysfs entries are differently named. All sysfs entries start with the copy_* prefix, which also makes it easy to look up all copy-related sysfs entries. For queue limits naming, I tried to follow the existing queue limit convention (like discard). > > --- a/block/blk-settings.c > > +++ b/block/blk-settings.c > > +/** > > + * blk_queue_max_copy_sectors - set max sectors for a single copy payload > > + * @q: the request queue for the device > > + * @max_copy_sectors: maximum number of sectors to copy > > + **/ > > +void blk_queue_max_copy_sectors(struct request_queue *q, > > + unsigned int max_copy_sectors) > > +{ > > + q->limits.max_hw_copy_sectors = max_copy_sectors; > > + q->limits.max_copy_sectors = max_copy_sectors; > > +} > > +EXPORT_SYMBOL(blk_queue_max_copy_sectors); > > Please use EXPORT_SYMBOL_GPL() for all new things. > acked. > Why is this setting both? The documentation does't seem to say. > What's the point? > This function is used only by the driver, while initializing the request queue. I will put this as part of the description next time.
> > + > > +/** > > + * blk_queue_max_copy_range_sectors - set max sectors for a single range, in a copy payload > > + * @q: the request queue for the device > > + * @max_copy_range_sectors: maximum number of sectors to copy in a single range > > + **/ > > +void blk_queue_max_copy_range_sectors(struct request_queue *q, > > + unsigned int max_copy_range_sectors) > > +{ > > + q->limits.max_hw_copy_range_sectors = max_copy_range_sectors; > > + q->limits.max_copy_range_sectors = max_copy_range_sectors; > > +} > > +EXPORT_SYMBOL(blk_queue_max_copy_range_sectors); > > Same here. > > > +/** > > + * blk_queue_max_copy_nr_ranges - set max number of ranges, in a copy payload > > + * @q: the request queue for the device > > + * @max_copy_nr_ranges: maximum number of ranges > > + **/ > > +void blk_queue_max_copy_nr_ranges(struct request_queue *q, > > + unsigned int max_copy_nr_ranges) > > +{ > > + q->limits.max_hw_copy_nr_ranges = max_copy_nr_ranges; > > + q->limits.max_copy_nr_ranges = max_copy_nr_ranges; > > +} > > +EXPORT_SYMBOL(blk_queue_max_copy_nr_ranges); > > Same. 
> > > + > > /** > > * blk_queue_max_write_same_sectors - set max sectors for a single write same > > * @q: the request queue for the device > > @@ -541,6 +592,14 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, > > t->max_segment_size = min_not_zero(t->max_segment_size, > > b->max_segment_size); > > > > + t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors); > > + t->max_hw_copy_sectors = min(t->max_hw_copy_sectors, b->max_hw_copy_sectors); > > + t->max_copy_range_sectors = min(t->max_copy_range_sectors, b->max_copy_range_sectors); > > + t->max_hw_copy_range_sectors = min(t->max_hw_copy_range_sectors, > > + b->max_hw_copy_range_sectors); > > + t->max_copy_nr_ranges = min(t->max_copy_nr_ranges, b->max_copy_nr_ranges); > > + t->max_hw_copy_nr_ranges = min(t->max_hw_copy_nr_ranges, b->max_hw_copy_nr_ranges); > > + > > t->misaligned |= b->misaligned; > > > > alignment = queue_limit_alignment_offset(b, start); > > diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c > > index 9f32882ceb2f..9ddd07f142d9 100644 > > --- a/block/blk-sysfs.c > > +++ b/block/blk-sysfs.c > > @@ -212,6 +212,129 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag > > return queue_var_show(0, page); > > } > > > > +static ssize_t queue_copy_offload_show(struct request_queue *q, char *page) > > +{ > > + return queue_var_show(blk_queue_copy(q), page); > > +} > > + > > +static ssize_t queue_copy_offload_store(struct request_queue *q, > > + const char *page, size_t count) > > +{ > > + unsigned long copy_offload; > > + ssize_t ret = queue_var_store(©_offload, page, count); > > + > > + if (ret < 0) > > + return ret; > > + > > + if (copy_offload && !q->limits.max_hw_copy_sectors) > > + return -EINVAL; > > > If the kernel schedules, copy_offload may still be true and > max_hw_copy_sectors may be set to 0. Is that an issue? > This check ensures that, we dont enable offload if device doesnt support offload. I feel it shouldn't be an issue. 
> > + > > + if (copy_offload) > > + blk_queue_flag_set(QUEUE_FLAG_COPY, q); > > + else > > + blk_queue_flag_clear(QUEUE_FLAG_COPY, q); > > The flag may be set but the queue flag could be set. Is that an issue? > > > @@ -597,6 +720,14 @@ QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones"); > > QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones"); > > QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones"); > > > > +QUEUE_RW_ENTRY(queue_copy_offload, "copy_offload"); > > +QUEUE_RO_ENTRY(queue_copy_max_hw, "copy_max_hw_bytes"); > > +QUEUE_RW_ENTRY(queue_copy_max, "copy_max_bytes"); > > +QUEUE_RO_ENTRY(queue_copy_range_max_hw, "copy_max_range_hw_bytes"); > > +QUEUE_RW_ENTRY(queue_copy_range_max, "copy_max_range_bytes"); > > +QUEUE_RO_ENTRY(queue_copy_nr_ranges_max_hw, "copy_max_nr_ranges_hw"); > > +QUEUE_RW_ENTRY(queue_copy_nr_ranges_max, "copy_max_nr_ranges"); > > Seems like you need to update Documentation/ABI/stable/sysfs-block. > acked. > > diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h > > index efed3820cbf7..792e6d556589 100644 > > --- a/include/linux/blkdev.h > > +++ b/include/linux/blkdev.h > > @@ -254,6 +254,13 @@ struct queue_limits { > > unsigned int discard_alignment; > > unsigned int zone_write_granularity; > > > > + unsigned long max_hw_copy_sectors; > > + unsigned long max_copy_sectors; > > + unsigned int max_hw_copy_range_sectors; > > + unsigned int max_copy_range_sectors; > > + unsigned short max_hw_copy_nr_ranges; > > + unsigned short max_copy_nr_ranges; > > Before limits start growing more.. I wonder if we should just > stuff hw offload stuff to its own struct within queue_limits. > > Christoph? > > Luis > Yeah, would like to know community opinion on this. -- Nitesh
On Thu, Feb 17, 2022 at 10:16:21AM +0000, Chaitanya Kulkarni wrote: > On 2/17/22 1:07 AM, Luis Chamberlain wrote: > >> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h > >> index efed3820cbf7..792e6d556589 100644 > >> --- a/include/linux/blkdev.h > >> +++ b/include/linux/blkdev.h > >> @@ -254,6 +254,13 @@ struct queue_limits { > >> unsigned int discard_alignment; > >> unsigned int zone_write_granularity; > >> > >> + unsigned long max_hw_copy_sectors; > >> + unsigned long max_copy_sectors; > >> + unsigned int max_hw_copy_range_sectors; > >> + unsigned int max_copy_range_sectors; > >> + unsigned short max_hw_copy_nr_ranges; > >> + unsigned short max_copy_nr_ranges; > > > > Before limits start growing more.. I wonder if we should just > > stuff hw offload stuff to its own struct within queue_limits. > > > > Christoph? > > > > Potentially use a pointer to structure and maybe make it configurable, Did you mean to make queue limits local or for hw offload and make that a pointer? If so that seems odd because even for hw copy offload we still need the other limits, no? So what I meant was that struct queue_limits seems to be getting large, and that hw copy offload seems like an example use case where we should probably use a separate struct for it. And while at it, well, start adding kdocs for these things, because, there's tons of things which could use kdoc love. > although I'm not sure about the latter part, I'll let Christoph decide > that. Luis
On Thu, Feb 17, 2022 at 06:29:01PM +0530, Nitesh Shetty wrote: > Thu, Feb 17, 2022 at 01:07:00AM -0800, Luis Chamberlain wrote: > > The subject says limits for copy-offload... > > > > On Mon, Feb 14, 2022 at 01:29:52PM +0530, Nitesh Shetty wrote: > > > Add device limits as sysfs entries, > > > - copy_offload (RW) > > > - copy_max_bytes (RW) > > > - copy_max_hw_bytes (RO) > > > - copy_max_range_bytes (RW) > > > - copy_max_range_hw_bytes (RO) > > > - copy_max_nr_ranges (RW) > > > - copy_max_nr_ranges_hw (RO) > > > > Some of these seem like generic... and also I see a few more max_hw ones > > not listed above... > > > queue_limits and sysfs entries are differently named. > All sysfs entries start with copy_* prefix. Also it makes easy to lookup > all copy sysfs. > For queue limits naming, I tried to following existing queue limit > convention (like discard). My point was that your subject seems to indicate the changes are just for copy-offload, but you seem to be adding generic queue limits as well. Is that correct? If so then perhaps the subject should be changed or the patch split up. > > > +static ssize_t queue_copy_offload_store(struct request_queue *q, > > > + const char *page, size_t count) > > > +{ > > > + unsigned long copy_offload; > > > + ssize_t ret = queue_var_store(&copy_offload, page, count); > > > + > > > + if (ret < 0) > > > + return ret; > > > + > > > + if (copy_offload && !q->limits.max_hw_copy_sectors) > > > + return -EINVAL; > > > > > > If the kernel schedules, copy_offload may still be true and > > max_hw_copy_sectors may be set to 0. Is that an issue? > > > > This check ensures that, we dont enable offload if device doesnt support > offload. I feel it shouldn't be an issue. My point was this: CPU1 CPU2 Time 1) if (copy_offload 2) ---> preemption so it schedules 3) ---> some other high priority task Sets q->limits.max_hw_copy_sectors to 0 4) && !q->limits.max_hw_copy_sectors) Can something bad happen if we allow for this?
On 2/23/22 09:55, Luis Chamberlain wrote: > On Thu, Feb 17, 2022 at 06:29:01PM +0530, Nitesh Shetty wrote: >> Thu, Feb 17, 2022 at 01:07:00AM -0800, Luis Chamberlain wrote: >>> The subject says limits for copy-offload... >>> >>> On Mon, Feb 14, 2022 at 01:29:52PM +0530, Nitesh Shetty wrote: >>>> Add device limits as sysfs entries, >>>> - copy_offload (RW) >>>> - copy_max_bytes (RW) >>>> - copy_max_hw_bytes (RO) >>>> - copy_max_range_bytes (RW) >>>> - copy_max_range_hw_bytes (RO) >>>> - copy_max_nr_ranges (RW) >>>> - copy_max_nr_ranges_hw (RO) >>> >>> Some of these seem like generic... and also I see a few more max_hw ones >>> not listed above... >>> >> queue_limits and sysfs entries are differently named. >> All sysfs entries start with copy_* prefix. Also it makes easy to lookup >> all copy sysfs. >> For queue limits naming, I tried to following existing queue limit >> convention (like discard). > > My point was that your subject seems to indicate the changes are just > for copy-offload, but you seem to be adding generic queue limits as > well. Is that correct? If so then perhaps the subject should be changed > or the patch split up. > >>>> +static ssize_t queue_copy_offload_store(struct request_queue *q, >>>> + const char *page, size_t count) >>>> +{ >>>> + unsigned long copy_offload; >>>> + ssize_t ret = queue_var_store(&copy_offload, page, count); >>>> + >>>> + if (ret < 0) >>>> + return ret; >>>> + >>>> + if (copy_offload && !q->limits.max_hw_copy_sectors) >>>> + return -EINVAL; >>> >>> >>> If the kernel schedules, copy_offload may still be true and >>> max_hw_copy_sectors may be set to 0. Is that an issue? >>> >> >> This check ensures that, we dont enable offload if device doesnt support >> offload. I feel it shouldn't be an issue.
> > My point was this: > > CPU1 CPU2 > Time > 1) if (copy_offload > 2) ---> preemption so it schedules > 3) ---> some other high priority task Sets q->limits.max_hw_copy_sectors to 0 > 4) && !q->limits.max_hw_copy_sectors) > > Can something bad happen if we allow for this? max_hw_copy_sectors describes the device capability to offload copy. So this is read-only and "max_hw_copy_sectors != 0" means that the device supports copy offload (this attribute should really be named max_hw_copy_offload_sectors). The actual loop to issue copy offload BIOs, however, must use the soft version of the attribute: max_copy_sectors, which defaults to max_hw_copy_sectors if copy offload is truned on and I guess to max_sectors for the emulation case. Now, with this in mind, I do not see how allowing max_copy_sectors to be 0 makes sense. I fail to see why that should be allowed since: 1) If copy_offload is true, we will rely on the device and chunk copy offload BIOs up to max_copy_sectors 2) If copy_offload is false (or device does not support it), emulation will be used by issuing read/write BIOs of up to max_copy_sectors. Thus max_copy_sectors must always be at least equal to the device minimum IO size, that is, the logical block size.
On Tue, Feb 22, 2022 at 04:55:41PM -0800, Luis Chamberlain wrote: > On Thu, Feb 17, 2022 at 06:29:01PM +0530, Nitesh Shetty wrote: > > Thu, Feb 17, 2022 at 01:07:00AM -0800, Luis Chamberlain wrote: > > > The subject says limits for copy-offload... > > > > > > On Mon, Feb 14, 2022 at 01:29:52PM +0530, Nitesh Shetty wrote: > > > > Add device limits as sysfs entries, > > > > - copy_offload (RW) > > > > - copy_max_bytes (RW) > > > > - copy_max_hw_bytes (RO) > > > > - copy_max_range_bytes (RW) > > > > - copy_max_range_hw_bytes (RO) > > > > - copy_max_nr_ranges (RW) > > > > - copy_max_nr_ranges_hw (RO) > > > > > > Some of these seem like generic... and also I see a few more max_hw ones > > > not listed above... > > > > > queue_limits and sysfs entries are differently named. > > All sysfs entries start with copy_* prefix. Also it makes easy to lookup > > all copy sysfs. > > For queue limits naming, I tried to following existing queue limit > > convention (like discard). > > My point was that your subject seems to indicate the changes are just > for copy-offload, but you seem to be adding generic queue limits as > well. Is that correct? If so then perhaps the subject should be changed > or the patch split up. > Yeah, the queue limits are for copy offload. I think I will make this more readable by adding a copy_offload_* prefix. > > > > +static ssize_t queue_copy_offload_store(struct request_queue *q, > > > > + const char *page, size_t count) > > > > +{ > > > > + unsigned long copy_offload; > > > > + ssize_t ret = queue_var_store(&copy_offload, page, count); > > > > + > > > > + if (ret < 0) > > > > + return ret; > > > > + > > > > + if (copy_offload && !q->limits.max_hw_copy_sectors) > > > > + return -EINVAL; > > > > > > > > > If the kernel schedules, copy_offload may still be true and > > > max_hw_copy_sectors may be set to 0. Is that an issue? > > > > > > > This check ensures that, we dont enable offload if device doesnt support > > offload. I feel it shouldn't be an issue.
> > My point was this: > > CPU1 CPU2 > Time > 1) if (copy_offload > 2) ---> preemption so it schedules > 3) ---> some other high priority task Sets q->limits.max_hw_copy_sectors to 0 > 4) && !q->limits.max_hw_copy_sectors) > > Can something bad happen if we allow for this? > > max_hw_copy_sectors is read only for user. And inside kernel, this is set only by driver at initialization.
On Wed, Feb 23, 2022 at 10:29:18AM +0900, Damien Le Moal wrote: > On 2/23/22 09:55, Luis Chamberlain wrote: > > On Thu, Feb 17, 2022 at 06:29:01PM +0530, Nitesh Shetty wrote: > >> Thu, Feb 17, 2022 at 01:07:00AM -0800, Luis Chamberlain wrote: > >>> The subject says limits for copy-offload... > >>> > >>> On Mon, Feb 14, 2022 at 01:29:52PM +0530, Nitesh Shetty wrote: > >>>> Add device limits as sysfs entries, > >>>> - copy_offload (RW) > >>>> - copy_max_bytes (RW) > >>>> - copy_max_hw_bytes (RO) > >>>> - copy_max_range_bytes (RW) > >>>> - copy_max_range_hw_bytes (RO) > >>>> - copy_max_nr_ranges (RW) > >>>> - copy_max_nr_ranges_hw (RO) > >>> > >>> Some of these seem like generic... and also I see a few more max_hw ones > >>> not listed above... > >>> > >> queue_limits and sysfs entries are differently named. > >> All sysfs entries start with copy_* prefix. Also it makes easy to lookup > >> all copy sysfs. > >> For queue limits naming, I tried to following existing queue limit > >> convention (like discard). > > > > My point was that your subject seems to indicate the changes are just > > for copy-offload, but you seem to be adding generic queue limits as > > well. Is that correct? If so then perhaps the subject should be changed > > or the patch split up. > > > >>>> +static ssize_t queue_copy_offload_store(struct request_queue *q, > >>>> + const char *page, size_t count) > >>>> +{ > >>>> + unsigned long copy_offload; > >>>> + ssize_t ret = queue_var_store(&copy_offload, page, count); > >>>> + > >>>> + if (ret < 0) > >>>> + return ret; > >>>> + > >>>> + if (copy_offload && !q->limits.max_hw_copy_sectors) > >>>> + return -EINVAL; > >>> > >>> > >>> If the kernel schedules, copy_offload may still be true and > >>> max_hw_copy_sectors may be set to 0. Is that an issue? > >>> > >> > >> This check ensures that, we dont enable offload if device doesnt support > >> offload. I feel it shouldn't be an issue.
> > > > My point was this: > > > > CPU1 CPU2 > > Time > > 1) if (copy_offload > > 2) ---> preemption so it schedules > > 3) ---> some other high priority task Sets q->limits.max_hw_copy_sectors to 0 > > 4) && !q->limits.max_hw_copy_sectors) > > > > Can something bad happen if we allow for this? > > max_hw_copy_sectors describes the device capability to offload copy. So > this is read-only and "max_hw_copy_sectors != 0" means that the device > supports copy offload (this attribute should really be named > max_hw_copy_offload_sectors). > Yes, it does make sense to change prefix to copy_offload_*, but downside being sysfs attributes becomes too long. > The actual loop to issue copy offload BIOs, however, must use the soft > version of the attribute: max_copy_sectors, which defaults to > max_hw_copy_sectors if copy offload is truned on and I guess to > max_sectors for the emulation case. > > Now, with this in mind, I do not see how allowing max_copy_sectors to be > 0 makes sense. I fail to see why that should be allowed since: > 1) If copy_offload is true, we will rely on the device and chunk copy > offload BIOs up to max_copy_sectors > 2) If copy_offload is false (or device does not support it), emulation > will be used by issuing read/write BIOs of up to max_copy_sectors. > > Thus max_copy_sectors must always be at least equal to the device > minimum IO size, that is, the logical block size. Agreed, if device doesn't suppport offload, soft limit should be based on limits of READ/WRITE IOs. -- Nitesh Shetty
diff --git a/block/blk-settings.c b/block/blk-settings.c index b880c70e22e4..4baccc93a294 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -57,6 +57,12 @@ void blk_set_default_limits(struct queue_limits *lim) lim->misaligned = 0; lim->zoned = BLK_ZONED_NONE; lim->zone_write_granularity = 0; + lim->max_hw_copy_sectors = 0; + lim->max_copy_sectors = 0; + lim->max_hw_copy_nr_ranges = 0; + lim->max_copy_nr_ranges = 0; + lim->max_hw_copy_range_sectors = 0; + lim->max_copy_range_sectors = 0; } EXPORT_SYMBOL(blk_set_default_limits); @@ -82,6 +88,12 @@ void blk_set_stacking_limits(struct queue_limits *lim) lim->max_write_same_sectors = UINT_MAX; lim->max_write_zeroes_sectors = UINT_MAX; lim->max_zone_append_sectors = UINT_MAX; + lim->max_hw_copy_sectors = ULONG_MAX; + lim->max_copy_sectors = ULONG_MAX; + lim->max_hw_copy_range_sectors = UINT_MAX; + lim->max_copy_range_sectors = UINT_MAX; + lim->max_hw_copy_nr_ranges = USHRT_MAX; + lim->max_copy_nr_ranges = USHRT_MAX; } EXPORT_SYMBOL(blk_set_stacking_limits); @@ -178,6 +190,45 @@ void blk_queue_max_discard_sectors(struct request_queue *q, } EXPORT_SYMBOL(blk_queue_max_discard_sectors); +/** + * blk_queue_max_copy_sectors - set max sectors for a single copy payload + * @q: the request queue for the device + * @max_copy_sectors: maximum number of sectors to copy + **/ +void blk_queue_max_copy_sectors(struct request_queue *q, + unsigned int max_copy_sectors) +{ + q->limits.max_hw_copy_sectors = max_copy_sectors; + q->limits.max_copy_sectors = max_copy_sectors; +} +EXPORT_SYMBOL(blk_queue_max_copy_sectors); + +/** + * blk_queue_max_copy_range_sectors - set max sectors for a single range, in a copy payload + * @q: the request queue for the device + * @max_copy_range_sectors: maximum number of sectors to copy in a single range + **/ +void blk_queue_max_copy_range_sectors(struct request_queue *q, + unsigned int max_copy_range_sectors) +{ + q->limits.max_hw_copy_range_sectors = max_copy_range_sectors; + 
q->limits.max_copy_range_sectors = max_copy_range_sectors; +} +EXPORT_SYMBOL(blk_queue_max_copy_range_sectors); + +/** + * blk_queue_max_copy_nr_ranges - set max number of ranges, in a copy payload + * @q: the request queue for the device + * @max_copy_nr_ranges: maximum number of ranges + **/ +void blk_queue_max_copy_nr_ranges(struct request_queue *q, + unsigned int max_copy_nr_ranges) +{ + q->limits.max_hw_copy_nr_ranges = max_copy_nr_ranges; + q->limits.max_copy_nr_ranges = max_copy_nr_ranges; +} +EXPORT_SYMBOL(blk_queue_max_copy_nr_ranges); + /** * blk_queue_max_write_same_sectors - set max sectors for a single write same * @q: the request queue for the device @@ -541,6 +592,14 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); + t->max_copy_sectors = min(t->max_copy_sectors, b->max_copy_sectors); + t->max_hw_copy_sectors = min(t->max_hw_copy_sectors, b->max_hw_copy_sectors); + t->max_copy_range_sectors = min(t->max_copy_range_sectors, b->max_copy_range_sectors); + t->max_hw_copy_range_sectors = min(t->max_hw_copy_range_sectors, + b->max_hw_copy_range_sectors); + t->max_copy_nr_ranges = min(t->max_copy_nr_ranges, b->max_copy_nr_ranges); + t->max_hw_copy_nr_ranges = min(t->max_hw_copy_nr_ranges, b->max_hw_copy_nr_ranges); + t->misaligned |= b->misaligned; alignment = queue_limit_alignment_offset(b, start); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 9f32882ceb2f..9ddd07f142d9 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -212,6 +212,129 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag return queue_var_show(0, page); } +static ssize_t queue_copy_offload_show(struct request_queue *q, char *page) +{ + return queue_var_show(blk_queue_copy(q), page); +} + +static ssize_t queue_copy_offload_store(struct request_queue *q, + const char *page, size_t count) +{ + unsigned long copy_offload; + ssize_t ret = 
queue_var_store(&copy_offload, page, count); + + if (ret < 0) + return ret; + + if (copy_offload && !q->limits.max_hw_copy_sectors) + return -EINVAL; + + if (copy_offload) + blk_queue_flag_set(QUEUE_FLAG_COPY, q); + else + blk_queue_flag_clear(QUEUE_FLAG_COPY, q); + + return ret; +} + +static ssize_t queue_copy_max_hw_show(struct request_queue *q, char *page) +{ + return sprintf(page, "%llu\n", + (unsigned long long)q->limits.max_hw_copy_sectors << 9); +} + +static ssize_t queue_copy_max_show(struct request_queue *q, char *page) +{ + return sprintf(page, "%llu\n", + (unsigned long long)q->limits.max_copy_sectors << 9); +} + +static ssize_t queue_copy_max_store(struct request_queue *q, + const char *page, size_t count) +{ + unsigned long max_copy; + ssize_t ret = queue_var_store(&max_copy, page, count); + + if (ret < 0) + return ret; + + if (max_copy & (queue_logical_block_size(q) - 1)) + return -EINVAL; + + max_copy >>= 9; + if (max_copy > q->limits.max_hw_copy_sectors) + max_copy = q->limits.max_hw_copy_sectors; + + q->limits.max_copy_sectors = max_copy; + return ret; +} + +static ssize_t queue_copy_range_max_hw_show(struct request_queue *q, char *page) +{ + return sprintf(page, "%llu\n", + (unsigned long long)q->limits.max_hw_copy_range_sectors << 9); +} + +static ssize_t queue_copy_range_max_show(struct request_queue *q, + char *page) +{ + return sprintf(page, "%llu\n", + (unsigned long long)q->limits.max_copy_range_sectors << 9); +} + +static ssize_t queue_copy_range_max_store(struct request_queue *q, + const char *page, size_t count) +{ + unsigned long max_copy; + ssize_t ret = queue_var_store(&max_copy, page, count); + + if (ret < 0) + return ret; + + if (max_copy & (queue_logical_block_size(q) - 1)) + return -EINVAL; + + max_copy >>= 9; + if (max_copy > UINT_MAX) + return -EINVAL; + + if (max_copy > q->limits.max_hw_copy_range_sectors) + max_copy = q->limits.max_hw_copy_range_sectors; + + q->limits.max_copy_range_sectors = max_copy; + return ret; +} + +static
ssize_t queue_copy_nr_ranges_max_hw_show(struct request_queue *q, char *page) +{ + return queue_var_show(q->limits.max_hw_copy_nr_ranges, page); +} + +static ssize_t queue_copy_nr_ranges_max_show(struct request_queue *q, + char *page) +{ + return queue_var_show(q->limits.max_copy_nr_ranges, page); +} + +static ssize_t queue_copy_nr_ranges_max_store(struct request_queue *q, + const char *page, size_t count) +{ + unsigned long max_nr; + ssize_t ret = queue_var_store(&max_nr, page, count); + + if (ret < 0) + return ret; + + if (max_nr > USHRT_MAX) + return -EINVAL; + + if (max_nr > q->limits.max_hw_copy_nr_ranges) + max_nr = q->limits.max_hw_copy_nr_ranges; + + q->limits.max_copy_nr_ranges = max_nr; + return ret; +} + static ssize_t queue_write_same_max_show(struct request_queue *q, char *page) { return sprintf(page, "%llu\n", @@ -597,6 +720,14 @@ QUEUE_RO_ENTRY(queue_nr_zones, "nr_zones"); QUEUE_RO_ENTRY(queue_max_open_zones, "max_open_zones"); QUEUE_RO_ENTRY(queue_max_active_zones, "max_active_zones"); +QUEUE_RW_ENTRY(queue_copy_offload, "copy_offload"); +QUEUE_RO_ENTRY(queue_copy_max_hw, "copy_max_hw_bytes"); +QUEUE_RW_ENTRY(queue_copy_max, "copy_max_bytes"); +QUEUE_RO_ENTRY(queue_copy_range_max_hw, "copy_max_range_hw_bytes"); +QUEUE_RW_ENTRY(queue_copy_range_max, "copy_max_range_bytes"); +QUEUE_RO_ENTRY(queue_copy_nr_ranges_max_hw, "copy_max_nr_ranges_hw"); +QUEUE_RW_ENTRY(queue_copy_nr_ranges_max, "copy_max_nr_ranges"); + QUEUE_RW_ENTRY(queue_nomerges, "nomerges"); QUEUE_RW_ENTRY(queue_rq_affinity, "rq_affinity"); QUEUE_RW_ENTRY(queue_poll, "io_poll"); @@ -643,6 +774,13 @@ static struct attribute *queue_attrs[] = { &queue_discard_max_entry.attr, &queue_discard_max_hw_entry.attr, &queue_discard_zeroes_data_entry.attr, + &queue_copy_offload_entry.attr, + &queue_copy_max_hw_entry.attr, + &queue_copy_max_entry.attr, + &queue_copy_range_max_hw_entry.attr, + &queue_copy_range_max_entry.attr, + &queue_copy_nr_ranges_max_hw_entry.attr, + 
&queue_copy_nr_ranges_max_entry.attr, &queue_write_same_max_entry.attr, &queue_write_zeroes_max_entry.attr, &queue_zone_append_max_entry.attr, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index efed3820cbf7..792e6d556589 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -254,6 +254,13 @@ struct queue_limits { unsigned int discard_alignment; unsigned int zone_write_granularity; + unsigned long max_hw_copy_sectors; + unsigned long max_copy_sectors; + unsigned int max_hw_copy_range_sectors; + unsigned int max_copy_range_sectors; + unsigned short max_hw_copy_nr_ranges; + unsigned short max_copy_nr_ranges; + unsigned short max_segments; unsigned short max_integrity_segments; unsigned short max_discard_segments; @@ -562,6 +569,7 @@ struct request_queue { #define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */ #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */ +#define QUEUE_FLAG_COPY 30 /* supports copy offload */ #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ @@ -585,6 +593,7 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) +#define blk_queue_copy(q) test_bit(QUEUE_FLAG_COPY, &(q)->queue_flags) #define blk_queue_zone_resetall(q) \ test_bit(QUEUE_FLAG_ZONE_RESETALL, &(q)->queue_flags) #define blk_queue_secure_erase(q) \ @@ -958,6 +967,10 @@ extern void blk_queue_max_discard_segments(struct request_queue *, extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); +extern void blk_queue_max_copy_sectors(struct 
request_queue *q, unsigned int max_copy_sectors); +extern void blk_queue_max_copy_range_sectors(struct request_queue *q, + unsigned int max_copy_range_sectors); +extern void blk_queue_max_copy_nr_ranges(struct request_queue *q, unsigned int max_copy_nr_ranges); extern void blk_queue_max_write_same_sectors(struct request_queue *q, unsigned int max_write_same_sectors); extern void blk_queue_max_write_zeroes_sectors(struct request_queue *q,