| Message ID | 20231114211804.1449162-2-bvanassche@acm.org |
|---|---|
| State | New |
| Series | Improve write performance for zoned UFS devices |
On 11/15/23 06:16, Bart Van Assche wrote:
> Many but not all storage controllers require serialization of zoned writes.
> Introduce two new request queue limit member variables related to write
> serialization. 'driver_preserves_write_order' allows block drivers to
> indicate that the order of write commands is preserved and hence that
> serialization of writes per zone is not required. 'use_zone_write_lock' is
> set by disk_set_zoned() if and only if the block device has zones and if
> the block driver does not preserve the order of write requests.
>
> Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
> Reviewed-by: Hannes Reinecke <hare@suse.de>
> Reviewed-by: Nitesh Shetty <nj.shetty@samsung.com>
> Cc: Christoph Hellwig <hch@lst.de>
> Cc: Ming Lei <ming.lei@redhat.com>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
>  block/blk-settings.c   | 15 +++++++++++++++
>  block/blk-zoned.c      |  1 +
>  include/linux/blkdev.h | 10 ++++++++++
>  3 files changed, 26 insertions(+)
>
> diff --git a/block/blk-settings.c b/block/blk-settings.c
> index 0046b447268f..4c776c08f190 100644
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -56,6 +56,8 @@ void blk_set_default_limits(struct queue_limits *lim)
>  	lim->alignment_offset = 0;
>  	lim->io_opt = 0;
>  	lim->misaligned = 0;
> +	lim->driver_preserves_write_order = false;
> +	lim->use_zone_write_lock = false;
>  	lim->zoned = BLK_ZONED_NONE;
>  	lim->zone_write_granularity = 0;
>  	lim->dma_alignment = 511;
> @@ -82,6 +84,8 @@ void blk_set_stacking_limits(struct queue_limits *lim)
>  	lim->max_dev_sectors = UINT_MAX;
>  	lim->max_write_zeroes_sectors = UINT_MAX;
>  	lim->max_zone_append_sectors = UINT_MAX;
> +	/* Request-based stacking drivers do not reorder requests. */

Rereading this patch, I do not think this statement is correct. I seriously
doubt that multipath will preserve write command order in all cases...

> +	lim->driver_preserves_write_order = true;

... so it is likely much safer to set the default to "false" as that is the
default for all requests in general.

>  }
>  EXPORT_SYMBOL(blk_set_stacking_limits);
>
> @@ -685,6 +689,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
>  					   b->max_secure_erase_sectors);
>  	t->zone_write_granularity = max(t->zone_write_granularity,
>  					b->zone_write_granularity);
> +	t->driver_preserves_write_order = t->driver_preserves_write_order &&
> +		b->driver_preserves_write_order;
> +	t->use_zone_write_lock = t->use_zone_write_lock ||
> +		b->use_zone_write_lock;

Very minor nit: splitting the line after the equal would make this more readable.

>  	t->zoned = max(t->zoned, b->zoned);
>  	return ret;
>  }
> @@ -949,6 +957,13 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
>  	}
>
>  	q->limits.zoned = model;
> +	/*
> +	 * Use the zone write lock only for zoned block devices and only if
> +	 * the block driver does not preserve the order of write commands.
> +	 */
> +	q->limits.use_zone_write_lock = model != BLK_ZONED_NONE &&
> +		!q->limits.driver_preserves_write_order;
> +
>  	if (model != BLK_ZONED_NONE) {
>  		/*
>  		 * Set the zone write granularity to the device logical block
> diff --git a/block/blk-zoned.c b/block/blk-zoned.c
> index 619ee41a51cc..112620985bff 100644
> --- a/block/blk-zoned.c
> +++ b/block/blk-zoned.c
> @@ -631,6 +631,7 @@ void disk_clear_zone_settings(struct gendisk *disk)
>  	q->limits.chunk_sectors = 0;
>  	q->limits.zone_write_granularity = 0;
>  	q->limits.max_zone_append_sectors = 0;
> +	q->limits.use_zone_write_lock = false;
>
>  	blk_mq_unfreeze_queue(q);
>  }
> diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
> index 51fa7ffdee83..2d452f5a36c8 100644
> --- a/include/linux/blkdev.h
> +++ b/include/linux/blkdev.h
> @@ -316,6 +316,16 @@ struct queue_limits {
>  	unsigned char		misaligned;
>  	unsigned char		discard_misaligned;
>  	unsigned char		raid_partial_stripes_expensive;
> +	/*
> +	 * Whether or not the block driver preserves the order of write
> +	 * requests. Set by the block driver.
> +	 */
> +	bool			driver_preserves_write_order;
> +	/*
> +	 * Whether or not zone write locking should be used. Set by
> +	 * disk_set_zoned().
> +	 */
> +	bool			use_zone_write_lock;
>  	enum blk_zoned_model	zoned;
>
>  	/*
On 11/19/23 15:29, Damien Le Moal wrote:
> On 11/15/23 06:16, Bart Van Assche wrote:
>> @@ -82,6 +84,8 @@ void blk_set_stacking_limits(struct queue_limits *lim)
>>  	lim->max_dev_sectors = UINT_MAX;
>>  	lim->max_write_zeroes_sectors = UINT_MAX;
>>  	lim->max_zone_append_sectors = UINT_MAX;
>> +	/* Request-based stacking drivers do not reorder requests. */
>
> Rereading this patch, I do not think this statement is correct. I seriously
> doubt that multipath will preserve write command order in all cases...
>
>> +	lim->driver_preserves_write_order = true;
>
> ... so it is likely much safer to set the default to "false" as that is the
> default for all requests in general.

How about applying this (untested) patch on top of this patch series?

diff --git a/block/blk-settings.c b/block/blk-settings.c
index 4c776c08f190..aba1972e9767 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -84,8 +84,6 @@ void blk_set_stacking_limits(struct queue_limits *lim)
 	lim->max_dev_sectors = UINT_MAX;
 	lim->max_write_zeroes_sectors = UINT_MAX;
 	lim->max_zone_append_sectors = UINT_MAX;
-	/* Request-based stacking drivers do not reorder requests. */
-	lim->driver_preserves_write_order = true;
 }
 EXPORT_SYMBOL(blk_set_stacking_limits);

diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 2d3e186ca87e..cb9abe4bd065 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -147,6 +147,11 @@ static int linear_report_zones(struct dm_target *ti,
 #define linear_report_zones NULL
 #endif

+static void linear_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+	limits->driver_preserves_write_order = true;
+}
+
 static int linear_iterate_devices(struct dm_target *ti,
 				  iterate_devices_callout_fn fn, void *data)
 {
@@ -208,6 +213,7 @@ static struct target_type linear_target = {
 	.map = linear_map,
 	.status = linear_status,
 	.prepare_ioctl = linear_prepare_ioctl,
+	.io_hints = linear_io_hints,
 	.iterate_devices = linear_iterate_devices,
 	.direct_access = linear_dax_direct_access,
 	.dax_zero_page_range = linear_dax_zero_page_range,

>> @@ -685,6 +689,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
>>  					   b->max_secure_erase_sectors);
>>  	t->zone_write_granularity = max(t->zone_write_granularity,
>>  					b->zone_write_granularity);
>> +	t->driver_preserves_write_order = t->driver_preserves_write_order &&
>> +		b->driver_preserves_write_order;
>> +	t->use_zone_write_lock = t->use_zone_write_lock ||
>> +		b->use_zone_write_lock;
>
> Very minor nit: splitting the line after the equal would make this more readable.

Hmm ... I have often seen other reviewers asking to maximize the use of each
source code line as much as reasonably possible.

Thanks,

Bart.
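To make the stacking semantics under discussion concrete, here is a brief, untested sketch. It is illustrative only and not part of the series or of the reply above; the function and variable names are made up, and real stacking drivers such as device mapper wire their limits up through their own limit-calculation paths.

```c
#include <linux/blkdev.h>

/*
 * Illustration only: combine the limits of two hypothetical bottom
 * devices 'a' and 'b' into stacked limits 't', using the rules the
 * patch adds to blk_stack_limits().
 */
static void example_stack_two_devices(struct queue_limits *a,
				      struct queue_limits *b)
{
	struct queue_limits t;

	/* As posted, this starts t.driver_preserves_write_order at true. */
	blk_set_stacking_limits(&t);

	/* AND rule: the stacked flag stays true only if 'a' preserves order. */
	blk_stack_limits(&t, a, 0);
	/* A single reordering bottom device ('b') makes it false for good. */
	blk_stack_limits(&t, b, 0);

	/*
	 * t.use_zone_write_lock, by contrast, is ORed: it ends up true as
	 * soon as either bottom device had it set.
	 */
}
```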
```diff
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 0046b447268f..4c776c08f190 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -56,6 +56,8 @@ void blk_set_default_limits(struct queue_limits *lim)
 	lim->alignment_offset = 0;
 	lim->io_opt = 0;
 	lim->misaligned = 0;
+	lim->driver_preserves_write_order = false;
+	lim->use_zone_write_lock = false;
 	lim->zoned = BLK_ZONED_NONE;
 	lim->zone_write_granularity = 0;
 	lim->dma_alignment = 511;
@@ -82,6 +84,8 @@ void blk_set_stacking_limits(struct queue_limits *lim)
 	lim->max_dev_sectors = UINT_MAX;
 	lim->max_write_zeroes_sectors = UINT_MAX;
 	lim->max_zone_append_sectors = UINT_MAX;
+	/* Request-based stacking drivers do not reorder requests. */
+	lim->driver_preserves_write_order = true;
 }
 EXPORT_SYMBOL(blk_set_stacking_limits);

@@ -685,6 +689,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 					   b->max_secure_erase_sectors);
 	t->zone_write_granularity = max(t->zone_write_granularity,
 					b->zone_write_granularity);
+	t->driver_preserves_write_order = t->driver_preserves_write_order &&
+		b->driver_preserves_write_order;
+	t->use_zone_write_lock = t->use_zone_write_lock ||
+		b->use_zone_write_lock;
 	t->zoned = max(t->zoned, b->zoned);
 	return ret;
 }
@@ -949,6 +957,13 @@ void disk_set_zoned(struct gendisk *disk, enum blk_zoned_model model)
 	}

 	q->limits.zoned = model;
+	/*
+	 * Use the zone write lock only for zoned block devices and only if
+	 * the block driver does not preserve the order of write commands.
+	 */
+	q->limits.use_zone_write_lock = model != BLK_ZONED_NONE &&
+		!q->limits.driver_preserves_write_order;
+
 	if (model != BLK_ZONED_NONE) {
 		/*
 		 * Set the zone write granularity to the device logical block
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 619ee41a51cc..112620985bff 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -631,6 +631,7 @@ void disk_clear_zone_settings(struct gendisk *disk)
 	q->limits.chunk_sectors = 0;
 	q->limits.zone_write_granularity = 0;
 	q->limits.max_zone_append_sectors = 0;
+	q->limits.use_zone_write_lock = false;

 	blk_mq_unfreeze_queue(q);
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 51fa7ffdee83..2d452f5a36c8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -316,6 +316,16 @@ struct queue_limits {
 	unsigned char		misaligned;
 	unsigned char		discard_misaligned;
 	unsigned char		raid_partial_stripes_expensive;
+	/*
+	 * Whether or not the block driver preserves the order of write
+	 * requests. Set by the block driver.
+	 */
+	bool			driver_preserves_write_order;
+	/*
+	 * Whether or not zone write locking should be used. Set by
+	 * disk_set_zoned().
+	 */
+	bool			use_zone_write_lock;
 	enum blk_zoned_model	zoned;

 	/*
```
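For context, below is a minimal, hypothetical sketch of how a leaf block driver that guarantees in-order write dispatch would consume the new limit. It is not part of the patch; the function name and the choice of BLK_ZONED_HM are assumptions for illustration. The driver sets driver_preserves_write_order before declaring the zoned model, so the disk_set_zoned() hunk above leaves use_zone_write_lock false and per-zone write serialization is skipped.

```c
#include <linux/blkdev.h>

/*
 * Hypothetical driver code, for illustration only.  The driver promises
 * that it never reorders write commands, so disk_set_zoned() does not
 * enable zone write locking for this host-managed zoned disk.
 */
static void example_setup_zoned_disk(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;

	/* Driver-specific guarantee: write command order is preserved. */
	q->limits.driver_preserves_write_order = true;

	/*
	 * disk_set_zoned() now computes use_zone_write_lock as
	 * (model != BLK_ZONED_NONE) && !driver_preserves_write_order,
	 * which is false here because of the assignment above.
	 */
	disk_set_zoned(disk, BLK_ZONED_HM);
}
```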