diff mbox series

[PATCHv3,06/10] blk-integrity: simplify counting segments

Message ID 20240904152605.4055570-7-kbusch@meta.com
State New
Headers show
Series block integrity merging and counting | expand

Commit Message

Keith Busch Sept. 4, 2024, 3:26 p.m. UTC
From: Keith Busch <kbusch@kernel.org>

The segments are already packed to the queue limits when adding them to
the bio, so each vector is already its own segment. No need to attempt
compacting them even more. And give the function a more appropriate
name, since we're counting segments, not scatter-gather elements.

Signed-off-by: Keith Busch <kbusch@kernel.org>
---
 block/blk-integrity.c         | 33 ++++++---------------------------
 block/blk-mq.c                |  2 +-
 include/linux/blk-integrity.h |  5 ++---
 3 files changed, 9 insertions(+), 31 deletions(-)

Comments

Christoph Hellwig Sept. 10, 2024, 3:41 p.m. UTC | #1
On Wed, Sep 04, 2024 at 08:26:01AM -0700, Keith Busch wrote:
> From: Keith Busch <kbusch@kernel.org>
> 
> The segments are already packed to the queue limits when adding them to
> the bio,

I can't really parse this.  I guess this talks about
bio_integrity_add_page trying to append the payload to the last
vector when possible?

> -int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio)
> +int blk_rq_count_integrity_segs(struct bio *bio)
>  {
> -	struct bio_vec iv, ivprv = { NULL };
>  	unsigned int segments = 0;
> -	unsigned int seg_size = 0;
> -	struct bvec_iter iter;
> -	int prev = 0;
> -
> -	bio_for_each_integrity_vec(iv, bio, iter) {
>  
> -		if (prev) {
> -			if (!biovec_phys_mergeable(q, &ivprv, &iv))
> -				goto new_segment;
> -			if (seg_size + iv.bv_len > queue_max_segment_size(q))
> -				goto new_segment;
> -
> -			seg_size += iv.bv_len;
> -		} else {
> -new_segment:
> -			segments++;
> -			seg_size = iv.bv_len;

Q: for the data path the caller submitted bio_vecs can be larger
than the max segment size, and given that the metadata API tries
to follow that in general, I'd assume we could also get metadata
segments larger than the segment size in theory, in which case
we'd need to split a bvec into multiple segments, similar to what
bvec_split_segs does.  Do we need similar handling for metadata?
Or are we going to say that metadata must e.g. always be smaller
than PAGE_SIZE as max_segment_sizse must be >= PAGE_SIZE?

> +	for_each_bio(bio)
> +		segments += bio->bi_integrity->bip_vcnt;

If a bio was cloned bip_vcnt isn't the correct value here,
we'll need to use the iter to count the segments.

>  	bio->bi_next = NULL;
> -	nr_integrity_segs = blk_rq_count_integrity_sg(q, bio);
> +	nr_integrity_segs = blk_rq_count_integrity_segs(bio);
>  	bio->bi_next = next;

And instead of playing the magic with the bio chain here, I'd have
a low-level helper to count the bio segments here.

>  
>  	if (req->nr_integrity_segments + nr_integrity_segs >
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 3ed5181c75610..79cc66275f1cd 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -2548,7 +2548,7 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
>  	blk_rq_bio_prep(rq, bio, nr_segs);
>  #if defined(CONFIG_BLK_DEV_INTEGRITY)
>  	if (bio->bi_opf & REQ_INTEGRITY)
> -		rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q, bio);
> +		rq->nr_integrity_segments = blk_rq_count_integrity_segs(bio);

And here I'm actually pretty sure this is always a single bio and not
a chain either.
Keith Busch Sept. 10, 2024, 4:06 p.m. UTC | #2
On Tue, Sep 10, 2024 at 05:41:05PM +0200, Christoph Hellwig wrote:
> On Wed, Sep 04, 2024 at 08:26:01AM -0700, Keith Busch wrote:
> > From: Keith Busch <kbusch@kernel.org>
> > 
> > The segments are already packed to the queue limits when adding them to
> > the bio,
> 
> I can't really parse this.  I guess this talks about
> bio_integrity_add_page trying to append the payload to the last
> vector when possible?

Exactly. bio_integrity_add_page will use the queue's limits to decide if
it can combine pages into one vector, so appending pages through that
interface will always result in the most compact bip vector.

This patch doesn't combine merged bio's but that's unlikely to have
mergable segments.
 
> > -int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio)
> > +int blk_rq_count_integrity_segs(struct bio *bio)
> >  {
> > -	struct bio_vec iv, ivprv = { NULL };
> >  	unsigned int segments = 0;
> > -	unsigned int seg_size = 0;
> > -	struct bvec_iter iter;
> > -	int prev = 0;
> > -
> > -	bio_for_each_integrity_vec(iv, bio, iter) {
> >  
> > -		if (prev) {
> > -			if (!biovec_phys_mergeable(q, &ivprv, &iv))
> > -				goto new_segment;
> > -			if (seg_size + iv.bv_len > queue_max_segment_size(q))
> > -				goto new_segment;
> > -
> > -			seg_size += iv.bv_len;
> > -		} else {
> > -new_segment:
> > -			segments++;
> > -			seg_size = iv.bv_len;
> 
> Q: for the data path the caller submitted bio_vecs can be larger
> than the max segment size, and given that the metadata API tries
> to follow that in general, I'd assume we could also get metadata
> segments larger than the segment size in theory, in which case
> we'd need to split a bvec into multiple segments, similar to what
> bvec_split_segs does.  Do we need similar handling for metadata?
> Or are we going to say that metadata must e.g. always be smaller
> than PAGE_SIZE as max_segment_sizse must be >= PAGE_SIZE?

The common use cases don't add integrity data until after the bio is
already split in __bio_split_to_limits(), and it won't be split again
after integrity is added via bio_integrity_prep(). The common path
always adds integrity in a single segment, so it's always valid.

There are just a few other users that set their own bio integrity before
submitting (the nvme and scsi target drivers), and I think both can
break from possible bio splitting, but I haven't been able to test
those. 
 
> > +	for_each_bio(bio)
> > +		segments += bio->bi_integrity->bip_vcnt;
> 
> If a bio was cloned bip_vcnt isn't the correct value here,
> we'll need to use the iter to count the segments.

Darn. The common use case doesn't have integrity added until just before
it's dispatched, so the integrity cloning doesn't normally happen for
that case.

Let's just drop patches 6 and 7 from consideration for now. They are a
bit too optimistic, and doesn't really fix anything anyway.
Christoph Hellwig Sept. 11, 2024, 8:17 a.m. UTC | #3
On Tue, Sep 10, 2024 at 10:06:03AM -0600, Keith Busch wrote:
> Exactly. bio_integrity_add_page will use the queue's limits to decide if
> it can combine pages into one vector, so appending pages through that
> interface will always result in the most compact bip vector.
> 
> This patch doesn't combine merged bio's but that's unlikely to have
> mergable segments.

Oh, bio_integrity_add_page uses bvec_try_merge_hw_page.  That means it
doesn't really work well for our stacking model, as any stacking driver
can and will change these.  Maybe we need to take a step back and fully
apply the immutable biovec and split at final submission model to
metadata?

> The common use cases don't add integrity data until after the bio is
> already split in __bio_split_to_limits(), and it won't be split again
> after integrity is added via bio_integrity_prep(). The common path
> always adds integrity in a single segment, so it's always valid.

Where common case is the somewhat awful auto PI in the lowest level
driver.  I'd really much prefer to move to the file system adding
the PI wherever possible, as that way it can actually look into it
(and return it to the driver, etc).

> There are just a few other users that set their own bio integrity before
> submitting (the nvme and scsi target drivers), and I think both can
> break from possible bio splitting, but I haven't been able to test
> those. 

Yes.  Plus dm-integrity and the new io_uring read/write with PI code
that's being submitted. I plan to also support this from the file
system eventually.  None of these seems widely used, which I think
explains the current messy state of PI as soon as merging/splitting
or remapping is involved.
Keith Busch Sept. 11, 2024, 3:28 p.m. UTC | #4
On Wed, Sep 11, 2024 at 10:17:20AM +0200, Christoph Hellwig wrote:
> On Tue, Sep 10, 2024 at 10:06:03AM -0600, Keith Busch wrote:
> > Exactly. bio_integrity_add_page will use the queue's limits to decide if
> > it can combine pages into one vector, so appending pages through that
> > interface will always result in the most compact bip vector.
> > 
> > This patch doesn't combine merged bio's but that's unlikely to have
> > mergable segments.
> 
> Oh, bio_integrity_add_page uses bvec_try_merge_hw_page.  That means it
> doesn't really work well for our stacking model, as any stacking driver
> can and will change these.  Maybe we need to take a step back and fully
> apply the immutable biovec and split at final submission model to
> metadata?

Would you be okay if I resubmit this patchset with just the minimum to
fix the existing merging? I agree stacking and splitting integrity is
currently broken, but I think the merging fixes from this series need to
happen regardless of the how the block stack might change when integrity
data is set in bios.
Christoph Hellwig Sept. 12, 2024, 7:01 a.m. UTC | #5
On Wed, Sep 11, 2024 at 09:28:38AM -0600, Keith Busch wrote:
> Would you be okay if I resubmit this patchset with just the minimum to
> fix the existing merging? I agree stacking and splitting integrity is
> currently broken, but I think the merging fixes from this series need to
> happen regardless of the how the block stack might change when integrity
> data is set in bios.

I guess everything that makes it less broken is good.  I'm a little
worried about removing some of the split/count code we'll eventually
need, though.
diff mbox series

Patch

diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 4365960153b91..c180141b7871c 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -17,39 +17,18 @@ 
 #include "blk.h"
 
 /**
- * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements
- * @q:		request queue
+ * blk_rq_count_integrity_segs - Count number of integrity segments
  * @bio:	bio with integrity metadata attached
  *
  * Description: Returns the number of elements required in a
  * scatterlist corresponding to the integrity metadata in a bio.
  */
-int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio)
+int blk_rq_count_integrity_segs(struct bio *bio)
 {
-	struct bio_vec iv, ivprv = { NULL };
 	unsigned int segments = 0;
-	unsigned int seg_size = 0;
-	struct bvec_iter iter;
-	int prev = 0;
-
-	bio_for_each_integrity_vec(iv, bio, iter) {
 
-		if (prev) {
-			if (!biovec_phys_mergeable(q, &ivprv, &iv))
-				goto new_segment;
-			if (seg_size + iv.bv_len > queue_max_segment_size(q))
-				goto new_segment;
-
-			seg_size += iv.bv_len;
-		} else {
-new_segment:
-			segments++;
-			seg_size = iv.bv_len;
-		}
-
-		prev = 1;
-		ivprv = iv;
-	}
+	for_each_bio(bio)
+		segments += bio->bi_integrity->bip_vcnt;
 
 	return segments;
 }
@@ -62,7 +41,7 @@  int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio)
  *
  * Description: Map the integrity vectors in request into a
  * scatterlist.  The scatterlist must be big enough to hold all
- * elements.  I.e. sized using blk_rq_count_integrity_sg().
+ * elements.  I.e. sized using blk_rq_count_integrity_segs().
  */
 int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio,
 			    struct scatterlist *sglist)
@@ -145,7 +124,7 @@  bool blk_integrity_merge_bio(struct request_queue *q, struct request *req,
 		return false;
 
 	bio->bi_next = NULL;
-	nr_integrity_segs = blk_rq_count_integrity_sg(q, bio);
+	nr_integrity_segs = blk_rq_count_integrity_segs(bio);
 	bio->bi_next = next;
 
 	if (req->nr_integrity_segments + nr_integrity_segs >
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 3ed5181c75610..79cc66275f1cd 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2548,7 +2548,7 @@  static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
 	blk_rq_bio_prep(rq, bio, nr_segs);
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	if (bio->bi_opf & REQ_INTEGRITY)
-		rq->nr_integrity_segments = blk_rq_count_integrity_sg(rq->q, bio);
+		rq->nr_integrity_segments = blk_rq_count_integrity_segs(bio);
 #endif
 
 	/* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index de98049b7ded9..0de05278ac824 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -27,7 +27,7 @@  static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t,
 #ifdef CONFIG_BLK_DEV_INTEGRITY
 int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
 				   struct scatterlist *);
-int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
+int blk_rq_count_integrity_segs(struct bio *);
 
 static inline bool
 blk_integrity_queue_supports_integrity(struct request_queue *q)
@@ -91,8 +91,7 @@  static inline struct bio_vec rq_integrity_vec(struct request *rq)
 				 rq->bio->bi_integrity->bip_iter);
 }
 #else /* CONFIG_BLK_DEV_INTEGRITY */
-static inline int blk_rq_count_integrity_sg(struct request_queue *q,
-					    struct bio *b)
+static inline int blk_rq_count_integrity_segs(struct bio *b)
 {
 	return 0;
 }