Message ID | 20201016045258.16246-3-dgilbert@interlog.com |
---|---|
State | New |
Headers | show |
Series | None | expand |
Hi Douglas, AFAICS this patch - and also patch 3 - are not correct. When started with SG_MITER_ATOMIC, sg_miter_next and sg_miter_stop use the k(un)map_atomic calls. But these have to be used strictly nested according to docu and code. The below code uses the atomic mappings in overlapping mode. Regards, Bodo Am 16.10.20 um 06:52 schrieb Douglas Gilbert: > Both the SCSI and NVMe subsystems receive user data from the block > layer in scatterlist_s (aka scatter gather lists (sgl) which are > often arrays). If drivers in those subsystems represent storage > (e.g. a ramdisk) or cache "hot" user data then they may also > choose to use scatterlist_s. Currently there are no sgl to sgl > operations in the kernel. Start with a copy. > > Signed-off-by: Douglas Gilbert <dgilbert@interlog.com> > --- > include/linux/scatterlist.h | 4 ++ > lib/scatterlist.c | 86 +++++++++++++++++++++++++++++++++++++ > 2 files changed, 90 insertions(+) > > diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h > index 80178afc2a4a..6649414c0749 100644 > --- a/include/linux/scatterlist.h > +++ b/include/linux/scatterlist.h > @@ -321,6 +321,10 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents, > size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents, > size_t buflen, off_t skip); > > +size_t sgl_copy_sgl(struct scatterlist *d_sgl, unsigned int d_nents, off_t d_skip, > + struct scatterlist *s_sgl, unsigned int s_nents, off_t s_skip, > + size_t n_bytes); > + > /* > * Maximum number of entries that will be allocated in one piece, if > * a list larger than this is required then chaining will be utilized. 
> diff --git a/lib/scatterlist.c b/lib/scatterlist.c > index d5770e7f1030..1ec2c909c8d4 100644 > --- a/lib/scatterlist.c > +++ b/lib/scatterlist.c > @@ -974,3 +974,89 @@ size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents, > return offset; > } > EXPORT_SYMBOL(sg_zero_buffer); > + > +/** > + * sgl_copy_sgl - Copy over a destination sgl from a source sgl > + * @d_sgl: Destination sgl > + * @d_nents: Number of SG entries in destination sgl > + * @d_skip: Number of bytes to skip in destination before copying > + * @s_sgl: Source sgl > + * @s_nents: Number of SG entries in source sgl > + * @s_skip: Number of bytes to skip in source before copying > + * @n_bytes: The number of bytes to copy > + * > + * Returns the number of copied bytes. > + * > + * Notes: > + * Destination arguments appear before the source arguments, as with memcpy(). > + * > + * Stops copying if the end of d_sgl or s_sgl is reached. > + * > + * Since memcpy() is used, overlapping copies (where d_sgl and s_sgl belong > + * to the same sgl and the copy regions overlap) are not supported. > + * > + * If d_skip is large, potentially spanning multiple d_nents then some > + * integer arithmetic to adjust d_sgl may improve performance. For example > + * if d_sgl is built using sgl_alloc_order(chainable=false) then the sgl > + * will be an array with equally sized segments facilitating that > + * arithmetic. The suggestion applies to s_skip, s_sgl and s_nents as well. 
> + * > + **/ > +size_t sgl_copy_sgl(struct scatterlist *d_sgl, unsigned int d_nents, off_t d_skip, > + struct scatterlist *s_sgl, unsigned int s_nents, off_t s_skip, > + size_t n_bytes) > +{ > + size_t d_off, s_off, len, d_len, s_len; > + size_t offset = 0; > + struct sg_mapping_iter d_iter; > + struct sg_mapping_iter s_iter; > + > + if (n_bytes == 0) > + return 0; > + sg_miter_start(&d_iter, d_sgl, d_nents, SG_MITER_ATOMIC | SG_MITER_TO_SG); > + sg_miter_start(&s_iter, s_sgl, s_nents, SG_MITER_ATOMIC | SG_MITER_FROM_SG); > + if (!sg_miter_skip(&d_iter, d_skip)) > + goto fini; > + if (!sg_miter_skip(&s_iter, s_skip)) > + goto fini; > + > + for (d_off = 0, s_off = 0; true ; ) { > + /* Assume d_iter.length and s_iter.length can never be 0 */ > + if (d_off == 0) { > + if (!sg_miter_next(&d_iter)) > + break; > + d_len = d_iter.length; > + } else { > + d_len = d_iter.length - d_off; > + } > + if (s_off == 0) { > + if (!sg_miter_next(&s_iter)) > + break; > + s_len = s_iter.length; > + } else { > + s_len = s_iter.length - s_off; > + } > + len = min3(d_len, s_len, n_bytes - offset); > + > + memcpy(d_iter.addr + d_off, s_iter.addr + s_off, len); > + offset += len; > + if (offset >= n_bytes) > + break; > + if (d_len == s_len) { > + d_off = 0; > + s_off = 0; > + } else if (d_len < s_len) { > + d_off = 0; > + s_off += len; > + } else { > + d_off += len; > + s_off = 0; > + } > + } > +fini: > + sg_miter_stop(&d_iter); > + sg_miter_stop(&s_iter); > + return offset; > +} > +EXPORT_SYMBOL(sgl_copy_sgl); > + >
On 2020-10-16 7:17 a.m., Bodo Stroesser wrote: > Hi Douglas, > > AFAICS this patch - and also patch 3 - are not correct. > When started with SG_MITER_ATOMIC, sg_miter_next and sg_miter_stop use > the k(un)map_atomic calls. But these have to be used strictly nested > according to docu and code. > The below code uses the atomic mappings in overlapping mode. That being the case, I'll add d_flags and s_flags arguments that are expected to take either 0 or SG_MITER_ATOMIC and re-test. There probably should be a warning in the notes not to set both d_flags and s_flags to SG_MITER_ATOMIC. My testing to date has not been in irq or soft interrupt state. I should be able to rig a test for the latter. Thanks Doug Gilbert > Am 16.10.20 um 06:52 schrieb Douglas Gilbert: >> Both the SCSI and NVMe subsystems receive user data from the block >> layer in scatterlist_s (aka scatter gather lists (sgl) which are >> often arrays). If drivers in those subsystems represent storage >> (e.g. a ramdisk) or cache "hot" user data then they may also >> choose to use scatterlist_s. Currently there are no sgl to sgl >> operations in the kernel. Start with a copy. 
>> >> Signed-off-by: Douglas Gilbert <dgilbert@interlog.com> >> --- >> include/linux/scatterlist.h | 4 ++ >> lib/scatterlist.c | 86 +++++++++++++++++++++++++++++++++++++ >> 2 files changed, 90 insertions(+) >> >> diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h >> index 80178afc2a4a..6649414c0749 100644 >> --- a/include/linux/scatterlist.h >> +++ b/include/linux/scatterlist.h >> @@ -321,6 +321,10 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, >> unsigned int nents, >> size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents, >> size_t buflen, off_t skip); >> +size_t sgl_copy_sgl(struct scatterlist *d_sgl, unsigned int d_nents, off_t >> d_skip, >> + struct scatterlist *s_sgl, unsigned int s_nents, off_t s_skip, >> + size_t n_bytes); >> + >> /* >> * Maximum number of entries that will be allocated in one piece, if >> * a list larger than this is required then chaining will be utilized. >> diff --git a/lib/scatterlist.c b/lib/scatterlist.c >> index d5770e7f1030..1ec2c909c8d4 100644 >> --- a/lib/scatterlist.c >> +++ b/lib/scatterlist.c >> @@ -974,3 +974,89 @@ size_t sg_zero_buffer(struct scatterlist *sgl, unsigned >> int nents, >> return offset; >> } >> EXPORT_SYMBOL(sg_zero_buffer); >> + >> +/** >> + * sgl_copy_sgl - Copy over a destination sgl from a source sgl >> + * @d_sgl: Destination sgl >> + * @d_nents: Number of SG entries in destination sgl >> + * @d_skip: Number of bytes to skip in destination before copying >> + * @s_sgl: Source sgl >> + * @s_nents: Number of SG entries in source sgl >> + * @s_skip: Number of bytes to skip in source before copying >> + * @n_bytes: The number of bytes to copy >> + * >> + * Returns the number of copied bytes. >> + * >> + * Notes: >> + * Destination arguments appear before the source arguments, as with memcpy(). >> + * >> + * Stops copying if the end of d_sgl or s_sgl is reached. 
>> + * >> + * Since memcpy() is used, overlapping copies (where d_sgl and s_sgl belong >> + * to the same sgl and the copy regions overlap) are not supported. >> + * >> + * If d_skip is large, potentially spanning multiple d_nents then some >> + * integer arithmetic to adjust d_sgl may improve performance. For example >> + * if d_sgl is built using sgl_alloc_order(chainable=false) then the sgl >> + * will be an array with equally sized segments facilitating that >> + * arithmetic. The suggestion applies to s_skip, s_sgl and s_nents as well. >> + * >> + **/ >> +size_t sgl_copy_sgl(struct scatterlist *d_sgl, unsigned int d_nents, off_t >> d_skip, >> + struct scatterlist *s_sgl, unsigned int s_nents, off_t s_skip, >> + size_t n_bytes) >> +{ >> + size_t d_off, s_off, len, d_len, s_len; >> + size_t offset = 0; >> + struct sg_mapping_iter d_iter; >> + struct sg_mapping_iter s_iter; >> + >> + if (n_bytes == 0) >> + return 0; >> + sg_miter_start(&d_iter, d_sgl, d_nents, SG_MITER_ATOMIC | SG_MITER_TO_SG); >> + sg_miter_start(&s_iter, s_sgl, s_nents, SG_MITER_ATOMIC | SG_MITER_FROM_SG); >> + if (!sg_miter_skip(&d_iter, d_skip)) >> + goto fini; >> + if (!sg_miter_skip(&s_iter, s_skip)) >> + goto fini; >> + >> + for (d_off = 0, s_off = 0; true ; ) { >> + /* Assume d_iter.length and s_iter.length can never be 0 */ >> + if (d_off == 0) { >> + if (!sg_miter_next(&d_iter)) >> + break; >> + d_len = d_iter.length; >> + } else { >> + d_len = d_iter.length - d_off; >> + } >> + if (s_off == 0) { >> + if (!sg_miter_next(&s_iter)) >> + break; >> + s_len = s_iter.length; >> + } else { >> + s_len = s_iter.length - s_off; >> + } >> + len = min3(d_len, s_len, n_bytes - offset); >> + >> + memcpy(d_iter.addr + d_off, s_iter.addr + s_off, len); >> + offset += len; >> + if (offset >= n_bytes) >> + break; >> + if (d_len == s_len) { >> + d_off = 0; >> + s_off = 0; >> + } else if (d_len < s_len) { >> + d_off = 0; >> + s_off += len; >> + } else { >> + d_off += len; >> + s_off = 0; >> + } >> + } >> 
+fini: >> + sg_miter_stop(&d_iter); >> + sg_miter_stop(&s_iter); >> + return offset; >> +} >> +EXPORT_SYMBOL(sgl_copy_sgl); >> + >>
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 80178afc2a4a..6649414c0749 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -321,6 +321,10 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents, size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents, size_t buflen, off_t skip); +size_t sgl_copy_sgl(struct scatterlist *d_sgl, unsigned int d_nents, off_t d_skip, + struct scatterlist *s_sgl, unsigned int s_nents, off_t s_skip, + size_t n_bytes); + /* * Maximum number of entries that will be allocated in one piece, if * a list larger than this is required then chaining will be utilized. diff --git a/lib/scatterlist.c b/lib/scatterlist.c index d5770e7f1030..1ec2c909c8d4 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -974,3 +974,89 @@ size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents, return offset; } EXPORT_SYMBOL(sg_zero_buffer); + +/** + * sgl_copy_sgl - Copy over a destination sgl from a source sgl + * @d_sgl: Destination sgl + * @d_nents: Number of SG entries in destination sgl + * @d_skip: Number of bytes to skip in destination before copying + * @s_sgl: Source sgl + * @s_nents: Number of SG entries in source sgl + * @s_skip: Number of bytes to skip in source before copying + * @n_bytes: The number of bytes to copy + * + * Returns the number of copied bytes. + * + * Notes: + * Destination arguments appear before the source arguments, as with memcpy(). + * + * Stops copying if the end of d_sgl or s_sgl is reached. + * + * Since memcpy() is used, overlapping copies (where d_sgl and s_sgl belong + * to the same sgl and the copy regions overlap) are not supported. + * + * If d_skip is large, potentially spanning multiple d_nents then some + * integer arithmetic to adjust d_sgl may improve performance. 
For example + * if d_sgl is built using sgl_alloc_order(chainable=false) then the sgl + * will be an array with equally sized segments facilitating that + * arithmetic. The suggestion applies to s_skip, s_sgl and s_nents as well. + * + **/ +size_t sgl_copy_sgl(struct scatterlist *d_sgl, unsigned int d_nents, off_t d_skip, + struct scatterlist *s_sgl, unsigned int s_nents, off_t s_skip, + size_t n_bytes) +{ + size_t d_off, s_off, len, d_len, s_len; + size_t offset = 0; + struct sg_mapping_iter d_iter; + struct sg_mapping_iter s_iter; + + if (n_bytes == 0) + return 0; + sg_miter_start(&d_iter, d_sgl, d_nents, SG_MITER_ATOMIC | SG_MITER_TO_SG); + sg_miter_start(&s_iter, s_sgl, s_nents, SG_MITER_ATOMIC | SG_MITER_FROM_SG); + if (!sg_miter_skip(&d_iter, d_skip)) + goto fini; + if (!sg_miter_skip(&s_iter, s_skip)) + goto fini; + + for (d_off = 0, s_off = 0; true ; ) { + /* Assume d_iter.length and s_iter.length can never be 0 */ + if (d_off == 0) { + if (!sg_miter_next(&d_iter)) + break; + d_len = d_iter.length; + } else { + d_len = d_iter.length - d_off; + } + if (s_off == 0) { + if (!sg_miter_next(&s_iter)) + break; + s_len = s_iter.length; + } else { + s_len = s_iter.length - s_off; + } + len = min3(d_len, s_len, n_bytes - offset); + + memcpy(d_iter.addr + d_off, s_iter.addr + s_off, len); + offset += len; + if (offset >= n_bytes) + break; + if (d_len == s_len) { + d_off = 0; + s_off = 0; + } else if (d_len < s_len) { + d_off = 0; + s_off += len; + } else { + d_off += len; + s_off = 0; + } + } +fini: + sg_miter_stop(&d_iter); + sg_miter_stop(&s_iter); + return offset; +} +EXPORT_SYMBOL(sgl_copy_sgl); +
Both the SCSI and NVMe subsystems receive user data from the block layer in scatterlist_s (aka scatter-gather lists (sgl_s), which are often arrays). If drivers in those subsystems represent storage (e.g. a ramdisk) or cache "hot" user data, then they may also choose to use scatterlist_s. Currently there are no sgl-to-sgl operations in the kernel. Start with a copy operation, sgl_copy_sgl(). Signed-off-by: Douglas Gilbert <dgilbert@interlog.com> --- include/linux/scatterlist.h | 4 ++ lib/scatterlist.c | 86 +++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+)