diff mbox series

spi: Reorder fields in 'struct spi_message'

Message ID c112aad16eb47808e1ec10abd87b3d273c969a68.1677704283.git.christophe.jaillet@wanadoo.fr
State Accepted
Commit ae2ade4ba58167f165fbf3db19380f9b72c56db8
Headers show
Series spi: Reorder fields in 'struct spi_message' | expand

Commit Message

Christophe JAILLET March 1, 2023, 8:58 p.m. UTC
Group some fields based on their sizes to reduce holes and avoid padding.
On x86_64, this shrinks the size of the structure from 112 to 96 bytes.

This should have no real impact on memory allocation because 'struct
spi_message' is mostly allocated on the stack, but it can save a few cycles
when the structure is initialized with spi_message_init() and related helpers.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
---
Using pahole

Before:
======
struct spi_message {
	struct list_head           transfers;            /*     0    16 */
	struct spi_device *        spi;                  /*    16     8 */
	unsigned int               is_dma_mapped:1;      /*    24: 0  4 */

	/* XXX 31 bits hole, try to pack */
	/* XXX 4 bytes hole, try to pack */

	void                       (*complete)(void *);  /*    32     8 */
	void *                     context;              /*    40     8 */
	unsigned int               frame_length;         /*    48     4 */
	unsigned int               actual_length;        /*    52     4 */
	int                        status;               /*    56     4 */

	/* XXX 4 bytes hole, try to pack */

	/* --- cacheline 1 boundary (64 bytes) --- */
	struct list_head           queue;                /*    64    16 */
	void *                     state;                /*    80     8 */
	struct list_head           resources;            /*    88    16 */
	bool                       prepared;             /*   104     1 */

	/* size: 112, cachelines: 2, members: 12 */
	/* sum members: 93, holes: 2, sum holes: 8 */
	/* sum bitfield members: 1 bits, bit holes: 1, sum bit holes: 31 bits */
	/* padding: 7 */
	/* last cacheline: 48 bytes */
};


After:
=====
struct spi_message {
	struct list_head           transfers;            /*     0    16 */
	struct spi_device *        spi;                  /*    16     8 */
	unsigned int               is_dma_mapped:1;      /*    24: 0  4 */

	/* XXX 7 bits hole, try to pack */
	/* Bitfield combined with next fields */

	bool                       prepared;             /*    25     1 */

	/* XXX 2 bytes hole, try to pack */

	int                        status;               /*    28     4 */
	void                       (*complete)(void *);  /*    32     8 */
	void *                     context;              /*    40     8 */
	unsigned int               frame_length;         /*    48     4 */
	unsigned int               actual_length;        /*    52     4 */
	struct list_head           queue;                /*    56    16 */
	/* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
	void *                     state;                /*    72     8 */
	struct list_head           resources;            /*    80    16 */

	/* size: 96, cachelines: 2, members: 12 */
	/* sum members: 93, holes: 1, sum holes: 2 */
	/* sum bitfield members: 1 bits, bit holes: 1, sum bit holes: 7 bits */
	/* last cacheline: 32 bytes */
};
---
 include/linux/spi/spi.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

Comments

Muhammad Usama Anjum March 3, 2023, 10:57 a.m. UTC | #1
On 3/2/23 1:58 AM, Christophe JAILLET wrote:
> Group some variables based on their sizes to reduce hole and avoid padding.
> On x86_64, this shrinks the size from 112 to 96 bytes.
> 
> This should have no real impact on memory allocation because 'struct
> spi_message' is mostly used on stack, but it can save a few cycles
> when the structure is initialized with spi_message_init() and co.
> 
> Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Tested-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>

> ---
> Using pahole
> 
> Before:
> ======
> struct spi_message {
> 	struct list_head           transfers;            /*     0    16 */
> 	struct spi_device *        spi;                  /*    16     8 */
> 	unsigned int               is_dma_mapped:1;      /*    24: 0  4 */
> 
> 	/* XXX 31 bits hole, try to pack */
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	void                       (*complete)(void *);  /*    32     8 */
> 	void *                     context;              /*    40     8 */
> 	unsigned int               frame_length;         /*    48     4 */
> 	unsigned int               actual_length;        /*    52     4 */
> 	int                        status;               /*    56     4 */
> 
> 	/* XXX 4 bytes hole, try to pack */
> 
> 	/* --- cacheline 1 boundary (64 bytes) --- */
> 	struct list_head           queue;                /*    64    16 */
> 	void *                     state;                /*    80     8 */
> 	struct list_head           resources;            /*    88    16 */
> 	bool                       prepared;             /*   104     1 */
> 
> 	/* size: 112, cachelines: 2, members: 12 */
> 	/* sum members: 93, holes: 2, sum holes: 8 */
> 	/* sum bitfield members: 1 bits, bit holes: 1, sum bit holes: 31 bits */
> 	/* padding: 7 */
> 	/* last cacheline: 48 bytes */
> };
> 
> 
> After:
> =====
> struct spi_message {
> 	struct list_head           transfers;            /*     0    16 */
> 	struct spi_device *        spi;                  /*    16     8 */
> 	unsigned int               is_dma_mapped:1;      /*    24: 0  4 */
> 
> 	/* XXX 7 bits hole, try to pack */
> 	/* Bitfield combined with next fields */
> 
> 	bool                       prepared;             /*    25     1 */
> 
> 	/* XXX 2 bytes hole, try to pack */
> 
> 	int                        status;               /*    28     4 */
> 	void                       (*complete)(void *);  /*    32     8 */
> 	void *                     context;              /*    40     8 */
> 	unsigned int               frame_length;         /*    48     4 */
> 	unsigned int               actual_length;        /*    52     4 */
> 	struct list_head           queue;                /*    56    16 */
> 	/* --- cacheline 1 boundary (64 bytes) was 8 bytes ago --- */
> 	void *                     state;                /*    72     8 */
> 	struct list_head           resources;            /*    80    16 */
> 
> 	/* size: 96, cachelines: 2, members: 12 */
> 	/* sum members: 93, holes: 1, sum holes: 2 */
> 	/* sum bitfield members: 1 bits, bit holes: 1, sum bit holes: 7 bits */
> 	/* last cacheline: 32 bytes */
> };
> ---
>  include/linux/spi/spi.h | 8 ++++----
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
> index 4fa26b9a3572..bdb35a91b4bf 100644
> --- a/include/linux/spi/spi.h
> +++ b/include/linux/spi/spi.h
> @@ -1093,6 +1093,9 @@ struct spi_message {
>  
>  	unsigned		is_dma_mapped:1;
>  
> +	/* spi_prepare_message() was called for this message */
> +	bool			prepared;
> +
>  	/* REVISIT:  we might want a flag affecting the behavior of the
>  	 * last transfer ... allowing things like "read 16 bit length L"
>  	 * immediately followed by "read L bytes".  Basically imposing
> @@ -1105,11 +1108,11 @@ struct spi_message {
>  	 */
>  
>  	/* Completion is reported through a callback */
> +	int			status;
>  	void			(*complete)(void *context);
>  	void			*context;
>  	unsigned		frame_length;
>  	unsigned		actual_length;
> -	int			status;
>  
>  	/* For optional use by whatever driver currently owns the
>  	 * spi_message ...  between calls to spi_async and then later
> @@ -1120,9 +1123,6 @@ struct spi_message {
>  
>  	/* List of spi_res reources when the spi message is processed */
>  	struct list_head        resources;
> -
> -	/* spi_prepare_message() was called for this message */
> -	bool			prepared;
>  };
>  
>  static inline void spi_message_init_no_memset(struct spi_message *m)
diff mbox series

Patch

diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h
index 4fa26b9a3572..bdb35a91b4bf 100644
--- a/include/linux/spi/spi.h
+++ b/include/linux/spi/spi.h
@@ -1093,6 +1093,9 @@  struct spi_message {
 
 	unsigned		is_dma_mapped:1;
 
+	/* spi_prepare_message() was called for this message */
+	bool			prepared;
+
 	/* REVISIT:  we might want a flag affecting the behavior of the
 	 * last transfer ... allowing things like "read 16 bit length L"
 	 * immediately followed by "read L bytes".  Basically imposing
@@ -1105,11 +1108,11 @@  struct spi_message {
 	 */
 
 	/* Completion is reported through a callback */
+	int			status;
 	void			(*complete)(void *context);
 	void			*context;
 	unsigned		frame_length;
 	unsigned		actual_length;
-	int			status;
 
 	/* For optional use by whatever driver currently owns the
 	 * spi_message ...  between calls to spi_async and then later
@@ -1120,9 +1123,6 @@  struct spi_message {
 
 	/* List of spi_res reources when the spi message is processed */
 	struct list_head        resources;
-
-	/* spi_prepare_message() was called for this message */
-	bool			prepared;
 };
 
 static inline void spi_message_init_no_memset(struct spi_message *m)