diff mbox series

[net-next,2/4] netconsole: Add option to auto-populate CPU number in userdata

Message ID 20241113-netcon_cpu-v1-2-d187bf7c0321@debian.org
State New
Headers show
Series netconsole: Add support for CPU population | expand

Commit Message

Breno Leitao Nov. 13, 2024, 3:10 p.m. UTC
Add a new option to the netconsole subsystem to automatically populate
the CPU number that is sending the log message in the userdata field.

When enabled, this feature will append a "cpu=<cpu_number>" entry to the
userdata string, allowing the receiver to demultiplex and differentiate
between messages sent from different CPUs. This is particularly useful
when dealing with parallel log output that gets intermixed on the
receiver side.

The new option is added as a flag in the enum userdata_auto and can be
controlled through the populate_cpu_nr sysfs attribute in the netconsole
target's configfs hierarchy.  The main benefits of this change are:

 * Improved log message demultiplexing for parallel output.
 * Better visibility into which CPU a particular log message originated
   from.

Create userdata_auto as an bitwise enum, since I have immediate plans to
expand it.

Since the CPU id doesn't need to be precise, do not disable preemption
when getting the CPU id.

Signed-off-by: Breno Leitao <leitao@debian.org>
---
 drivers/net/netconsole.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

Comments

Breno Leitao Nov. 19, 2024, 5:07 p.m. UTC | #1
Hello Jakub,

On Mon, Nov 18, 2024 at 06:33:36PM -0800, Jakub Kicinski wrote:
> Sorry for the late review, I think this will miss v6.13 :(

That is fine, there is no rush for this change.

> On Wed, 13 Nov 2024 07:10:53 -0800 Breno Leitao wrote:
> >  /**
> >   * struct netconsole_target - Represents a configured netconsole target.
> >   * @list:	Links this target into the target_list.
> > @@ -97,6 +105,7 @@ static struct console netconsole_ext;
> >   * @userdata_group:	Links to the userdata configfs hierarchy
> >   * @userdata_complete:	Cached, formatted string of append
> >   * @userdata_length:	String length of userdata_complete
> > + * @userdata_auto:	Kernel auto-populated bitwise fields in userdata.
> >   * @enabled:	On / off knob to enable / disable target.
> >   *		Visible from userspace (read-write).
> >   *		We maintain a strict 1:1 correspondence between this and
> > @@ -123,6 +132,7 @@ struct netconsole_target {
> >  	struct config_group	userdata_group;
> >  	char userdata_complete[MAX_USERDATA_ENTRY_LENGTH * MAX_USERDATA_ITEMS];
> >  	size_t			userdata_length;
> > +	enum userdata_auto	userdata_auto;
> 
> If you want to set multiple bits here type should probably be unsigned
> long. Otherwise the enum will contain combination of its values, which
> are in themselves not valid enum values ... if that makes sense.

Yes, it does make sense. I had the feeling that something was off as
well, but I was unclear if using something different than `enum
userdata_auto` would be better. I will change to `unsigned long`
> 
> >  #endif
> >  	bool			enabled;
> >  	bool			extended;
> 
> > +	/* Check if CPU NR should be populated, and append it to the user
> > +	 * dictionary.
> > +	 */
> > +	if (child_count < MAX_USERDATA_ITEMS && nt->userdata_auto & AUTO_CPU_NR)
> > +		scnprintf(&nt->userdata_complete[complete_idx],
> > +			  MAX_USERDATA_ENTRY_LENGTH, " cpu=%u\n",
> > +			  raw_smp_processor_id());
> 
> I guess it may be tricky for backward compat, but shouldn't we return
> an error rather than silently skip?

yes, this should be easy to do, in fact. Probably return -E2BIG to
userspace when trying to update the entry. I thought about something as
the following patch, and piggy-back into it.

   Author: Breno Leitao <leitao@debian.org>
   Date:   Tue Nov 19 04:32:56 2024 -0800
   
       netconsole: Enforce userdata entry limit
   
       Currently, attempting to add more than MAX_USERDATA_ITEMS to the userdata
       dictionary silently fails. This patch modifies the code to return -E2BIG
       when the number of elements exceeds the preallocated limit, providing clear
       feedback to userspace about the failure.
   
       Suggested-by: Jakub Kicinski <kuba@kernel.org>
       Signed-off-by: Breno Leitao <leitao@debian.org>
   
   diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
   index 4ea44a2f48f7b..41cff8c8e8f42 100644
   --- a/drivers/net/netconsole.c
   +++ b/drivers/net/netconsole.c
   @@ -692,10 +692,11 @@ static ssize_t userdatum_value_show(struct config_item *item, char *buf)
    	return sysfs_emit(buf, "%s\n", &(to_userdatum(item)->value[0]));
    }
   
   -static void update_userdata(struct netconsole_target *nt)
   +static int update_userdata(struct netconsole_target *nt)
    {
    	int complete_idx = 0, child_count = 0;
    	struct list_head *entry;
   +	int ret = 0;
   
    	/* Clear the current string in case the last userdatum was deleted */
    	nt->userdata_length = 0;
   @@ -705,8 +706,10 @@ static void update_userdata(struct netconsole_target *nt)
    		struct userdatum *udm_item;
    		struct config_item *item;
   
   -		if (child_count >= MAX_USERDATA_ITEMS)
   +		if (child_count >= MAX_USERDATA_ITEMS) {
   +			ret = -E2BIG;
    			break;
   +		}
    		child_count++;
   
    		item = container_of(entry, struct config_item, ci_entry);
   @@ -726,6 +729,7 @@ static void update_userdata(struct netconsole_target *nt)
    	}
    	nt->userdata_length = strnlen(nt->userdata_complete,
    				      sizeof(nt->userdata_complete));
   +	return ret;
    }
   
    static ssize_t userdatum_value_store(struct config_item *item, const char *buf,
   @@ -748,8 +752,9 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf,
   
    	ud = to_userdata(item->ci_parent);
    	nt = userdata_to_target(ud);
   -	update_userdata(nt);
   -	ret = count;
   +	ret = update_userdata(nt);
   +	if (!ret)
   +		ret = count;
    out_unlock:
    	mutex_unlock(&dynamic_netconsole_mutex);
    	return ret;
   

> 
> >  	nt->userdata_length = strnlen(nt->userdata_complete,
> >  				      sizeof(nt->userdata_complete));
> >  }
> > @@ -757,7 +788,36 @@ static ssize_t userdatum_value_store(struct config_item *item, const char *buf,
> >  	return ret;
> >  }
> >  
> > +static ssize_t populate_cpu_nr_store(struct config_item *item, const char *buf,
> > +				     size_t count)
> > +{
> > +	struct netconsole_target *nt = to_target(item->ci_parent);
> > +	bool cpu_nr_enabled;
> > +	ssize_t ret;
> > +
> > +	if (!nt)
> > +		return -EINVAL;
> 
> Can this happen? Only if function gets called with a NULL @item
> which would be pretty nutty.

Probably not. It is just me being chicken here. I will remove it for the
next version.

Thanks for the review,
--breno
Breno Leitao Dec. 4, 2024, 4:52 p.m. UTC | #2
On Tue, Nov 19, 2024 at 09:07:45AM -0800, Breno Leitao wrote:
> > >  #endif
> > >  	bool			enabled;
> > >  	bool			extended;
> > 
> > > +	/* Check if CPU NR should be populated, and append it to the user
> > > +	 * dictionary.
> > > +	 */
> > > +	if (child_count < MAX_USERDATA_ITEMS && nt->userdata_auto & AUTO_CPU_NR)
> > > +		scnprintf(&nt->userdata_complete[complete_idx],
> > > +			  MAX_USERDATA_ENTRY_LENGTH, " cpu=%u\n",
> > > +			  raw_smp_processor_id());
> > 
> > I guess it may be tricky for backward compat, but shouldn't we return
> > an error rather than silently skip?
> 
> yes, this should be easy to do, in fact. Probably return -E2BIG to
> userspace when trying to update the entry. I thought about something as
> the following patch, and piggy-back into it.

Back to this topic, in fact, this is not needed at all. 

The configfs make item helper (userdatum_make_item()) checks for
exceeding entries, and fails if an additional entry is created.


	static struct config_item *userdatum_make_item(struct config_group *group,
							const char *name)
	{
		....
		child_count = list_count_nodes(&nt->userdata_group.cg_children);
		if (child_count >= MAX_USERDATA_ITEMS)
			return ERR_PTR(-ENOSPC);


I've sent an additional test for this mechanism, and make the check in
update_userdata() a warning instead of just silently dropping the entry.

https://lore.kernel.org/all/20241204-netcons_overflow_test-v1-0-a85a8d0ace21@debian.org/
diff mbox series

Patch

diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index 965712d65a014c60f91bf35e48f1b112f66b8439..861e8988b8db4f29ae3de02222c9131059694aba 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -90,6 +90,14 @@  static DEFINE_MUTEX(target_cleanup_list_lock);
  */
 static struct console netconsole_ext;
 
+/* Fields requiring kernel auto-population in userdata.
+ * The fields are designed as bitwise flags, allowing multiple fields to be set
+ */
+enum userdata_auto {
+	/* Populate CPU number that is sending the message */
+	AUTO_CPU_NR = BIT(0),
+};
+
 /**
  * struct netconsole_target - Represents a configured netconsole target.
  * @list:	Links this target into the target_list.
@@ -97,6 +105,7 @@  static struct console netconsole_ext;
  * @userdata_group:	Links to the userdata configfs hierarchy
  * @userdata_complete:	Cached, formatted string of append
  * @userdata_length:	String length of userdata_complete
+ * @userdata_auto:	Kernel auto-populated bitwise fields in userdata.
  * @enabled:	On / off knob to enable / disable target.
  *		Visible from userspace (read-write).
  *		We maintain a strict 1:1 correspondence between this and
@@ -123,6 +132,7 @@  struct netconsole_target {
 	struct config_group	userdata_group;
 	char userdata_complete[MAX_USERDATA_ENTRY_LENGTH * MAX_USERDATA_ITEMS];
 	size_t			userdata_length;
+	enum userdata_auto	userdata_auto;
 #endif
 	bool			enabled;
 	bool			extended;
@@ -371,6 +381,18 @@  static ssize_t remote_mac_show(struct config_item *item, char *buf)
 	return sysfs_emit(buf, "%pM\n", to_target(item)->np.remote_mac);
 }
 
+static ssize_t populate_cpu_nr_show(struct config_item *item, char *buf)
+{
+	struct netconsole_target *nt = to_target(item->ci_parent);
+	bool cpu_nr_enabled;
+
+	mutex_lock(&dynamic_netconsole_mutex);
+	cpu_nr_enabled = nt->userdata_auto & AUTO_CPU_NR;
+	mutex_unlock(&dynamic_netconsole_mutex);
+
+	return sysfs_emit(buf, "%d\n", cpu_nr_enabled);
+}
+
 /*
  * This one is special -- targets created through the configfs interface
  * are not enabled (and the corresponding netpoll activated) by default.
@@ -726,6 +748,15 @@  static void update_userdata(struct netconsole_target *nt)
 					  MAX_USERDATA_ENTRY_LENGTH, " %s=%s\n",
 					  item->ci_name, udm_item->value);
 	}
+
+	/* Check if CPU NR should be populated, and append it to the user
+	 * dictionary.
+	 */
+	if (child_count < MAX_USERDATA_ITEMS && nt->userdata_auto & AUTO_CPU_NR)
+		scnprintf(&nt->userdata_complete[complete_idx],
+			  MAX_USERDATA_ENTRY_LENGTH, " cpu=%u\n",
+			  raw_smp_processor_id());
+
 	nt->userdata_length = strnlen(nt->userdata_complete,
 				      sizeof(nt->userdata_complete));
 }
@@ -757,7 +788,36 @@  static ssize_t userdatum_value_store(struct config_item *item, const char *buf,
 	return ret;
 }
 
+static ssize_t populate_cpu_nr_store(struct config_item *item, const char *buf,
+				     size_t count)
+{
+	struct netconsole_target *nt = to_target(item->ci_parent);
+	bool cpu_nr_enabled;
+	ssize_t ret;
+
+	if (!nt)
+		return -EINVAL;
+
+	mutex_lock(&dynamic_netconsole_mutex);
+	ret = kstrtobool(buf, &cpu_nr_enabled);
+	if (ret)
+		goto out_unlock;
+
+	if (cpu_nr_enabled)
+		nt->userdata_auto |= AUTO_CPU_NR;
+	else
+		nt->userdata_auto &= ~AUTO_CPU_NR;
+
+	update_userdata(nt);
+
+	ret = strnlen(buf, count);
+out_unlock:
+	mutex_unlock(&dynamic_netconsole_mutex);
+	return ret;
+}
+
 CONFIGFS_ATTR(userdatum_, value);
+CONFIGFS_ATTR(, populate_cpu_nr);
 
 static struct configfs_attribute *userdatum_attrs[] = {
 	&userdatum_attr_value,
@@ -819,6 +879,7 @@  static void userdatum_drop(struct config_group *group, struct config_item *item)
 }
 
 static struct configfs_attribute *userdata_attrs[] = {
+	&attr_populate_cpu_nr,
 	NULL,
 };