diff mbox series

[v2,1/2] livepatch: Allow user to specify functions to search for on a stack

Message ID 20211210124449.21537-2-mbenes@suse.cz
State New
Headers show
Series livepatch: Allow user to specify functions to search for on a stack | expand

Commit Message

Miroslav Benes Dec. 10, 2021, 12:44 p.m. UTC
livepatch's consistency model requires that no live patched function
must be found on any task's stack during a transition process after a
live patch is applied. It is achieved by walking through stacks of all
blocked tasks.

The user might also want to define more functions to search for without
them being patched at all. It may either help with preparing a live
patch, which would otherwise require adding more functions just to
achieve the consistency, or it can be used to overcome deficiencies the
stack checking inherently has.

Consider the following example, in which GCC may optimize function
parent() so that a part of it is moved to a different section
(child.cold()) and parent() jumps to it. If both parent() and child2()
are to patching targets, things can break easily if a task sleeps in
child.cold() and new patched child2() changes ABI. parent() is not found
on the stack, child.cold() jumps back to parent() eventually and new
child2() is called.

  parent():		/* to-be-patched */
    ...
    jmp child.cold()	/* cannot be patched */
      ...
      schedule()
      ...
      jmp <back>
    ...
    call child2()	/* to-be-patched */
    ...

Allow the user to specify such functions (parent() in the example) using
stack_only member of struct klp_func.

The functions are still shown in sysfs directory. Nop objects are not
created for them.

Signed-off-by: Miroslav Benes <mbenes@suse.cz>
---
 include/linux/livepatch.h     |  3 +++
 kernel/livepatch/core.c       | 28 +++++++++++++++++++++++++++-
 kernel/livepatch/patch.c      |  6 ++++++
 kernel/livepatch/transition.c |  5 ++++-
 4 files changed, 40 insertions(+), 2 deletions(-)

Comments

Josh Poimboeuf Dec. 13, 2021, 7 p.m. UTC | #1
On Fri, Dec 10, 2021 at 01:44:48PM +0100, Miroslav Benes wrote:
> --- a/include/linux/livepatch.h
> +++ b/include/linux/livepatch.h
> @@ -29,6 +29,8 @@
>   * @new_func:	pointer to the patched function code
>   * @old_sympos: a hint indicating which symbol position the old function
>   *		can be found (optional)
> + * @stack_only:	only search for the function (specified by old_name) on any
> + * 		task's stack

This should probably make it clear that it doesn't actually patch the
function.  How about something like:

 * @stack_only:	don't patch old_func; only make sure it's not on the stack


>   * @old_func:	pointer to the function being patched
>   * @kobj:	kobject for sysfs resources
>   * @node:	list node for klp_object func_list
> @@ -66,6 +68,7 @@ struct klp_func {
>  	 * in kallsyms for the given object is used.
>  	 */
>  	unsigned long old_sympos;
> +	bool stack_only;
>  
>  	/* internal */
>  	void *old_func;
> diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
> index 335d988bd811..62ff4180dc9b 100644
> --- a/kernel/livepatch/core.c
> +++ b/kernel/livepatch/core.c
> @@ -89,6 +89,10 @@ static struct klp_func *klp_find_func(struct klp_object *obj,
>  	struct klp_func *func;
>  
>  	klp_for_each_func(obj, func) {
> +		/* Do not create nop klp_func for stack_only function */
> +		if (func->stack_only)
> +			return func;
> +

This has me confused.  Maybe I'm missing something.

First, klp_find_func() is a surprising place for this behavior.

Second, if obj's first func happens to be stack_only, this will short
circuit the rest of the list traversal and will effectively prevent nops
for all the rest of the funcs, even if they're *not* stack_only.

Third, I'm not sure this approach would even make sense.  I was thinking
there are two special cases to consider:

1) If old_func is stack_only, that's effectively the same as !old_func,
   in which case we don't need a nop.

2) If old_func is *not* stack_only, but new_func *is* stack_only, that's
   effectively the same as (old_func && !new_func), in which case we
   *do* want a nop.  Since new_func already exists, we can just convert
   it from stack_only to nop.

Does that make sense?  Or am I missing something?

For example:

--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -536,9 +536,23 @@ static int klp_add_object_nops(struct klp_patch *patch,
 	}
 
 	klp_for_each_func(old_obj, old_func) {
+		if (old_func->stack_only) {
+			/* equivalent to !old_func; no nop needed */
+			continue;
+		}
 		func = klp_find_func(obj, old_func);
-		if (func)
+		if (func) {
+			if (func->stack_only) {
+				/*
+				 * equivalent to (old_func && !new_func);
+				 * convert stack_only to nop:
+				 */
+				func->stack_only = false;
+				func->nop = true;
+			}
+
 			continue;
+		}
 
 		func = klp_alloc_func_nop(old_func, obj);
 		if (!func)


And while we're at it, we may want to rename "{func,obj}" to
"new_{func,obj}" for those functions which have "old_{func,obj}".  It
would help prevent confusion between the two.

>  		if ((strcmp(old_func->old_name, func->old_name) == 0) &&
>  		    (old_func->old_sympos == func->old_sympos)) {
>  			return func;
> @@ -499,6 +503,17 @@ static struct klp_func *klp_alloc_func_nop(struct klp_func *old_func,
>  	return func;
>  }
>  
> +static bool klp_is_object_stack_only(struct klp_object *old_obj)
> +{
> +	struct klp_func *old_func;
> +
> +	klp_for_each_func(old_obj, old_func)
> +		if (!old_func->stack_only)
> +			return false;
> +
> +	return true;
> +}
> +
>  static int klp_add_object_nops(struct klp_patch *patch,
>  			       struct klp_object *old_obj)
>  {
> @@ -508,6 +523,13 @@ static int klp_add_object_nops(struct klp_patch *patch,
>  	obj = klp_find_object(patch, old_obj);
>  
>  	if (!obj) {
> +		/*
> +		 * Do not create nop klp_object for old_obj which contains
> +		 * stack_only functions only.
> +		 */
> +		if (klp_is_object_stack_only(old_obj))
> +			return 0;

This code is already pretty self explanatory and the comment isn't
needed IMO.

> +
>  		obj = klp_alloc_object_dynamic(old_obj->name, patch);
>  		if (!obj)
>  			return -ENOMEM;
> @@ -723,8 +745,9 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func)
>  	/*
>  	 * NOPs get the address later. The patched module must be loaded,
>  	 * see klp_init_object_loaded().
> +	 * stack_only functions do not get new_func addresses at all.
>  	 */
> -	if (!func->new_func && !func->nop)
> +	if (!func->new_func && !func->nop && !func->stack_only)
>  		return -EINVAL;

Same here, I'm not sure this comment really helps.

>  
>  	if (strlen(func->old_name) >= KSYM_NAME_LEN)
> @@ -804,6 +827,9 @@ static int klp_init_object_loaded(struct klp_patch *patch,
>  		if (func->nop)
>  			func->new_func = func->old_func;
>  
> +		if (func->stack_only)
> +			continue;
> +
>  		ret = kallsyms_lookup_size_offset((unsigned long)func->new_func,
>  						  &func->new_size, NULL);
>  		if (!ret) {
> diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
> index fe316c021d73..221ed691cc7f 100644
> --- a/kernel/livepatch/patch.c
> +++ b/kernel/livepatch/patch.c
> @@ -250,6 +250,9 @@ static void __klp_unpatch_object(struct klp_object *obj, bool nops_only)
>  		if (nops_only && !func->nop)
>  			continue;
>  
> +		if (func->stack_only)
> +			continue;
> +
>  		if (func->patched)
>  			klp_unpatch_func(func);
>  	}
> @@ -273,6 +276,9 @@ int klp_patch_object(struct klp_object *obj)
>  		return -EINVAL;
>  
>  	klp_for_each_func(obj, func) {
> +		if (func->stack_only)
> +			continue;
> +

Instead of these stack_only checks, we might want to add a new
klp_for_each_patchable_func() macro.  It could also be used in
klp_add_object_nops() to filter out old_func->stack_only.

>  		ret = klp_patch_func(func);
>  		if (ret) {
>  			klp_unpatch_object(obj);
> diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
> index 5683ac0d2566..fa0630fcab1a 100644
> --- a/kernel/livepatch/transition.c
> +++ b/kernel/livepatch/transition.c
> @@ -200,7 +200,10 @@ static int klp_check_stack_func(struct klp_func *func, unsigned long *entries,
>  	for (i = 0; i < nr_entries; i++) {
>  		address = entries[i];
>  
> -		if (klp_target_state == KLP_UNPATCHED) {
> +		if (func->stack_only) {
> +			func_addr = (unsigned long)func->old_func;
> +			func_size = func->old_size;
> +		} else if (klp_target_state == KLP_UNPATCHED) {

Hm, what does this mean for the unpatching case?  What if the new
function's .cold child is on the stack when we're trying to unpatch?

Would it make sense to allow the user specify a 'new_func' for
stack_only, which is a func to check on the stack when unpatching?  Then
new_func could point to the new .cold child.  And then
klp_check_stack_func() wouldn't need a special case.
Miroslav Benes Dec. 14, 2021, 8:47 a.m. UTC | #2
On Mon, 13 Dec 2021, Josh Poimboeuf wrote:

> On Fri, Dec 10, 2021 at 01:44:48PM +0100, Miroslav Benes wrote:
> > --- a/include/linux/livepatch.h
> > +++ b/include/linux/livepatch.h
> > @@ -29,6 +29,8 @@
> >   * @new_func:	pointer to the patched function code
> >   * @old_sympos: a hint indicating which symbol position the old function
> >   *		can be found (optional)
> > + * @stack_only:	only search for the function (specified by old_name) on any
> > + * 		task's stack
> 
> This should probably make it clear that it doesn't actually patch the
> function.  How about something like:
> 
>  * @stack_only:	don't patch old_func; only make sure it's not on the stack

Definitely better, thanks. 
 
> >   * @old_func:	pointer to the function being patched
> >   * @kobj:	kobject for sysfs resources
> >   * @node:	list node for klp_object func_list
> > @@ -66,6 +68,7 @@ struct klp_func {
> >  	 * in kallsyms for the given object is used.
> >  	 */
> >  	unsigned long old_sympos;
> > +	bool stack_only;
> >  
> >  	/* internal */
> >  	void *old_func;
> > diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
> > index 335d988bd811..62ff4180dc9b 100644
> > --- a/kernel/livepatch/core.c
> > +++ b/kernel/livepatch/core.c
> > @@ -89,6 +89,10 @@ static struct klp_func *klp_find_func(struct klp_object *obj,
> >  	struct klp_func *func;
> >  
> >  	klp_for_each_func(obj, func) {
> > +		/* Do not create nop klp_func for stack_only function */
> > +		if (func->stack_only)
> > +			return func;
> > +
> 
> This has me confused.  Maybe I'm missing something.
> 
> First, klp_find_func() is a surprising place for this behavior.

You are right, it is not a nice place.
 
> Second, if obj's first func happens to be stack_only, this will short
> circuit the rest of the list traversal and will effectively prevent nops
> for all the rest of the funcs, even if they're *not* stack_only.

Oh, of course.

> Third, I'm not sure this approach would even make sense.  I was thinking
> there are two special cases to consider:
> 
> 1) If old_func is stack_only, that's effectively the same as !old_func,
>    in which case we don't need a nop.

Correct.

> 2) If old_func is *not* stack_only, but new_func *is* stack_only, that's
>    effectively the same as (old_func && !new_func), in which case we
>    *do* want a nop.  Since new_func already exists, we can just convert
>    it from stack_only to nop.

And I somehow missed this case.

> Does that make sense?  Or am I missing something?
> 
> For example:
> 
> --- a/kernel/livepatch/core.c
> +++ b/kernel/livepatch/core.c
> @@ -536,9 +536,23 @@ static int klp_add_object_nops(struct klp_patch *patch,
>  	}
>  
>  	klp_for_each_func(old_obj, old_func) {
> +		if (old_func->stack_only) {
> +			/* equivalent to !old_func; no nop needed */
> +			continue;
> +		}

Nicer.

>  		func = klp_find_func(obj, old_func);
> -		if (func)
> +		if (func) {
> +			if (func->stack_only) {
> +				/*
> +				 * equivalent to (old_func && !new_func);
> +				 * convert stack_only to nop:
> +				 */
> +				func->stack_only = false;
> +				func->nop = true;
> +			}
> +
>  			continue;
> +		}
>
>  		func = klp_alloc_func_nop(old_func, obj);
>  		if (!func)

I think that it cannot be that straightforward. We assume that nop 
functions are allocated dynamically elsewhere in the code, so the 
conversion here from a stack_only function to a nop would cause troubles. 
I like the idea though. We would also need to set func->new_func for it 
and there may be some more places to handle, which I am missing now.

If I understood it correctly, Petr elsewhere in the thread proposed to 
ignore nop functions completely. They would be allocated, not used and 
discarded once a replace live patch is applied. I did not like the idea, 
because it seemed hacky. And it would probably suffer from similar issues 
as the above.
 
> And while we're at it, we may want to rename "{func,obj}" to
> "new_{func,obj}" for those functions which have "old_{func,obj}".  It
> would help prevent confusion between the two.

Yes, the naming here does not help.

> >  		if ((strcmp(old_func->old_name, func->old_name) == 0) &&
> >  		    (old_func->old_sympos == func->old_sympos)) {
> >  			return func;
> > @@ -499,6 +503,17 @@ static struct klp_func *klp_alloc_func_nop(struct klp_func *old_func,
> >  	return func;
> >  }
> >  
> > +static bool klp_is_object_stack_only(struct klp_object *old_obj)
> > +{
> > +	struct klp_func *old_func;
> > +
> > +	klp_for_each_func(old_obj, old_func)
> > +		if (!old_func->stack_only)
> > +			return false;
> > +
> > +	return true;
> > +}
> > +
> >  static int klp_add_object_nops(struct klp_patch *patch,
> >  			       struct klp_object *old_obj)
> >  {
> > @@ -508,6 +523,13 @@ static int klp_add_object_nops(struct klp_patch *patch,
> >  	obj = klp_find_object(patch, old_obj);
> >  
> >  	if (!obj) {
> > +		/*
> > +		 * Do not create nop klp_object for old_obj which contains
> > +		 * stack_only functions only.
> > +		 */
> > +		if (klp_is_object_stack_only(old_obj))
> > +			return 0;
> 
> This code is already pretty self explanatory and the comment isn't
> needed IMO.

Ok.

> > +
> >  		obj = klp_alloc_object_dynamic(old_obj->name, patch);
> >  		if (!obj)
> >  			return -ENOMEM;
> > @@ -723,8 +745,9 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func)
> >  	/*
> >  	 * NOPs get the address later. The patched module must be loaded,
> >  	 * see klp_init_object_loaded().
> > +	 * stack_only functions do not get new_func addresses at all.
> >  	 */
> > -	if (!func->new_func && !func->nop)
> > +	if (!func->new_func && !func->nop && !func->stack_only)
> >  		return -EINVAL;
> 
> Same here, I'm not sure this comment really helps.

Hm, I think the original comment is useful and it would be strange to add 
a new check without extending it. I can remove the hunk, no problem.

> >  
> >  	if (strlen(func->old_name) >= KSYM_NAME_LEN)
> > @@ -804,6 +827,9 @@ static int klp_init_object_loaded(struct klp_patch *patch,
> >  		if (func->nop)
> >  			func->new_func = func->old_func;
> >  
> > +		if (func->stack_only)
> > +			continue;
> > +
> >  		ret = kallsyms_lookup_size_offset((unsigned long)func->new_func,
> >  						  &func->new_size, NULL);
> >  		if (!ret) {
> > diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
> > index fe316c021d73..221ed691cc7f 100644
> > --- a/kernel/livepatch/patch.c
> > +++ b/kernel/livepatch/patch.c
> > @@ -250,6 +250,9 @@ static void __klp_unpatch_object(struct klp_object *obj, bool nops_only)
> >  		if (nops_only && !func->nop)
> >  			continue;
> >  
> > +		if (func->stack_only)
> > +			continue;
> > +
> >  		if (func->patched)
> >  			klp_unpatch_func(func);
> >  	}
> > @@ -273,6 +276,9 @@ int klp_patch_object(struct klp_object *obj)
> >  		return -EINVAL;
> >  
> >  	klp_for_each_func(obj, func) {
> > +		if (func->stack_only)
> > +			continue;
> > +
> 
> Instead of these stack_only checks, we might want to add a new
> klp_for_each_patchable_func() macro.  It could also be used in
> klp_add_object_nops() to filter out old_func->stack_only.

Maybe. On the other hand, I like the explicit check here and there. Will 
consider...
 
> >  		ret = klp_patch_func(func);
> >  		if (ret) {
> >  			klp_unpatch_object(obj);
> > diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
> > index 5683ac0d2566..fa0630fcab1a 100644
> > --- a/kernel/livepatch/transition.c
> > +++ b/kernel/livepatch/transition.c
> > @@ -200,7 +200,10 @@ static int klp_check_stack_func(struct klp_func *func, unsigned long *entries,
> >  	for (i = 0; i < nr_entries; i++) {
> >  		address = entries[i];
> >  
> > -		if (klp_target_state == KLP_UNPATCHED) {
> > +		if (func->stack_only) {
> > +			func_addr = (unsigned long)func->old_func;
> > +			func_size = func->old_size;
> > +		} else if (klp_target_state == KLP_UNPATCHED) {
> 
> Hm, what does this mean for the unpatching case?  What if the new
> function's .cold child is on the stack when we're trying to unpatch?

Good question. I did not realize it worked both ways. Of course it does.

> Would it make sense to allow the user specify a 'new_func' for
> stack_only, which is a func to check on the stack when unpatching?  Then
> new_func could point to the new .cold child.  And then
> klp_check_stack_func() wouldn't need a special case.

Maybe. klp_check_stack_func() would still need a special case, no? It 
would just move down to KLP_PATCHED case. Or, the check after 
klp_find_ops() would have to be improved, but I would like the explicit 
check for stack_only better.

Thanks for the review.

Miroslav
Petr Mladek Dec. 14, 2021, 12:27 p.m. UTC | #3
On Tue 2021-12-14 09:47:59, Miroslav Benes wrote:
> On Mon, 13 Dec 2021, Josh Poimboeuf wrote:
> > On Fri, Dec 10, 2021 at 01:44:48PM +0100, Miroslav Benes wrote:
> > Second, if obj's first func happens to be stack_only, this will short
> > circuit the rest of the list traversal and will effectively prevent nops
> > for all the rest of the funcs, even if they're *not* stack_only.
> 
> Oh, of course.
> 
> > Third, I'm not sure this approach would even make sense.  I was thinking
> > there are two special cases to consider:
> > 
> > 1) If old_func is stack_only, that's effectively the same as !old_func,
> >    in which case we don't need a nop.
> 
> Correct.
> 
> > 2) If old_func is *not* stack_only, but new_func *is* stack_only, that's
> >    effectively the same as (old_func && !new_func), in which case we
> >    *do* want a nop.  Since new_func already exists, we can just convert
> >    it from stack_only to nop.
> 
> And I somehow missed this case.
> 
> > Does that make sense?  Or am I missing something?
> > 
> > For example:
> > 
> > --- a/kernel/livepatch/core.c
> > +++ b/kernel/livepatch/core.c
> > @@ -536,9 +536,23 @@ static int klp_add_object_nops(struct klp_patch *patch,
> >  	}
> >  
> >  	klp_for_each_func(old_obj, old_func) {
> > +		if (old_func->stack_only) {
> > +			/* equivalent to !old_func; no nop needed */
> > +			continue;
> > +		}
> 
> Nicer.
> 
> >  		func = klp_find_func(obj, old_func);
> > -		if (func)
> > +		if (func) {
> > +			if (func->stack_only) {
> > +				/*
> > +				 * equivalent to (old_func && !new_func);
> > +				 * convert stack_only to nop:
> > +				 */
> > +				func->stack_only = false;
> > +				func->nop = true;
> > +			}
> > +
> >  			continue;
> > +		}
> >
> >  		func = klp_alloc_func_nop(old_func, obj);
> >  		if (!func)
> 
> I think that it cannot be that straightforward. We assume that nop 
> functions are allocated dynamically elsewhere in the code, so the 
> conversion here from a stack_only function to a nop would cause troubles. 
> I like the idea though. We would also need to set func->new_func for it 
> and there may be some more places to handle, which I am missing now.

Yup. It is not that easy because nops are dynamically allocated and
are freed after the transition is completed.

Well, stack_only has the same effect as nop from the livepatching POV.
Both are checked on stack and both do not redirect the code. The only
difference is that stack_only struct klp_func is static. It need not
be allocated and need not be freed.


> If I understood it correctly, Petr elsewhere in the thread proposed to 
> ignore nop functions completely. They would be allocated, not used and 
> discarded once a replace live patch is applied. I did not like the idea, 
> because it seemed hacky. And it would probably suffer from similar issues 
> as the above.

This is probably misunderstanding. I proposed to do not register the
ftrace handler for stack_only entries. But it would work only when
there is not already registered ftrace handler from another livepatch.
So, I agree that it is a bad idea.

Better solution seems to handle stack_only entries the same way as
nops except for the allocation/freeing.


> > > --- a/kernel/livepatch/transition.c
> > > +++ b/kernel/livepatch/transition.c
> > > @@ -200,7 +200,10 @@ static int klp_check_stack_func(struct klp_func *func, unsigned long *entries,
> > >  	for (i = 0; i < nr_entries; i++) {
> > >  		address = entries[i];
> > >  
> > > -		if (klp_target_state == KLP_UNPATCHED) {
> > > +		if (func->stack_only) {
> > > +			func_addr = (unsigned long)func->old_func;
> > > +			func_size = func->old_size;
> > > +		} else if (klp_target_state == KLP_UNPATCHED) {
> > 
> > Hm, what does this mean for the unpatching case?  What if the new
> > function's .cold child is on the stack when we're trying to unpatch?
> 
> Good question. I did not realize it worked both ways. Of course it does.
> 
> > Would it make sense to allow the user specify a 'new_func' for
> > stack_only, which is a func to check on the stack when unpatching?  Then
> > new_func could point to the new .cold child.  And then
> > klp_check_stack_func() wouldn't need a special case.

I am confused. My understanding is that .cold child is explicitly
livepatched to the new .cold child like it is done in the selftest:

static struct klp_func funcs_stack_only[] = {
	{
		.old_name = "child_function",
		.new_func = livepatch_child_function,
	}, {

We should not need anything special to check it on stack.
We only need to make sure that we check all .stack_only functions of
the to-be-disabled livepatch.

Best Regards,
Petr
Petr Mladek Dec. 14, 2021, 3:40 p.m. UTC | #4
On Tue 2021-12-14 13:27:33, Petr Mladek wrote:
> On Tue 2021-12-14 09:47:59, Miroslav Benes wrote:
> > On Mon, 13 Dec 2021, Josh Poimboeuf wrote:
> > > On Fri, Dec 10, 2021 at 01:44:48PM +0100, Miroslav Benes wrote:
> > > > --- a/kernel/livepatch/transition.c
> > > > +++ b/kernel/livepatch/transition.c
> > > > @@ -200,7 +200,10 @@ static int klp_check_stack_func(struct klp_func *func, unsigned long *entries,
> > > >  	for (i = 0; i < nr_entries; i++) {
> > > >  		address = entries[i];
> > > >  
> > > > -		if (klp_target_state == KLP_UNPATCHED) {
> > > > +		if (func->stack_only) {
> > > > +			func_addr = (unsigned long)func->old_func;
> > > > +			func_size = func->old_size;
> > > > +		} else if (klp_target_state == KLP_UNPATCHED) {
> > > 
> > > Hm, what does this mean for the unpatching case?  What if the new
> > > function's .cold child is on the stack when we're trying to unpatch?
> > 
> > Good question. I did not realize it worked both ways. Of course it does.
> > 
> > > Would it make sense to allow the user specify a 'new_func' for
> > > stack_only, which is a func to check on the stack when unpatching?  Then
> > > new_func could point to the new .cold child.  And then
> > > klp_check_stack_func() wouldn't need a special case.
> 
> I am confused. My understanding is that .cold child is explicitly
> livepatched to the new .cold child like it is done in the selftest:
> 
> static struct klp_func funcs_stack_only[] = {
> 	{
> 		.old_name = "child_function",
> 		.new_func = livepatch_child_function,
> 	}, {
> 
> We should not need anything special to check it on stack.
> We only need to make sure that we check all .stack_only functions of
> the to-be-disabled livepatch.

We have discussed this with Miroslav and it seems to be even more
complicated. My current understanding is that we actually have
three functions involved:

  parent_func()
    call child_func()
      jmp child_func.cold

We livepatch child_func() that uses jmp and need not be on stack.
This is why we want to check parent_func() on stack.
For this, we define something like:

static struct klp_func funcs[] = {
	{
		.old_name = "child_func",
		.new_func = livepatch_child_func,   // livepatched func
	},
	{
		.old_name = "parent_func",
		.stack_only = true,		    // stack only
	},


Now, there might be the same problem with livepatch_child_func.
The call chain would be:

  parent_func()
    call child_func() ---> livepatch_child_func()
      jmp livepatch_child_func.cold


=> We need to check the very same parent_func() also when unpatching.


Note that already do the same for nops:

static struct klp_func *klp_alloc_func_nop(struct klp_func *old_func,
					   struct klp_object *obj)
{
[...]
	klp_init_func_early(obj, func);
	/*
	 * func->new_func is same as func->old_func. These addresses are
	 * set when the object is loaded, see klp_init_object_loaded().
	 */
	func->old_sympos = old_func->old_sympos;
	func->nop = true;
[...]
}

where

static int klp_init_object_loaded(struct klp_patch *patch,
				  struct klp_object *obj)
{
[...]
	if (func->nop)
			func->new_func = func->old_func;
[...]


This is another argument that we should somehow reuse the nops code
also for stack_only checks.

Does it make sense, please? ;-)

Best Regards,
Petr
Josh Poimboeuf Dec. 14, 2021, 11:48 p.m. UTC | #5
On Tue, Dec 14, 2021 at 04:40:11PM +0100, Petr Mladek wrote:
> > > > Hm, what does this mean for the unpatching case?  What if the new
> > > > function's .cold child is on the stack when we're trying to unpatch?
> > > 
> > > Good question. I did not realize it worked both ways. Of course it does.
> > > 
> > > > Would it make sense to allow the user specify a 'new_func' for
> > > > stack_only, which is a func to check on the stack when unpatching?  Then
> > > > new_func could point to the new .cold child.  And then
> > > > klp_check_stack_func() wouldn't need a special case.
> > 
> > I am confused. My understanding is that .cold child is explicitly
> > livepatched to the new .cold child like it is done in the selftest:
> > 
> > static struct klp_func funcs_stack_only[] = {
> > 	{
> > 		.old_name = "child_function",
> > 		.new_func = livepatch_child_function,
> > 	}, {
> > 
> > We should not need anything special to check it on stack.
> > We only need to make sure that we check all .stack_only functions of
> > the to-be-disabled livepatch.
> 
> We have discussed this with Miroslav and it seems to be even more
> complicated. My current understanding is that we actually have
> three functions involved:
> 
>   parent_func()
>     call child_func()
>       jmp child_func.cold
> 
> We livepatch child_func() that uses jmp and need not be on stack.
> This is why we want to check parent_func() on stack.
> For this, we define something like:
> 
> static struct klp_func funcs[] = {
> 	{
> 		.old_name = "child_func",
> 		.new_func = livepatch_child_func,   // livepatched func
> 	},
> 	{
> 		.old_name = "parent_func",
> 		.stack_only = true,		    // stack only
> 	},

Hm, this is different than how I understand it.

In the past I referred to the "parent" as the function which jumps to
the cold ("child") function.  So maybe we're getting confused by
different terminology.  But here I'll go with the naming from your
example.

If parent_func() is stack_only, that could create some false positive
scenarios where patching stalls unnecessarily.  Also, wouldn't all of
child_func()'s callers have to be made stack_only?  How would you
definitively find all the callers?

Instead I was thinking child_func.cold() should be stack_only.

e.g.:

static struct klp_func funcs[] = {
	{
		.old_name = "child_func",
		.new_func = livepatch_child_func,
	},
	{
		.old_name = "child_func.cold",
		.new_name = "livepatch_child_func.cold",
		.stack_only = true,
	},

Any reason why that wouldn't work?

> This is another argument that we should somehow reuse the nops code
> also for stack_only checks.
> 
> Does it make sense, please? ;-)

Yes, if parent_func() is stack_only.

But if child_func.cold() is stack_only, that doesn't work, because it
doesn't have a fentry hook.
Petr Mladek Dec. 15, 2021, 2:37 p.m. UTC | #6
On Tue 2021-12-14 15:48:36, Josh Poimboeuf wrote:
> On Tue, Dec 14, 2021 at 04:40:11PM +0100, Petr Mladek wrote:
> > > > > Hm, what does this mean for the unpatching case?  What if the new
> > > > > function's .cold child is on the stack when we're trying to unpatch?
> > > > 
> > > > Good question. I did not realize it worked both ways. Of course it does.
> > > > 
> > > > > Would it make sense to allow the user specify a 'new_func' for
> > > > > stack_only, which is a func to check on the stack when unpatching?  Then
> > > > > new_func could point to the new .cold child.  And then
> > > > > klp_check_stack_func() wouldn't need a special case.
> > > 
> > > I am confused. My understanding is that .cold child is explicitly
> > > livepatched to the new .cold child like it is done in the selftest:
> > > 
> > > static struct klp_func funcs_stack_only[] = {
> > > 	{
> > > 		.old_name = "child_function",
> > > 		.new_func = livepatch_child_function,
> > > 	}, {
> > > 
> > > We should not need anything special to check it on stack.
> > > We only need to make sure that we check all .stack_only functions of
> > > the to-be-disabled livepatch.
> > 
> > We have discussed this with Miroslav and it seems to be even more
> > complicated. My current understanding is that we actually have
> > three functions involved:
> > 
> >   parent_func()
> >     call child_func()
> >       jmp child_func.cold
> > 
> > We livepatch child_func() that uses jmp and need not be on stack.
> > This is why we want to check parent_func() on stack.
> > For this, we define something like:
> > 
> > static struct klp_func funcs[] = {
> > 	{
> > 		.old_name = "child_func",
> > 		.new_func = livepatch_child_func,   // livepatched func
> > 	},
> > 	{
> > 		.old_name = "parent_func",
> > 		.stack_only = true,		    // stack only
> > 	},
> 
> Hm, this is different than how I understand it.
> 
> In the past I referred to the "parent" as the function which jumps to
> the cold ("child") function.  So maybe we're getting confused by
> different terminology.  But here I'll go with the naming from your
> example.

I think that I was primary confused by the selftest where "child"
function is livepatched and "parent" is defined as stack_only.

Miroslav told me yesterday that the function that jumps into
the .cold child needs to get livepatched. It makes sense
because .cold child does not have well defined functionality.
It depends on the compiler what code is put there.
Hence I added one more level...

> If parent_func() is stack_only, that could create some false positive
> scenarios where patching stalls unnecessarily.

Yes, it won't be optimal.


> Also, wouldn't all of child_func()'s callers have to be made
> stack_only?

Well, we already do this when handling compiler optimizations,
for example, inlining.


> How would you definitively find all the callers?

Good question. The best solution would be to get support from
the compiler like we already get for another optimizations.

We always have these problems how to find functions that need
special handling for livepatching.


> Instead I was thinking child_func.cold() should be stack_only.
> 
> e.g.:
> 
> static struct klp_func funcs[] = {
> 	{
> 		.old_name = "child_func",
> 		.new_func = livepatch_child_func,
> 	},
> 	{
> 		.old_name = "child_func.cold",
> 		.new_name = "livepatch_child_func.cold",
> 		.stack_only = true,
> 	},
> 
> Any reason why that wouldn't work?

Yes, it should work in the given example. I am just curious how this
would work in practice:


  1. The compiler might optimize the new code another way and there
     need not be 1:1 relation.

     We might need another set of stack_only functions checked when
     the livepatch is enabled. And another set of functions checked
     when the livepatch gets disabled.


  2. The names of "child_func.cold" functions are generated by
     the compiler. I mean that the names are "strange" ;-)

     It is likely easier with the kPatch approach that creates glue
     around already compiled symbols. It is more tricky when preparing
     the livepatch from sources. Well, it is doable.


BTW: livepatch_child_func.cold function must be checked on the stack
     also when the livepatch is replaced by another livepatch.

     I mean that we need to check two sets of stack only functions
     when replacing one livepatch with another one:

	+ "new_name" functions from to-be-replaced livepatch (like when disabling)
	+ "old_name" functions from new livepatch (like when enabling)


Note that I do not have any strong opinion about any approach at the
moment. I primary want to be sure that I understand the problem correctly :-)

Best Regards,
Petr
Miroslav Benes Dec. 16, 2021, 9:15 a.m. UTC | #7
On Wed, 15 Dec 2021, Josh Poimboeuf wrote:

> On Wed, Dec 15, 2021 at 03:37:26PM +0100, Petr Mladek wrote:
> > > Hm, this is different than how I understand it.
> > > 
> > > In the past I referred to the "parent" as the function which jumps to
> > > the cold ("child") function.  So maybe we're getting confused by
> > > different terminology.  But here I'll go with the naming from your
> > > example.
> > 
> > I think that I was primary confused by the selftest where "child"
> > function is livepatched and "parent" is defined as stack_only.
> 
> Ah, I guess I didn't look too closely at the selftest.

The selftest really does not help in understanding the problem. It is 
artificial just for the purpose of testing the API.

And as the discussion shows, there may be different scenarios in which the 
stack_only entries could be used. That does not help either.

Anyway...
 
> > > Instead I was thinking child_func.cold() should be stack_only.
> > > 
> > > e.g.:
> > > 
> > > static struct klp_func funcs[] = {
> > > 	{
> > > 		.old_name = "child_func",
> > > 		.new_func = livepatch_child_func,
> > > 	},
> > > 	{
> > > 		.old_name = "child_func.cold",
> > > 		.new_name = "livepatch_child_func.cold",
> > > 		.stack_only = true,
> > > 	},
> > > 
> > > Any reason why that wouldn't work?
> > 
> > Yes, it should work in the given example. I am just curious how this
> > would work in practice:
> > 
> > 
> >   1. The compiler might optimize the new code another way and there
> >      need not be 1:1 relation.
> >
> >      We might need another set of stack_only functions checked when
> >      the livepatch is enabled. And another set of functions checked
> >      when the livepatch gets disabled.
> 
> Regardless I'm thinking the above approach should be flexible enough.

Agreed. I think it is the best idea so far.
 
> If the patched child_func no longer has .cold, set 'new_name' to NULL in
> the stack_only entry.
> 
> If the original child_func doesn't have .cold, but patched child_func
> does, set 'old_name' to NULL in the stack_only entry.
> 
> If there were ever more than one of such "sub-functions" (which I
> believe currently doesn't happen), the author could create multiple
> stack_only entries.

All makes sense.

> >   2. The names of "child_func.cold" functions are generated by
> >      the compiler. I mean that the names are "strange" ;-)
> > 
> >      It is likely easier with the kPatch approach that creates glue
> >      around already compiled symbols. It is more tricky when preparing
> >      the livepatch from sources. Well, it is doable.
> 
> kpatch-build has checks for symbols with ".cold" substring.  I'm
> thinking it would be easy enough for you to do something similar since
> you're already checking for other compiler optimizations.

Yes, it should not be a problem.

> > BTW: livepatch_child_func.cold function must be checked on the stack
> >      also when the livepatch is replaced by another livepatch.
> > 
> >      I mean that we need to check two sets of stack only functions
> >      when replacing one livepatch with another one:
> > 
> > 	+ "new_name" functions from to-be-replaced livepatch (like when disabling)
> > 	+ "old_name" functions from new livepatch (like when enabling)
> 
> Urgh, this is starting to give me a headache.

Haha, it is always like this, isn't it? We come up with something which 
seems to be quite simple at the beginning and then all these different 
live patching features start to re-appear :D. I admit that it has already 
given me headaches.

> Could we put the cold funcs in a klp_ops func_stack to make this work
> automatically?

Maybe. It would definitely be nice to solve it for "almost free" somehow.

> Alternatively we could link the .cold functions to their non-cold
> counterparts somehow.  So when checking a function's stack, also check
> it's linked counterpart.  It could even be part of the original
> function's klp_func struct somehow, rather than having a dedicated
> klp_func struct for the stack_only thing.

On the other hand, we would lose an opportunity to have a solution also 
for non .cold cases. As Joe mentioned, he had to introduce artificial nops 
to functions just to have them in a final live patch. But yes, maybe it 
will end up as a too ambitious goal.

> Or we could just give up trying to abstract this entirely, and go back
> to Peter's suggestion to just always look for a ".cold" version of every
> function in klp_check_stack_func() :-)

No, not yet.

> I dunno...

I will prepare v3 and we will see then.

> > Note that I do not have any strong opinion about any approach at the
> > moment. I primary want to be sure that I understand the problem correctly :-)
> 
> Same here.

Thanks for the discussion. It definitely helps. I cannot say I 
understand the problem completely but it definitely helps :)

Miroslav
diff mbox series

Patch

diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h
index 2614247a9781..d09f89fe7921 100644
--- a/include/linux/livepatch.h
+++ b/include/linux/livepatch.h
@@ -29,6 +29,8 @@ 
  * @new_func:	pointer to the patched function code
  * @old_sympos: a hint indicating which symbol position the old function
  *		can be found (optional)
+ * @stack_only:	only search for the function (specified by old_name) on any
+ * 		task's stack
  * @old_func:	pointer to the function being patched
  * @kobj:	kobject for sysfs resources
  * @node:	list node for klp_object func_list
@@ -66,6 +68,7 @@  struct klp_func {
 	 * in kallsyms for the given object is used.
 	 */
 	unsigned long old_sympos;
+	bool stack_only;
 
 	/* internal */
 	void *old_func;
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index 335d988bd811..62ff4180dc9b 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -89,6 +89,10 @@  static struct klp_func *klp_find_func(struct klp_object *obj,
 	struct klp_func *func;
 
 	klp_for_each_func(obj, func) {
+		/* Do not create nop klp_func for stack_only function */
+		if (func->stack_only)
+			return func;
+
 		if ((strcmp(old_func->old_name, func->old_name) == 0) &&
 		    (old_func->old_sympos == func->old_sympos)) {
 			return func;
@@ -499,6 +503,17 @@  static struct klp_func *klp_alloc_func_nop(struct klp_func *old_func,
 	return func;
 }
 
+static bool klp_is_object_stack_only(struct klp_object *old_obj)
+{
+	struct klp_func *old_func;
+
+	klp_for_each_func(old_obj, old_func)
+		if (!old_func->stack_only)
+			return false;
+
+	return true;
+}
+
 static int klp_add_object_nops(struct klp_patch *patch,
 			       struct klp_object *old_obj)
 {
@@ -508,6 +523,13 @@  static int klp_add_object_nops(struct klp_patch *patch,
 	obj = klp_find_object(patch, old_obj);
 
 	if (!obj) {
+		/*
+		 * Do not create nop klp_object for old_obj which contains
+		 * stack_only functions only.
+		 */
+		if (klp_is_object_stack_only(old_obj))
+			return 0;
+
 		obj = klp_alloc_object_dynamic(old_obj->name, patch);
 		if (!obj)
 			return -ENOMEM;
@@ -723,8 +745,9 @@  static int klp_init_func(struct klp_object *obj, struct klp_func *func)
 	/*
 	 * NOPs get the address later. The patched module must be loaded,
 	 * see klp_init_object_loaded().
+	 * stack_only functions do not get new_func addresses at all.
 	 */
-	if (!func->new_func && !func->nop)
+	if (!func->new_func && !func->nop && !func->stack_only)
 		return -EINVAL;
 
 	if (strlen(func->old_name) >= KSYM_NAME_LEN)
@@ -804,6 +827,9 @@  static int klp_init_object_loaded(struct klp_patch *patch,
 		if (func->nop)
 			func->new_func = func->old_func;
 
+		if (func->stack_only)
+			continue;
+
 		ret = kallsyms_lookup_size_offset((unsigned long)func->new_func,
 						  &func->new_size, NULL);
 		if (!ret) {
diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c
index fe316c021d73..221ed691cc7f 100644
--- a/kernel/livepatch/patch.c
+++ b/kernel/livepatch/patch.c
@@ -250,6 +250,9 @@  static void __klp_unpatch_object(struct klp_object *obj, bool nops_only)
 		if (nops_only && !func->nop)
 			continue;
 
+		if (func->stack_only)
+			continue;
+
 		if (func->patched)
 			klp_unpatch_func(func);
 	}
@@ -273,6 +276,9 @@  int klp_patch_object(struct klp_object *obj)
 		return -EINVAL;
 
 	klp_for_each_func(obj, func) {
+		if (func->stack_only)
+			continue;
+
 		ret = klp_patch_func(func);
 		if (ret) {
 			klp_unpatch_object(obj);
diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c
index 5683ac0d2566..fa0630fcab1a 100644
--- a/kernel/livepatch/transition.c
+++ b/kernel/livepatch/transition.c
@@ -200,7 +200,10 @@  static int klp_check_stack_func(struct klp_func *func, unsigned long *entries,
 	for (i = 0; i < nr_entries; i++) {
 		address = entries[i];
 
-		if (klp_target_state == KLP_UNPATCHED) {
+		if (func->stack_only) {
+			func_addr = (unsigned long)func->old_func;
+			func_size = func->old_size;
+		} else if (klp_target_state == KLP_UNPATCHED) {
 			 /*
 			  * Check for the to-be-unpatched function
 			  * (the func itself).