Message ID | 20200707193423.v4.13.I15c8366d7a11d1eeea57e5ac4d0471a95725ce9e@changeid |
---|---|
State | Accepted |
Commit | 84d3ed125ad91c5973c4a071be5eea913bea34e5 |
Headers | show |
Series | x86: Enhance MTRR functionality to support multiple CPUs | expand |
Hi Simon, On Wed, Jul 8, 2020 at 9:37 AM Simon Glass <sjg at chromium.org> wrote: > > Add a way to run a function on a selection of CPUs. This supports either > a single CPU, all CPUs, just the main CPU or just the 'APs', in Intel > terminology. > > It works by writing into a mailbox and then waiting for the CPUs to notice > it, take action and indicate they are done. > > When SMP is not yet enabled, this just calls the function on the main CPU. > > Signed-off-by: Simon Glass <sjg at chromium.org> > Reviewed-by: Wolfgang Wallner <wolfgang.wallner at br-automation.com> > --- > > Changes in v4: > - Only enable this feature of CONFIG_SMP_AP_WORK is enabled > - Allow running on the BSP if SMP is not enabled > > Changes in v3: > - Add a comment to run_ap_work() > - Rename flag to GD_FLG_SMP_READY > - Update the comment for run_ap_work() to explain logical_cpu_number > - Clarify meaning of @cpu_select in mp_run_on_cpus() comment > > arch/x86/cpu/mp_init.c | 107 +++++++++++++++++++++++++++++++++++--- > arch/x86/include/asm/mp.h | 33 ++++++++++++ > 2 files changed, 134 insertions(+), 6 deletions(-) > > diff --git a/arch/x86/cpu/mp_init.c b/arch/x86/cpu/mp_init.c > index 69a23829b9..dd6d6bfab7 100644 > --- a/arch/x86/cpu/mp_init.c > +++ b/arch/x86/cpu/mp_init.c > @@ -54,12 +54,7 @@ struct mp_flight_plan { > * callback > */ > struct mp_callback { > - /** > - * func() - Function to call on the AP > - * > - * @arg: Argument to pass > - */ > - void (*func)(void *arg); > + mp_run_func func; > void *arg; > int logical_cpu_number; > }; > @@ -514,6 +509,70 @@ static void store_callback(struct mp_callback **slot, struct mp_callback *val) > dmb(); > } > > +/** > + * run_ap_work() - Run a callback on selected APs > + * > + * This writes @callback to all APs and waits for them all to acknowledge it, > + * Note that whether each AP actually calls the callback depends on the value > + * of logical_cpu_number (see struct mp_callback). The logical CPU number is > + * the CPU device's req->seq value. > + * > + * @callback: Callback information to pass to all APs > + * @bsp: CPU device for the BSP > + * @num_cpus: The number of CPUs in the system (= number of APs + 1) > + * @expire_ms: Timeout to wait for all APs to finish, in milliseconds, or 0 for > + * no timeout > + * @return 0 if OK, -ETIMEDOUT if one or more APs failed to respond in time > + */ > +static int run_ap_work(struct mp_callback *callback, struct udevice *bsp, > + int num_cpus, uint expire_ms) > +{ > + int cur_cpu = bsp->req_seq; > + int num_aps = num_cpus - 1; /* number of non-BSPs to get this message */ > + int cpus_accepted; > + ulong start; > + int i; > + > + if (!IS_ENABLED(CONFIG_SMP_AP_WORK)) { > + printf("APs already parked. CONFIG_SMP_AP_WORK not enabled\n"); > + return -ENOTSUPP; > + } > + > + /* Signal to all the APs to run the func. */ > + for (i = 0; i < num_cpus; i++) { > + if (cur_cpu != i) > + store_callback(&ap_callbacks[i], callback); > + } > + mfence(); > + > + /* Wait for all the APs to signal back that call has been accepted. */ > + start = get_timer(0); > + > + do { > + mdelay(1); > + cpus_accepted = 0; > + > + for (i = 0; i < num_cpus; i++) { > + if (cur_cpu == i) > + continue; > + if (!read_callback(&ap_callbacks[i])) > + cpus_accepted++; It looks my previous comments were not addressed. I believe there is a bug here that cpus_accepted will double count for the 2nd time in the do {} while () loop. > + } > + > + if (expire_ms && get_timer(start) >= expire_ms) { > + log(UCLASS_CPU, LOGL_CRIT, > + "AP call expired; %d/%d CPUs accepted\n", > + cpus_accepted, num_aps); > + return -ETIMEDOUT; > + } > + } while (cpus_accepted != num_aps); > + > + /* Make sure we can see any data written by the APs */ > + mfence(); > + > + return 0; > +} > + > /** > * ap_wait_for_instruction() - Wait for and process requests from the main CPU > * > @@ -573,6 +632,42 @@ static struct mp_flight_record mp_steps[] = { > MP_FR_BLOCK_APS(ap_wait_for_instruction, NULL, NULL, NULL), > }; > > +int mp_run_on_cpus(int cpu_select, mp_run_func func, void *arg) > +{ > + struct mp_callback lcb = { > + .func = func, > + .arg = arg, > + .logical_cpu_number = cpu_select, > + }; > + struct udevice *dev; > + int num_cpus; > + int ret; > + > + ret = get_bsp(&dev, &num_cpus); > + if (ret < 0) > + return log_msg_ret("bsp", ret); > + if (cpu_select == MP_SELECT_ALL || cpu_select == MP_SELECT_BSP || > + cpu_select == ret) { > + /* Run on BSP first */ > + func(arg); > + } > + > + if (!IS_ENABLED(CONFIG_SMP_AP_WORK) || > + !(gd->flags & GD_FLG_SMP_READY)) { > + /* Allow use of this function on the BSP only */ > + if (cpu_select == MP_SELECT_BSP || !cpu_select) > + return 0; > + return -ENOTSUPP; > + } > + > + /* Allow up to 1 second for all APs to finish */ > + ret = run_ap_work(&lcb, dev, num_cpus, 1000 /* ms */); > + if (ret) > + return log_msg_ret("aps", ret); > + > + return 0; > +} > + > int mp_init(void) > { > int num_aps, num_cpus; > diff --git a/arch/x86/include/asm/mp.h b/arch/x86/include/asm/mp.h > index 41b1575f4b..eb49e690f2 100644 > --- a/arch/x86/include/asm/mp.h > +++ b/arch/x86/include/asm/mp.h > @@ -86,4 +86,37 @@ int mp_init(void); > /* Set up additional CPUs */ > int x86_mp_init(void); > > +/** > + * mp_run_func() - Function to call on the AP > + * > + * @arg: Argument to pass > + */ > +typedef void (*mp_run_func)(void *arg); > + > +#if defined(CONFIG_SMP) && !CONFIG_IS_ENABLED(X86_64) > +/** > + * mp_run_on_cpus() - Run a function on one or all CPUs > + * > + * This does not return until all CPUs have completed the work > + * > + * Running on anything other than the boot CPU is only supported if > + * CONFIG_SMP_AP_WORK is enabled > + * > + * @cpu_select: CPU to run on (its dev->req_seq value), or MP_SELECT_ALL for > + * all, or MP_SELECT_BSP for BSP > + * @func: Function to run > + * @arg: Argument to pass to the function > + * @return 0 on success, -ve on error > + */ > +int mp_run_on_cpus(int cpu_select, mp_run_func func, void *arg); > +#else > +static inline int mp_run_on_cpus(int cpu_select, mp_run_func func, void *arg) > +{ > + /* There is only one CPU, so just call the function here */ > + func(arg); > + > + return 0; > +} > +#endif > + > #endif /* _X86_MP_H_ */ Regards, Bin
Hi Bin, On Sun, 12 Jul 2020 at 22:56, Bin Meng <bmeng.cn at gmail.com> wrote: > > Hi Simon, > > On Wed, Jul 8, 2020 at 9:37 AM Simon Glass <sjg at chromium.org> wrote: > > > > Add a way to run a function on a selection of CPUs. This supports either > > a single CPU, all CPUs, just the main CPU or just the 'APs', in Intel > > terminology. > > > > It works by writing into a mailbox and then waiting for the CPUs to notice > > it, take action and indicate they are done. > > > > When SMP is not yet enabled, this just calls the function on the main CPU. > > > > Signed-off-by: Simon Glass <sjg at chromium.org> > > Reviewed-by: Wolfgang Wallner <wolfgang.wallner at br-automation.com> > > --- > > > > Changes in v4: > > - Only enable this feature of CONFIG_SMP_AP_WORK is enabled > > - Allow running on the BSP if SMP is not enabled > > > > Changes in v3: > > - Add a comment to run_ap_work() > > - Rename flag to GD_FLG_SMP_READY > > - Update the comment for run_ap_work() to explain logical_cpu_number > > - Clarify meaning of @cpu_select in mp_run_on_cpus() comment > > > > arch/x86/cpu/mp_init.c | 107 +++++++++++++++++++++++++++++++++++--- > > arch/x86/include/asm/mp.h | 33 ++++++++++++ > > 2 files changed, 134 insertions(+), 6 deletions(-) > > > > diff --git a/arch/x86/cpu/mp_init.c b/arch/x86/cpu/mp_init.c > > index 69a23829b9..dd6d6bfab7 100644 > > --- a/arch/x86/cpu/mp_init.c > > +++ b/arch/x86/cpu/mp_init.c > > @@ -54,12 +54,7 @@ struct mp_flight_plan { > > * callback > > */ > > struct mp_callback { > > - /** > > - * func() - Function to call on the AP > > - * > > - * @arg: Argument to pass > > - */ > > - void (*func)(void *arg); > > + mp_run_func func; > > void *arg; > > int logical_cpu_number; > > }; > > @@ -514,6 +509,70 @@ static void store_callback(struct mp_callback **slot, struct mp_callback *val) > > dmb(); > > } > > > > +/** > > + * run_ap_work() - Run a callback on selected APs > > + * > > + * This writes @callback to all APs and waits for them all to acknowledge it, > > + * Note that whether each AP actually calls the callback depends on the value > > + * of logical_cpu_number (see struct mp_callback). The logical CPU number is > > + * the CPU device's req->seq value. > > + * > > + * @callback: Callback information to pass to all APs > > + * @bsp: CPU device for the BSP > > + * @num_cpus: The number of CPUs in the system (= number of APs + 1) > > + * @expire_ms: Timeout to wait for all APs to finish, in milliseconds, or 0 for > > + * no timeout > > + * @return 0 if OK, -ETIMEDOUT if one or more APs failed to respond in time > > + */ > > +static int run_ap_work(struct mp_callback *callback, struct udevice *bsp, > > + int num_cpus, uint expire_ms) > > +{ > > + int cur_cpu = bsp->req_seq; > > + int num_aps = num_cpus - 1; /* number of non-BSPs to get this message */ > > + int cpus_accepted; > > + ulong start; > > + int i; > > + > > + if (!IS_ENABLED(CONFIG_SMP_AP_WORK)) { > > + printf("APs already parked. CONFIG_SMP_AP_WORK not enabled\n"); > > + return -ENOTSUPP; > > + } > > + > > + /* Signal to all the APs to run the func. */ > > + for (i = 0; i < num_cpus; i++) { > > + if (cur_cpu != i) > > + store_callback(&ap_callbacks[i], callback); > > + } > > + mfence(); > > + > > + /* Wait for all the APs to signal back that call has been accepted. */ > > + start = get_timer(0); > > + > > + do { > > + mdelay(1); > > + cpus_accepted = 0; > > + > > + for (i = 0; i < num_cpus; i++) { > > + if (cur_cpu == i) > > + continue; > > + if (!read_callback(&ap_callbacks[i])) > > + cpus_accepted++; > > It looks my previous comments were not addressed. I believe there is a > bug here that cpus_accepted will double count for the 2nd time in the > do {} while () loop. I thought I replied to that. I don't see how that can happen since it is set to zero each time around the outer loop. Can you please speed it out? > > > + } > > + > > + if (expire_ms && get_timer(start) >= expire_ms) { > > + log(UCLASS_CPU, LOGL_CRIT, > > + "AP call expired; %d/%d CPUs accepted\n", > > + cpus_accepted, num_aps); > > + return -ETIMEDOUT; > > + } > > + } while (cpus_accepted != num_aps); > > + > > + /* Make sure we can see any data written by the APs */ > > + mfence(); > > + > > + return 0; > > +} > > + Regards, Simon
diff --git a/arch/x86/cpu/mp_init.c b/arch/x86/cpu/mp_init.c index 69a23829b9..dd6d6bfab7 100644 --- a/arch/x86/cpu/mp_init.c +++ b/arch/x86/cpu/mp_init.c @@ -54,12 +54,7 @@ struct mp_flight_plan { * callback */ struct mp_callback { - /** - * func() - Function to call on the AP - * - * @arg: Argument to pass - */ - void (*func)(void *arg); + mp_run_func func; void *arg; int logical_cpu_number; }; @@ -514,6 +509,70 @@ static void store_callback(struct mp_callback **slot, struct mp_callback *val) dmb(); } +/** + * run_ap_work() - Run a callback on selected APs + * + * This writes @callback to all APs and waits for them all to acknowledge it, + * Note that whether each AP actually calls the callback depends on the value + * of logical_cpu_number (see struct mp_callback). The logical CPU number is + * the CPU device's req->seq value. + * + * @callback: Callback information to pass to all APs + * @bsp: CPU device for the BSP + * @num_cpus: The number of CPUs in the system (= number of APs + 1) + * @expire_ms: Timeout to wait for all APs to finish, in milliseconds, or 0 for + * no timeout + * @return 0 if OK, -ETIMEDOUT if one or more APs failed to respond in time + */ +static int run_ap_work(struct mp_callback *callback, struct udevice *bsp, + int num_cpus, uint expire_ms) +{ + int cur_cpu = bsp->req_seq; + int num_aps = num_cpus - 1; /* number of non-BSPs to get this message */ + int cpus_accepted; + ulong start; + int i; + + if (!IS_ENABLED(CONFIG_SMP_AP_WORK)) { + printf("APs already parked. CONFIG_SMP_AP_WORK not enabled\n"); + return -ENOTSUPP; + } + + /* Signal to all the APs to run the func. */ + for (i = 0; i < num_cpus; i++) { + if (cur_cpu != i) + store_callback(&ap_callbacks[i], callback); + } + mfence(); + + /* Wait for all the APs to signal back that call has been accepted. */ + start = get_timer(0); + + do { + mdelay(1); + cpus_accepted = 0; + + for (i = 0; i < num_cpus; i++) { + if (cur_cpu == i) + continue; + if (!read_callback(&ap_callbacks[i])) + cpus_accepted++; + } + + if (expire_ms && get_timer(start) >= expire_ms) { + log(UCLASS_CPU, LOGL_CRIT, + "AP call expired; %d/%d CPUs accepted\n", + cpus_accepted, num_aps); + return -ETIMEDOUT; + } + } while (cpus_accepted != num_aps); + + /* Make sure we can see any data written by the APs */ + mfence(); + + return 0; +} + /** * ap_wait_for_instruction() - Wait for and process requests from the main CPU * @@ -573,6 +632,42 @@ static struct mp_flight_record mp_steps[] = { MP_FR_BLOCK_APS(ap_wait_for_instruction, NULL, NULL, NULL), }; +int mp_run_on_cpus(int cpu_select, mp_run_func func, void *arg) +{ + struct mp_callback lcb = { + .func = func, + .arg = arg, + .logical_cpu_number = cpu_select, + }; + struct udevice *dev; + int num_cpus; + int ret; + + ret = get_bsp(&dev, &num_cpus); + if (ret < 0) + return log_msg_ret("bsp", ret); + if (cpu_select == MP_SELECT_ALL || cpu_select == MP_SELECT_BSP || + cpu_select == ret) { + /* Run on BSP first */ + func(arg); + } + + if (!IS_ENABLED(CONFIG_SMP_AP_WORK) || + !(gd->flags & GD_FLG_SMP_READY)) { + /* Allow use of this function on the BSP only */ + if (cpu_select == MP_SELECT_BSP || !cpu_select) + return 0; + return -ENOTSUPP; + } + + /* Allow up to 1 second for all APs to finish */ + ret = run_ap_work(&lcb, dev, num_cpus, 1000 /* ms */); + if (ret) + return log_msg_ret("aps", ret); + + return 0; +} + int mp_init(void) { int num_aps, num_cpus; diff --git a/arch/x86/include/asm/mp.h b/arch/x86/include/asm/mp.h index 41b1575f4b..eb49e690f2 100644 --- a/arch/x86/include/asm/mp.h +++ b/arch/x86/include/asm/mp.h @@ -86,4 +86,37 @@ int mp_init(void); /* Set up additional CPUs */ int x86_mp_init(void); +/** + * mp_run_func() - Function to call on the AP + * + * @arg: Argument to pass + */ +typedef void (*mp_run_func)(void *arg); + +#if defined(CONFIG_SMP) && !CONFIG_IS_ENABLED(X86_64) +/** + * mp_run_on_cpus() - Run a function on one or all CPUs + * + * This does not return until all CPUs have completed the work + * + * Running on anything other than the boot CPU is only supported if + * CONFIG_SMP_AP_WORK is enabled + * + * @cpu_select: CPU to run on (its dev->req_seq value), or MP_SELECT_ALL for + * all, or MP_SELECT_BSP for BSP + * @func: Function to run + * @arg: Argument to pass to the function + * @return 0 on success, -ve on error + */ +int mp_run_on_cpus(int cpu_select, mp_run_func func, void *arg); +#else +static inline int mp_run_on_cpus(int cpu_select, mp_run_func func, void *arg) +{ + /* There is only one CPU, so just call the function here */ + func(arg); + + return 0; +} +#endif + #endif /* _X86_MP_H_ */