Message ID | 20200914165042.96218-3-ldufour@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | mm: fix memory to node bad links in sysfs | expand |
> arch/ia64/mm/init.c | 4 +-- > drivers/base/node.c | 86 ++++++++++++++++++++++++++++---------------- > include/linux/node.h | 11 +++--- > mm/memory_hotplug.c | 5 +-- > 4 files changed, 68 insertions(+), 38 deletions(-) > > diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c > index b5054b5e77c8..8e7b8c6c576e 100644 > --- a/arch/ia64/mm/init.c > +++ b/arch/ia64/mm/init.c > @@ -538,7 +538,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg) > if (map_start < map_end) > memmap_init_zone((unsigned long)(map_end - map_start), > args->nid, args->zone, page_to_pfn(map_start), > - MEMPLUG_EARLY, NULL); > + MEMINIT_EARLY, NULL); Patch #1. > return 0; > } > > @@ -548,7 +548,7 @@ memmap_init (unsigned long size, int nid, unsigned long zone, > { > if (!vmem_map) { > memmap_init_zone(size, nid, zone, start_pfn, > - MEMPLUG_EARLY, NULL); > + MEMINIT_EARLY, NULL); > } else { > struct page *start; > struct memmap_init_callback_data args; > diff --git a/drivers/base/node.c b/drivers/base/node.c > index 508b80f6329b..01ee73c9d675 100644 > --- a/drivers/base/node.c > +++ b/drivers/base/node.c > @@ -761,14 +761,36 @@ static int __ref get_nid_for_pfn(unsigned long pfn) > return pfn_to_nid(pfn); > } > > +static int do_register_memory_block_under_node(int nid, > + struct memory_block *mem_blk) > +{ > + int ret; > + > + /* > + * If this memory block spans multiple nodes, we only indicate > + * the last processed node. > + */ > + mem_blk->nid = nid; > + > + ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, > + &mem_blk->dev.kobj, > + kobject_name(&mem_blk->dev.kobj)); > + if (ret) > + return ret; > + > + return sysfs_create_link_nowarn(&mem_blk->dev.kobj, > + &node_devices[nid]->dev.kobj, > + kobject_name(&node_devices[nid]->dev.kobj)); > +} > + > /* register memory section under specified node if it spans that node */ > -static int register_mem_sect_under_node(struct memory_block *mem_blk, > - void *arg) > +static int register_mem_block_under_node_early(struct memory_block *mem_blk, > + void *arg) > { > unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE; > unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); > unsigned long end_pfn = start_pfn + memory_block_pfns - 1; > - int ret, nid = *(int *)arg; > + int nid = *(int *)arg; > unsigned long pfn; > > for (pfn = start_pfn; pfn <= end_pfn; pfn++) { > @@ -785,38 +807,34 @@ static int register_mem_sect_under_node(struct memory_block *mem_blk, > } > > /* > - * We need to check if page belongs to nid only for the boot > - * case, during hotplug we know that all pages in the memory > - * block belong to the same node. > - */ > - if (system_state == SYSTEM_BOOTING) { > - page_nid = get_nid_for_pfn(pfn); > - if (page_nid < 0) > - continue; > - if (page_nid != nid) > - continue; > - } > - > - /* > - * If this memory block spans multiple nodes, we only indicate > - * the last processed node. > + * We need to check if page belongs to nid only at the boot > + * case because node's ranges can be interleaved. > */ > - mem_blk->nid = nid; > - > - ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, > - &mem_blk->dev.kobj, > - kobject_name(&mem_blk->dev.kobj)); > - if (ret) > - return ret; > + page_nid = get_nid_for_pfn(pfn); > + if (page_nid < 0) > + continue; > + if (page_nid != nid) > + continue; > > - return sysfs_create_link_nowarn(&mem_blk->dev.kobj, > - &node_devices[nid]->dev.kobj, > - kobject_name(&node_devices[nid]->dev.kobj)); > + /* The memory block is registered to the first matching node */ That comment is misleading in that context. A memory block is registered if there is at least a page that belongs to the nid. It's perfectly fine to have a single memory block belong to multiple NUMA nodes (when the split is within a memory block). I'd just drop it. [...] -- Thanks, David / dhildenb
Le 14/09/2020 à 19:15, David Hildenbrand a écrit : >> arch/ia64/mm/init.c | 4 +-- >> drivers/base/node.c | 86 ++++++++++++++++++++++++++++---------------- >> include/linux/node.h | 11 +++--- >> mm/memory_hotplug.c | 5 +-- >> 4 files changed, 68 insertions(+), 38 deletions(-) >> >> diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c >> index b5054b5e77c8..8e7b8c6c576e 100644 >> --- a/arch/ia64/mm/init.c >> +++ b/arch/ia64/mm/init.c >> @@ -538,7 +538,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg) >> if (map_start < map_end) >> memmap_init_zone((unsigned long)(map_end - map_start), >> args->nid, args->zone, page_to_pfn(map_start), >> - MEMPLUG_EARLY, NULL); >> + MEMINIT_EARLY, NULL); > > Patch #1. Sure, this explains why I was able to build on ia64 but that's not the right place. >> return 0; >> } >> >> @@ -548,7 +548,7 @@ memmap_init (unsigned long size, int nid, unsigned long zone, >> { >> if (!vmem_map) { >> memmap_init_zone(size, nid, zone, start_pfn, >> - MEMPLUG_EARLY, NULL); >> + MEMINIT_EARLY, NULL); I'll fix that too. >> } else { >> struct page *start; >> struct memmap_init_callback_data args; >> diff --git a/drivers/base/node.c b/drivers/base/node.c >> index 508b80f6329b..01ee73c9d675 100644 >> --- a/drivers/base/node.c >> +++ b/drivers/base/node.c >> @@ -761,14 +761,36 @@ static int __ref get_nid_for_pfn(unsigned long pfn) >> return pfn_to_nid(pfn); >> } >> >> +static int do_register_memory_block_under_node(int nid, >> + struct memory_block *mem_blk) >> +{ >> + int ret; >> + >> + /* >> + * If this memory block spans multiple nodes, we only indicate >> + * the last processed node. >> + */ >> + mem_blk->nid = nid; >> + >> + ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, >> + &mem_blk->dev.kobj, >> + kobject_name(&mem_blk->dev.kobj)); >> + if (ret) >> + return ret; >> + >> + return sysfs_create_link_nowarn(&mem_blk->dev.kobj, >> + &node_devices[nid]->dev.kobj, >> + kobject_name(&node_devices[nid]->dev.kobj)); >> +} >> + >> /* register memory section under specified node if it spans that node */ >> -static int register_mem_sect_under_node(struct memory_block *mem_blk, >> - void *arg) >> +static int register_mem_block_under_node_early(struct memory_block *mem_blk, >> + void *arg) >> { >> unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE; >> unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); >> unsigned long end_pfn = start_pfn + memory_block_pfns - 1; >> - int ret, nid = *(int *)arg; >> + int nid = *(int *)arg; >> unsigned long pfn; >> >> for (pfn = start_pfn; pfn <= end_pfn; pfn++) { >> @@ -785,38 +807,34 @@ static int register_mem_sect_under_node(struct memory_block *mem_blk, >> } >> >> /* >> - * We need to check if page belongs to nid only for the boot >> - * case, during hotplug we know that all pages in the memory >> - * block belong to the same node. >> - */ >> - if (system_state == SYSTEM_BOOTING) { >> - page_nid = get_nid_for_pfn(pfn); >> - if (page_nid < 0) >> - continue; >> - if (page_nid != nid) >> - continue; >> - } >> - >> - /* >> - * If this memory block spans multiple nodes, we only indicate >> - * the last processed node. >> + * We need to check if page belongs to nid only at the boot >> + * case because node's ranges can be interleaved. >> */ >> - mem_blk->nid = nid; >> - >> - ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, >> - &mem_blk->dev.kobj, >> - kobject_name(&mem_blk->dev.kobj)); >> - if (ret) >> - return ret; >> + page_nid = get_nid_for_pfn(pfn); >> + if (page_nid < 0) >> + continue; >> + if (page_nid != nid) >> + continue; >> >> - return sysfs_create_link_nowarn(&mem_blk->dev.kobj, >> - &node_devices[nid]->dev.kobj, >> - kobject_name(&node_devices[nid]->dev.kobj)); >> + /* The memory block is registered to the first matching node */ > > That comment is misleading in that context. > > A memory block is registered if there is at least a page that belongs to > the nid. It's perfectly fine to have a single memory block belong to > multiple NUMA nodes (when the split is within a memory block). I'd just > drop it. I agree the comment is not accurate, I'll drop it.
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index b5054b5e77c8..8e7b8c6c576e 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -538,7 +538,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg) if (map_start < map_end) memmap_init_zone((unsigned long)(map_end - map_start), args->nid, args->zone, page_to_pfn(map_start), - MEMPLUG_EARLY, NULL); + MEMINIT_EARLY, NULL); return 0; } @@ -548,7 +548,7 @@ memmap_init (unsigned long size, int nid, unsigned long zone, { if (!vmem_map) { memmap_init_zone(size, nid, zone, start_pfn, - MEMPLUG_EARLY, NULL); + MEMINIT_EARLY, NULL); } else { struct page *start; struct memmap_init_callback_data args; diff --git a/drivers/base/node.c b/drivers/base/node.c index 508b80f6329b..01ee73c9d675 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -761,14 +761,36 @@ static int __ref get_nid_for_pfn(unsigned long pfn) return pfn_to_nid(pfn); } +static int do_register_memory_block_under_node(int nid, + struct memory_block *mem_blk) +{ + int ret; + + /* + * If this memory block spans multiple nodes, we only indicate + * the last processed node. + */ + mem_blk->nid = nid; + + ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, + &mem_blk->dev.kobj, + kobject_name(&mem_blk->dev.kobj)); + if (ret) + return ret; + + return sysfs_create_link_nowarn(&mem_blk->dev.kobj, + &node_devices[nid]->dev.kobj, + kobject_name(&node_devices[nid]->dev.kobj)); +} + /* register memory section under specified node if it spans that node */ -static int register_mem_sect_under_node(struct memory_block *mem_blk, - void *arg) +static int register_mem_block_under_node_early(struct memory_block *mem_blk, + void *arg) { unsigned long memory_block_pfns = memory_block_size_bytes() / PAGE_SIZE; unsigned long start_pfn = section_nr_to_pfn(mem_blk->start_section_nr); unsigned long end_pfn = start_pfn + memory_block_pfns - 1; - int ret, nid = *(int *)arg; + int nid = *(int *)arg; unsigned long pfn; for (pfn = start_pfn; pfn <= end_pfn; pfn++) { @@ -785,38 +807,34 @@ static int register_mem_sect_under_node(struct memory_block *mem_blk, } /* - * We need to check if page belongs to nid only for the boot - * case, during hotplug we know that all pages in the memory - * block belong to the same node. - */ - if (system_state == SYSTEM_BOOTING) { - page_nid = get_nid_for_pfn(pfn); - if (page_nid < 0) - continue; - if (page_nid != nid) - continue; - } - - /* - * If this memory block spans multiple nodes, we only indicate - * the last processed node. + * We need to check if page belongs to nid only at the boot + * case because node's ranges can be interleaved. */ - mem_blk->nid = nid; - - ret = sysfs_create_link_nowarn(&node_devices[nid]->dev.kobj, - &mem_blk->dev.kobj, - kobject_name(&mem_blk->dev.kobj)); - if (ret) - return ret; + page_nid = get_nid_for_pfn(pfn); + if (page_nid < 0) + continue; + if (page_nid != nid) + continue; - return sysfs_create_link_nowarn(&mem_blk->dev.kobj, - &node_devices[nid]->dev.kobj, - kobject_name(&node_devices[nid]->dev.kobj)); + /* The memory block is registered to the first matching node */ + return do_register_memory_block_under_node(nid, mem_blk); } /* mem section does not span the specified node */ return 0; } +/* + * During hotplug we know that all pages in the memory block belong to the same + * node. + */ +static int register_mem_block_under_node_hotplug(struct memory_block *mem_blk, + void *arg) +{ + int nid = *(int *)arg; + + return do_register_memory_block_under_node(nid, mem_blk); +} + /* * Unregister a memory block device under the node it spans. Memory blocks * with multiple nodes cannot be offlined and therefore also never be removed. @@ -832,11 +850,19 @@ void unregister_memory_block_under_nodes(struct memory_block *mem_blk) kobject_name(&node_devices[mem_blk->nid]->dev.kobj)); } -int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn) +int link_mem_sections(int nid, unsigned long start_pfn, unsigned long end_pfn, + enum meminit_context context) { + walk_memory_blocks_func_t func; + + if (context == MEMINIT_HOTPLUG) + func = register_mem_block_under_node_hotplug; + else + func = register_mem_block_under_node_early; + return walk_memory_blocks(PFN_PHYS(start_pfn), PFN_PHYS(end_pfn - start_pfn), (void *)&nid, - register_mem_sect_under_node); + func); } #ifdef CONFIG_HUGETLBFS diff --git a/include/linux/node.h b/include/linux/node.h index 4866f32a02d8..014ba3ab2efd 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -99,11 +99,13 @@ extern struct node *node_devices[]; typedef void (*node_registration_func_t)(struct node *); #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA) -extern int link_mem_sections(int nid, unsigned long start_pfn, - unsigned long end_pfn); +int link_mem_sections(int nid, unsigned long start_pfn, + unsigned long end_pfn, + enum meminit_context context); #else static inline int link_mem_sections(int nid, unsigned long start_pfn, - unsigned long end_pfn) + unsigned long end_pfn, + enum meminit_context context) { return 0; } @@ -128,7 +130,8 @@ static inline int register_one_node(int nid) if (error) return error; /* link memory sections under this node */ - error = link_mem_sections(nid, start_pfn, end_pfn); + error = link_mem_sections(nid, start_pfn, end_pfn, + MEMINIT_EARLY); } return error; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fc21625e42de..03df20078827 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -729,7 +729,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn, * are reserved so nobody should be touching them so we should be safe */ memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, - MEMPLUG_HOTPLUG, altmap); + MEMINIT_HOTPLUG, altmap); set_zone_contiguous(zone); } @@ -1080,7 +1080,8 @@ int __ref add_memory_resource(int nid, struct resource *res) } /* link memory sections under this node.*/ - ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1)); + ret = link_mem_sections(nid, PFN_DOWN(start), PFN_UP(start + size - 1), + MEMINIT_HOTPLUG); BUG_ON(ret); /* create new memmap entry */