@@ -57,7 +57,7 @@ config ARM
select HAVE_KPROBES if !XIP_KERNEL
select HAVE_KRETPROBES if (HAVE_KPROBES)
select HAVE_MEMBLOCK
- select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND
+ select HAVE_MOD_ARCH_SPECIFIC
select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
@@ -1705,6 +1705,21 @@ config HAVE_ARCH_TRANSPARENT_HUGEPAGE
config ARCH_WANT_GENERAL_HUGETLB
def_bool y
+config ARM_MODULE_PLTS
+ bool "Use PLTs to allow module memory to spill over into vmalloc area"
+ depends on MODULES
+ help
+ Allocate PLTs when loading modules so that jumps and calls whose
+ targets are too far away for their relative offsets to be encoded
+ in the instructions themselves can be bounced via veneers in the
+ module's PLT. This allows modules to be allocated in the generic
+ vmalloc area after the dedicated module memory area has been
+ exhausted. The modules will use slightly more memory, but after
+ rounding up to page size, the actual memory footprint is usually
+ the same.
+
+ Say y if you are getting out of memory errors while loading modules
+
source "mm/Kconfig"
config FORCE_MAX_ZONEORDER
@@ -19,6 +19,10 @@ LDFLAGS_vmlinux += --be8
LDFLAGS_MODULE += --be8
endif
+ifeq ($(CONFIG_ARM_MODULE_PLTS),y)
+LDFLAGS_MODULE += -T $(srctree)/arch/arm/kernel/module.lds
+endif
+
OBJCOPYFLAGS :=-O binary -R .comment -S
GZFLAGS :=-9
#KBUILD_CFLAGS +=-pipe
@@ -16,11 +16,21 @@ enum {
ARM_SEC_UNLIKELY,
ARM_SEC_MAX,
};
+#endif
struct mod_arch_specific {
+#ifdef CONFIG_ARM_UNWIND
struct unwind_table *unwind[ARM_SEC_MAX];
-};
#endif
+#ifdef CONFIG_ARM_MODULE_PLTS
+ struct elf32_shdr *core_plt;
+ struct elf32_shdr *init_plt;
+ int core_plt_count;
+ int init_plt_count;
+#endif
+};
+
+u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val);
/*
* Add the ARM architecture version to the version magic string
@@ -35,6 +35,7 @@ obj-$(CONFIG_CPU_IDLE) += cpuidle.o
obj-$(CONFIG_ISA_DMA_API) += dma.o
obj-$(CONFIG_FIQ) += fiq.o fiqasm.o
obj-$(CONFIG_MODULES) += armksyms.o module.o
+obj-$(CONFIG_ARM_MODULE_PLTS) += module-plts.o
obj-$(CONFIG_ARTHUR) += arthur.o
obj-$(CONFIG_ISA_DMA) += dma-isa.o
obj-$(CONFIG_PCI) += bios32.o isa.o
new file mode 100644
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/elf.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cache.h>
+#include <asm/opcodes.h>
+
+#define PLT_ENT_STRIDE L1_CACHE_BYTES
+#define PLT_ENT_COUNT (PLT_ENT_STRIDE / sizeof(u32))
+#define PLT_ENT_SIZE (sizeof(struct plt_entries) / PLT_ENT_COUNT)
+
+#ifdef CONFIG_THUMB2_KERNEL
+#define PLT_ENT_LDR __opcode_to_mem_thumb32(0xf8dff000 | \
+ (PLT_ENT_STRIDE - 4))
+#else
+#define PLT_ENT_LDR __opcode_to_mem_arm(0xe59ff000 | \
+ (PLT_ENT_STRIDE - 8))
+#endif
+
+struct plt_entries {
+ u32 ldr[PLT_ENT_COUNT];
+ u32 lit[PLT_ENT_COUNT];
+};
+
+static bool in_init(const struct module *mod, u32 addr)
+{
+ return addr - (u32)mod->module_init < mod->init_size;
+}
+
+u32 get_module_plt(struct module *mod, unsigned long loc, Elf32_Addr val)
+{
+ struct plt_entries *plt, *plt_end;
+ int c, *count;
+
+ if (in_init(mod, loc)) {
+ plt = (void *)mod->arch.init_plt->sh_addr;
+ plt_end = (void *)plt + mod->arch.init_plt->sh_size;
+ count = &mod->arch.init_plt_count;
+ } else {
+ plt = (void *)mod->arch.core_plt->sh_addr;
+ plt_end = (void *)plt + mod->arch.core_plt->sh_size;
+ count = &mod->arch.core_plt_count;
+ }
+
+ /* Look for an existing entry pointing to 'val' */
+ for (c = *count; plt < plt_end; c -= PLT_ENT_COUNT, plt++) {
+ int i;
+
+ if (!c) {
+ /* Populate a new set of entries */
+ *plt = (struct plt_entries){
+ { [0 ... PLT_ENT_COUNT - 1] = PLT_ENT_LDR, },
+ { val, }
+ };
+ ++*count;
+ return (u32)plt->ldr;
+ }
+ for (i = 0; i < PLT_ENT_COUNT; i++) {
+ if (!plt->lit[i]) {
+ plt->lit[i] = val;
+ ++*count;
+ }
+ if (plt->lit[i] == val)
+ return (u32)&plt->ldr[i];
+ }
+ }
+ BUG();
+}
+
+static int duplicate_rel(Elf32_Addr base, const Elf32_Rel *rel, int num,
+ u32 mask)
+{
+ u32 *loc1, *loc2;
+ int i;
+
+ for (i = 0; i < num; i++) {
+ if (rel[i].r_info != rel[num].r_info)
+ continue;
+
+ /*
+ * Identical relocation types against identical symbols can
+ * still result in different PLT entries if the addend in the
+ * place is different. So resolve the target of the relocation
+ * to compare the values.
+ */
+ loc1 = (u32 *)(base + rel[i].r_offset);
+ loc2 = (u32 *)(base + rel[num].r_offset);
+ if (((*loc1 ^ *loc2) & mask) == 0)
+ return 1;
+ }
+ return 0;
+}
+
+/* Count how many PLT entries we may need */
+static unsigned int count_plts(Elf32_Addr base, const Elf32_Rel *rel, int num)
+{
+ unsigned int ret = 0;
+ int i;
+
+ /*
+ * Sure, this is order(n^2), but it's usually short, and not
+ * time critical
+ */
+ for (i = 0; i < num; i++)
+ switch (ELF32_R_TYPE(rel[i].r_info)) {
+ case R_ARM_CALL:
+ case R_ARM_PC24:
+ case R_ARM_JUMP24:
+ if (!duplicate_rel(base, rel, i,
+ __opcode_to_mem_arm(0x00ffffff)))
+ ret++;
+ break;
+ case R_ARM_THM_CALL:
+ case R_ARM_THM_JUMP24:
+ if (!duplicate_rel(base, rel, i,
+ __opcode_to_mem_thumb32(0x07ff2fff)))
+ ret++;
+ }
+ return ret;
+}
+
+int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
+ char *secstrings, struct module *mod)
+{
+ unsigned long core_plts = 0, init_plts = 0;
+ Elf32_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
+
+ /*
+ * To store the PLTs, we expand the .text section for core module code
+ * and the .init.text section for initialization code.
+ */
+ for (s = sechdrs; s < sechdrs_end; ++s)
+ if (strcmp(".core.plt", secstrings + s->sh_name) == 0)
+ mod->arch.core_plt = s;
+ else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
+ mod->arch.init_plt = s;
+
+ if (!mod->arch.core_plt || !mod->arch.init_plt) {
+ pr_err("%s: sections missing\n", mod->name);
+ return -ENOEXEC;
+ }
+
+ for (s = sechdrs + 1; s < sechdrs_end; ++s) {
+ const Elf32_Rel *rels = (void *)ehdr + s->sh_offset;
+ int numrels = s->sh_size / sizeof(Elf32_Rel);
+ Elf32_Shdr *dstsec = sechdrs + s->sh_info;
+
+ if (s->sh_type != SHT_REL)
+ continue;
+
+ if (strstr(secstrings + s->sh_name, ".init"))
+ init_plts += count_plts(dstsec->sh_addr, rels, numrels);
+ else
+ core_plts += count_plts(dstsec->sh_addr, rels, numrels);
+ }
+
+ mod->arch.core_plt->sh_type = SHT_NOBITS;
+ mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ mod->arch.core_plt->sh_addralign = L1_CACHE_BYTES;
+ mod->arch.core_plt->sh_size = round_up(core_plts * PLT_ENT_SIZE,
+ sizeof(struct plt_entries));
+ mod->arch.core_plt_count = 0;
+
+ mod->arch.init_plt->sh_type = SHT_NOBITS;
+ mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ mod->arch.init_plt->sh_addralign = L1_CACHE_BYTES;
+ mod->arch.init_plt->sh_size = round_up(init_plts * PLT_ENT_SIZE,
+ sizeof(struct plt_entries));
+ mod->arch.init_plt_count = 0;
+ pr_debug("%s: core.plt=%x, init.plt=%x\n", __func__,
+ mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size);
+ return 0;
+}
+
@@ -40,7 +40,12 @@
#ifdef CONFIG_MMU
void *module_alloc(unsigned long size)
{
- return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+ void *p = __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
+ GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE,
+ __builtin_return_address(0));
+ if (!IS_ENABLED(CONFIG_ARM_MODULE_PLTS) || p)
+ return p;
+ return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE,
__builtin_return_address(0));
}
@@ -110,6 +115,20 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
offset -= 0x04000000;
offset += sym->st_value - loc;
+
+ /*
+ * Route through a PLT entry if 'offset' exceeds the
+ * supported range. Note that 'offset + loc + 8'
+ * contains the absolute jump target, i.e.,
+ * @sym + addend, corrected for the +8 PC bias.
+ */
+ if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) &&
+ (offset <= (s32)0xfe000000 ||
+ offset >= (s32)0x02000000))
+ offset = get_module_plt(module, loc,
+ offset + loc + 8)
+ - loc - 8;
+
if (offset <= (s32)0xfe000000 ||
offset >= (s32)0x02000000) {
pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
@@ -203,6 +222,17 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex,
offset -= 0x02000000;
offset += sym->st_value - loc;
+ /*
+ * Route through a PLT entry if 'offset' exceeds the
+ * supported range.
+ */
+ if (IS_ENABLED(CONFIG_ARM_MODULE_PLTS) &&
+ (offset <= (s32)0xff000000 ||
+ offset >= (s32)0x01000000))
+ offset = get_module_plt(module, loc,
+ offset + loc + 4)
+ - loc - 4;
+
if (offset <= (s32)0xff000000 ||
offset >= (s32)0x01000000) {
pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n",
new file mode 100644
@@ -0,0 +1,4 @@
+SECTIONS {
+ .core.plt : { BYTE(0) }
+ .init.plt : { BYTE(0) }
+}