@@ -320,6 +320,7 @@
311 64 process_vm_writev sys_process_vm_writev
312 common kcmp sys_kcmp
313 common finit_module sys_finit_module
+314 common vrange sys_vrange
#
# x32-specific system call numbers start at 512 to avoid cache impact
@@ -13,6 +13,8 @@
#include <linux/page-debug-flags.h>
#include <linux/uprobes.h>
#include <linux/page-flags-layout.h>
+#include <linux/mutex.h>
+#include <linux/vrange_types.h>
#include <asm/page.h>
#include <asm/mmu.h>
@@ -351,6 +353,9 @@ struct mm_struct {
*/
+#ifdef CONFIG_MMU
+ struct vrange_root vroot;
+#endif
unsigned long hiwater_rss; /* High-watermark of RSS usage */
unsigned long hiwater_vm; /* High-water virtual memory usage */
@@ -66,4 +66,7 @@
#define MAP_HUGE_SHIFT 26
#define MAP_HUGE_MASK 0x3f
+#define VRANGE_VOLATILE 0 /* unpin pages so VM can discard them */
+#define VRANGE_NONVOLATILE 1 /* pin pages so VM can't discard them */
+
#endif /* __ASM_GENERIC_MMAN_COMMON_H */
@@ -70,6 +70,7 @@
#include <linux/khugepaged.h>
#include <linux/signalfd.h>
#include <linux/uprobes.h>
+#include <linux/vrange.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -541,6 +542,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
spin_lock_init(&mm->page_table_lock);
mm->free_area_cache = TASK_UNMAPPED_BASE;
mm->cached_hole_size = ~0UL;
+ vrange_root_init(&mm->vroot, VRANGE_MM);
mm_init_aio(mm);
mm_init_owner(mm, p);
@@ -612,6 +614,7 @@ void mmput(struct mm_struct *mm)
if (atomic_dec_and_test(&mm->mm_users)) {
uprobe_clear_state(mm);
+ vrange_root_cleanup(&mm->vroot);
exit_aio(mm);
ksm_exit(mm);
khugepaged_exit(mm); /* must run before exit_mmap */
@@ -4,6 +4,9 @@
 #include <linux/vrange.h>
 #include <linux/slab.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/uaccess.h>
 static struct kmem_cache *vrange_cachep;
@@ -163,3 +165,93 @@ void vrange_root_cleanup(struct vrange_root *vroot)
 	vrange_unlock(vroot);
 }
+/*
+ * Mark or unmark the private anonymous range [start, end) as volatile.
+ * Returns the number of bytes affected on success, or a negative errno.
+ */
+static int vrange_private(struct mm_struct *mm,
+		unsigned long start, unsigned long end,
+		int mode, int *purged)
+{
+	int ret = -EINVAL;
+
+	if (mode == VRANGE_VOLATILE)
+		ret = vrange_add(&mm->vroot, start, end - 1);
+	else if (mode == VRANGE_NONVOLATILE)
+		ret = vrange_remove(&mm->vroot, start, end - 1, purged);
+
+	if (ret < 0)
+		return ret;
+	return end - start;
+}
+
+/*
+ * The vrange(2) system call.
+ *
+ * Applications can use vrange() to advise the kernel how it should
+ * handle paging I/O in this VM area. The idea is to help the kernel
+ * discard pages of vrange instead of swapping out when memory pressure
+ * happens. The information provided is advisory only, and can be safely
+ * disregarded by the kernel if the system has enough free memory.
+ *
+ * mode values:
+ *  VRANGE_VOLATILE - hint to kernel so VM can discard vrange pages when
+ *		memory pressure happens.
+ *  VRANGE_NONVOLATILE - Removes any volatile hints previously specified
+ *		in that range.
+ *
+ * behavior values (bitflags): None yet supported.
+ *
+ * purged ptr:
+ *  Returns 1 if any page in the range being marked nonvolatile has been
+ *  purged.
+ *
+ * return values:
+ *  non-negative - Number of bytes marked or unmarked.
+ *  -EINVAL - len is zero, start is not page-aligned, start is greater
+ *		than TASK_SIZE or "mode" is not a valid value.
+ *  -ENOMEM - Short of free memory in system for successful system call.
+ *  -EFAULT - purged pointer could not be written to.
+ *  -EOPNOTSUPP - Feature not yet supported.
+ */
+SYSCALL_DEFINE5(vrange, unsigned long, start,
+		size_t, len, int, mode, int, behavior, int __user *, purged)
+{
+	unsigned long end;
+	struct mm_struct *mm = current->mm;
+	int ret = -EINVAL;
+	int p = 0;
+
+	/* We don't yet support any behavior modes */
+	if (behavior)
+		return -EOPNOTSUPP;
+
+	if (start & ~PAGE_MASK)
+		goto out;
+
+	/* Round len down to a page multiple; a zero-length range is invalid */
+	len &= PAGE_MASK;
+	if (!len)
+		goto out;
+
+	end = start + len;
+	if (end < start)
+		goto out;
+
+	if (start >= TASK_SIZE)
+		goto out;
+
+	/*
+	 * Collect the purge state in a kernel local and copy it out with
+	 * put_user(); kernel code must never dereference a user pointer.
+	 */
+	ret = vrange_private(mm, start, end, mode, &p);
+	if (ret < 0)
+		goto out;
+
+	if (purged && put_user(p, purged))
+		ret = -EFAULT;
+
+out:
+	return ret;
+}