summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/backing-dev.c37
-rw-r--r--mm/bootmem.c17
-rw-r--r--mm/hugetlb.c7
-rw-r--r--mm/ksm.c12
-rw-r--r--mm/memcontrol.c24
-rw-r--r--mm/memory.c3
-rw-r--r--mm/mlock.c41
-rw-r--r--mm/mmap.c113
-rw-r--r--mm/pagewalk.c47
-rw-r--r--mm/readahead.c2
-rw-r--r--mm/rmap.c42
-rw-r--r--mm/slab.c13
-rw-r--r--mm/slub.c5
-rw-r--r--mm/util.c21
-rw-r--r--mm/vmscan.c23
15 files changed, 249 insertions, 158 deletions
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0e8ca0347707..707d0dc6da0f 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -11,6 +11,8 @@
#include <linux/writeback.h>
#include <linux/device.h>
+static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
+
void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
{
}
@@ -25,6 +27,11 @@ struct backing_dev_info default_backing_dev_info = {
};
EXPORT_SYMBOL_GPL(default_backing_dev_info);
+struct backing_dev_info noop_backing_dev_info = {
+ .name = "noop",
+};
+EXPORT_SYMBOL_GPL(noop_backing_dev_info);
+
static struct class *bdi_class;
/*
@@ -227,6 +234,9 @@ static struct device_attribute bdi_dev_attrs[] = {
static __init int bdi_class_init(void)
{
bdi_class = class_create(THIS_MODULE, "bdi");
+ if (IS_ERR(bdi_class))
+ return PTR_ERR(bdi_class);
+
bdi_class->dev_attrs = bdi_dev_attrs;
bdi_debug_init();
return 0;
@@ -712,6 +722,33 @@ void bdi_destroy(struct backing_dev_info *bdi)
}
EXPORT_SYMBOL(bdi_destroy);
+/*
+ * For use from filesystems to quickly init and register a bdi associated
+ * with dirty writeback
+ */
+int bdi_setup_and_register(struct backing_dev_info *bdi, char *name,
+ unsigned int cap)
+{
+ char tmp[32];
+ int err;
+
+ bdi->name = name;
+ bdi->capabilities = cap;
+ err = bdi_init(bdi);
+ if (err)
+ return err;
+
+ sprintf(tmp, "%.28s%s", name, "-%d");
+ err = bdi_register(bdi, NULL, tmp, atomic_long_inc_return(&bdi_seq));
+ if (err) {
+ bdi_destroy(bdi);
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(bdi_setup_and_register);
+
static wait_queue_head_t congestion_wqh[2] = {
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
diff --git a/mm/bootmem.c b/mm/bootmem.c
index eff224220571..58c66cc5056a 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -304,9 +304,22 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
unsigned long __init free_all_bootmem(void)
{
#ifdef CONFIG_NO_BOOTMEM
- return free_all_memory_core_early(NODE_DATA(0)->node_id);
+ /*
+ * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
+ * because in some case like Node0 doesnt have RAM installed
+ * low ram will be on Node1
+ * Use MAX_NUMNODES will make sure all ranges in early_node_map[]
+ * will be used instead of only Node0 related
+ */
+ return free_all_memory_core_early(MAX_NUMNODES);
#else
- return free_all_bootmem_core(NODE_DATA(0)->bdata);
+ unsigned long total_pages = 0;
+ bootmem_data_t *bdata;
+
+ list_for_each_entry(bdata, &bdata_list, list)
+ total_pages += free_all_bootmem_core(bdata);
+
+ return total_pages;
#endif
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6034dc9e9796..4c9e6bbf3772 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -546,6 +546,7 @@ static void free_huge_page(struct page *page)
mapping = (struct address_space *) page_private(page);
set_page_private(page, 0);
+ page->mapping = NULL;
BUG_ON(page_count(page));
INIT_LIST_HEAD(&page->lru);
@@ -1038,7 +1039,7 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
page = alloc_buddy_huge_page(h, vma, addr);
if (!page) {
hugetlb_put_quota(inode->i_mapping, chg);
- return ERR_PTR(-VM_FAULT_OOM);
+ return ERR_PTR(-VM_FAULT_SIGBUS);
}
}
@@ -2447,8 +2448,10 @@ retry:
spin_lock(&inode->i_lock);
inode->i_blocks += blocks_per_huge_page(h);
spin_unlock(&inode->i_lock);
- } else
+ } else {
lock_page(page);
+ page->mapping = HUGETLB_POISON;
+ }
}
/*
diff --git a/mm/ksm.c b/mm/ksm.c
index 8cdfc2a1e8bf..956880f2ff49 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -365,7 +365,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
do {
cond_resched();
page = follow_page(vma, addr, FOLL_GET);
- if (!page)
+ if (IS_ERR_OR_NULL(page))
break;
if (PageKsm(page))
ret = handle_mm_fault(vma->vm_mm, vma, addr,
@@ -447,7 +447,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item)
goto out;
page = follow_page(vma, addr, FOLL_GET);
- if (!page)
+ if (IS_ERR_OR_NULL(page))
goto out;
if (PageAnon(page)) {
flush_anon_page(vma, page, addr);
@@ -1086,7 +1086,7 @@ struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
cond_resched();
tree_rmap_item = rb_entry(*new, struct rmap_item, node);
tree_page = get_mergeable_page(tree_rmap_item);
- if (!tree_page)
+ if (IS_ERR_OR_NULL(tree_page))
return NULL;
/*
@@ -1294,7 +1294,7 @@ next_mm:
if (ksm_test_exit(mm))
break;
*page = follow_page(vma, ksm_scan.address, FOLL_GET);
- if (*page && PageAnon(*page)) {
+ if (!IS_ERR_OR_NULL(*page) && PageAnon(*page)) {
flush_anon_page(vma, *page, ksm_scan.address);
flush_dcache_page(*page);
rmap_item = get_next_rmap_item(slot,
@@ -1308,7 +1308,7 @@ next_mm:
up_read(&mm->mmap_sem);
return rmap_item;
}
- if (*page)
+ if (!IS_ERR_OR_NULL(*page))
put_page(*page);
ksm_scan.address += PAGE_SIZE;
cond_resched();
@@ -1367,7 +1367,7 @@ next_mm:
static void ksm_do_scan(unsigned int scan_npages)
{
struct rmap_item *rmap_item;
- struct page *page;
+ struct page *uninitialized_var(page);
while (scan_npages--) {
cond_resched();
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9ed760dc7448..8a79a6f0f029 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1359,16 +1359,19 @@ void mem_cgroup_update_file_mapped(struct page *page, int val)
lock_page_cgroup(pc);
mem = pc->mem_cgroup;
- if (!mem)
- goto done;
-
- if (!PageCgroupUsed(pc))
+ if (!mem || !PageCgroupUsed(pc))
goto done;
/*
* Preemption is already disabled. We can use __this_cpu_xxx
*/
- __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], val);
+ if (val > 0) {
+ __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+ SetPageCgroupFileMapped(pc);
+ } else {
+ __this_cpu_dec(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+ ClearPageCgroupFileMapped(pc);
+ }
done:
unlock_page_cgroup(pc);
@@ -1598,7 +1601,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
* There is a small race that "from" or "to" can be
* freed by rmdir, so we use css_tryget().
*/
- rcu_read_lock();
from = mc.from;
to = mc.to;
if (from && css_tryget(&from->css)) {
@@ -1619,7 +1621,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
do_continue = (to == mem_over_limit);
css_put(&to->css);
}
- rcu_read_unlock();
if (do_continue) {
DEFINE_WAIT(wait);
prepare_to_wait(&mc.waitq, &wait,
@@ -1801,16 +1802,13 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
static void __mem_cgroup_move_account(struct page_cgroup *pc,
struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
{
- struct page *page;
-
VM_BUG_ON(from == to);
VM_BUG_ON(PageLRU(pc->page));
VM_BUG_ON(!PageCgroupLocked(pc));
VM_BUG_ON(!PageCgroupUsed(pc));
VM_BUG_ON(pc->mem_cgroup != from);
- page = pc->page;
- if (page_mapped(page) && !PageAnon(page)) {
+ if (PageCgroupFileMapped(pc)) {
/* Update mapped_file data for mem_cgroup */
preempt_disable();
__this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
@@ -2429,11 +2427,11 @@ int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
}
unlock_page_cgroup(pc);
+ *ptr = mem;
if (mem) {
- ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
+ ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false);
css_put(&mem->css);
}
- *ptr = mem;
return ret;
}
diff --git a/mm/memory.c b/mm/memory.c
index 1d2ea39260e5..833952d8b74d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -125,13 +125,12 @@ core_initcall(init_zero_pfn);
#if defined(SPLIT_RSS_COUNTING)
-void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
+static void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
{
int i;
for (i = 0; i < NR_MM_COUNTERS; i++) {
if (task->rss_stat.count[i]) {
- BUG_ON(!mm);
add_mm_counter(mm, i, task->rss_stat.count[i]);
task->rss_stat.count[i] = 0;
}
diff --git a/mm/mlock.c b/mm/mlock.c
index 8f4e2dfceec1..3f82720e0515 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -607,44 +607,3 @@ void user_shm_unlock(size_t size, struct user_struct *user)
spin_unlock(&shmlock_user_lock);
free_uid(user);
}
-
-int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
- size_t size)
-{
- unsigned long lim, vm, pgsz;
- int error = -ENOMEM;
-
- pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
- down_write(&mm->mmap_sem);
-
- lim = ACCESS_ONCE(rlim[RLIMIT_AS].rlim_cur) >> PAGE_SHIFT;
- vm = mm->total_vm + pgsz;
- if (lim < vm)
- goto out;
-
- lim = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur) >> PAGE_SHIFT;
- vm = mm->locked_vm + pgsz;
- if (lim < vm)
- goto out;
-
- mm->total_vm += pgsz;
- mm->locked_vm += pgsz;
-
- error = 0;
- out:
- up_write(&mm->mmap_sem);
- return error;
-}
-
-void refund_locked_memory(struct mm_struct *mm, size_t size)
-{
- unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
- down_write(&mm->mmap_sem);
-
- mm->total_vm -= pgsz;
- mm->locked_vm -= pgsz;
-
- up_write(&mm->mmap_sem);
-}
diff --git a/mm/mmap.c b/mm/mmap.c
index 75557c639ad4..456ec6f27889 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -507,11 +507,12 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
struct address_space *mapping = NULL;
struct prio_tree_root *root = NULL;
struct file *file = vma->vm_file;
- struct anon_vma *anon_vma = NULL;
long adjust_next = 0;
int remove_next = 0;
if (next && !insert) {
+ struct vm_area_struct *exporter = NULL;
+
if (end >= next->vm_end) {
/*
* vma expands, overlapping all the next, and
@@ -519,7 +520,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
*/
again: remove_next = 1 + (end > next->vm_end);
end = next->vm_end;
- anon_vma = next->anon_vma;
+ exporter = next;
importer = vma;
} else if (end > next->vm_start) {
/*
@@ -527,7 +528,7 @@ again: remove_next = 1 + (end > next->vm_end);
* mprotect case 5 shifting the boundary up.
*/
adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
- anon_vma = next->anon_vma;
+ exporter = next;
importer = vma;
} else if (end < vma->vm_end) {
/*
@@ -536,28 +537,19 @@ again: remove_next = 1 + (end > next->vm_end);
* mprotect case 4 shifting the boundary down.
*/
adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
- anon_vma = next->anon_vma;
+ exporter = vma;
importer = next;
}
- }
- /*
- * When changing only vma->vm_end, we don't really need anon_vma lock.
- */
- if (vma->anon_vma && (insert || importer || start != vma->vm_start))
- anon_vma = vma->anon_vma;
- if (anon_vma) {
/*
* Easily overlooked: when mprotect shifts the boundary,
* make sure the expanding vma has anon_vma set if the
* shrinking vma had, to cover any anon pages imported.
*/
- if (importer && !importer->anon_vma) {
- /* Block reverse map lookups until things are set up. */
- if (anon_vma_clone(importer, vma)) {
+ if (exporter && exporter->anon_vma && !importer->anon_vma) {
+ if (anon_vma_clone(importer, exporter))
return -ENOMEM;
- }
- importer->anon_vma = anon_vma;
+ importer->anon_vma = exporter->anon_vma;
}
}
@@ -825,6 +817,61 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
}
/*
+ * Rough compatbility check to quickly see if it's even worth looking
+ * at sharing an anon_vma.
+ *
+ * They need to have the same vm_file, and the flags can only differ
+ * in things that mprotect may change.
+ *
+ * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
+ * we can merge the two vma's. For example, we refuse to merge a vma if
+ * there is a vm_ops->close() function, because that indicates that the
+ * driver is doing some kind of reference counting. But that doesn't
+ * really matter for the anon_vma sharing case.
+ */
+static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
+{
+ return a->vm_end == b->vm_start &&
+ mpol_equal(vma_policy(a), vma_policy(b)) &&
+ a->vm_file == b->vm_file &&
+ !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
+ b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
+}
+
+/*
+ * Do some basic sanity checking to see if we can re-use the anon_vma
+ * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
+ * the same as 'old', the other will be the new one that is trying
+ * to share the anon_vma.
+ *
+ * NOTE! This runs with mm_sem held for reading, so it is possible that
+ * the anon_vma of 'old' is concurrently in the process of being set up
+ * by another page fault trying to merge _that_. But that's ok: if it
+ * is being set up, that automatically means that it will be a singleton
+ * acceptable for merging, so we can do all of this optimistically. But
+ * we do that ACCESS_ONCE() to make sure that we never re-load the pointer.
+ *
+ * IOW: that the "list_is_singular()" test on the anon_vma_chain only
+ * matters for the 'stable anon_vma' case (ie the thing we want to avoid
+ * is to return an anon_vma that is "complex" due to having gone through
+ * a fork).
+ *
+ * We also make sure that the two vma's are compatible (adjacent,
+ * and with the same memory policies). That's all stable, even with just
+ * a read lock on the mm_sem.
+ */
+static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
+{
+ if (anon_vma_compatible(a, b)) {
+ struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
+
+ if (anon_vma && list_is_singular(&old->anon_vma_chain))
+ return anon_vma;
+ }
+ return NULL;
+}
+
+/*
* find_mergeable_anon_vma is used by anon_vma_prepare, to check
* neighbouring vmas for a suitable anon_vma, before it goes off
* to allocate a new anon_vma. It checks because a repetitive
@@ -834,28 +881,16 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
*/
struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
{
+ struct anon_vma *anon_vma;
struct vm_area_struct *near;
- unsigned long vm_flags;
near = vma->vm_next;
if (!near)
goto try_prev;
- /*
- * Since only mprotect tries to remerge vmas, match flags
- * which might be mprotected into each other later on.
- * Neither mlock nor madvise tries to remerge at present,
- * so leave their flags as obstructing a merge.
- */
- vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
- vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
- if (near->anon_vma && vma->vm_end == near->vm_start &&
- mpol_equal(vma_policy(vma), vma_policy(near)) &&
- can_vma_merge_before(near, vm_flags,
- NULL, vma->vm_file, vma->vm_pgoff +
- ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
- return near->anon_vma;
+ anon_vma = reusable_anon_vma(near, vma, near);
+ if (anon_vma)
+ return anon_vma;
try_prev:
/*
* It is potentially slow to have to call find_vma_prev here.
@@ -868,14 +903,9 @@ try_prev:
if (!near)
goto none;
- vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
- vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
- if (near->anon_vma && near->vm_end == vma->vm_start &&
- mpol_equal(vma_policy(near), vma_policy(vma)) &&
- can_vma_merge_after(near, vm_flags,
- NULL, vma->vm_file, vma->vm_pgoff))
- return near->anon_vma;
+ anon_vma = reusable_anon_vma(near, near, vma);
+ if (anon_vma)
+ return anon_vma;
none:
/*
* There's no absolute need to look only at touching neighbours:
@@ -1947,7 +1977,8 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
return 0;
/* Clean everything up if vma_adjust failed. */
- new->vm_ops->close(new);
+ if (new->vm_ops && new->vm_ops->close)
+ new->vm_ops->close(new);
if (new->vm_file) {
if (vma->vm_flags & VM_EXECUTABLE)
removed_exe_file_vma(mm);
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 7b47a57b6646..8b1a2ce21ee5 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -80,6 +80,37 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
return err;
}
+#ifdef CONFIG_HUGETLB_PAGE
+static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
+ unsigned long end)
+{
+ unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
+ return boundary < end ? boundary : end;
+}
+
+static int walk_hugetlb_range(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct hstate *h = hstate_vma(vma);
+ unsigned long next;
+ unsigned long hmask = huge_page_mask(h);
+ pte_t *pte;
+ int err = 0;
+
+ do {
+ next = hugetlb_entry_end(h, addr, end);
+ pte = huge_pte_offset(walk->mm, addr & hmask);
+ if (pte && walk->hugetlb_entry)
+ err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
+ if (err)
+ return err;
+ } while (addr = next, addr != end);
+
+ return 0;
+}
+#endif
+
/**
* walk_page_range - walk a memory map's page tables with a callback
* @mm: memory map to walk
@@ -128,20 +159,16 @@ int walk_page_range(unsigned long addr, unsigned long end,
vma = find_vma(walk->mm, addr);
#ifdef CONFIG_HUGETLB_PAGE
if (vma && is_vm_hugetlb_page(vma)) {
- pte_t *pte;
- struct hstate *hs;
-
if (vma->vm_end < next)
next = vma->vm_end;
- hs = hstate_vma(vma);
- pte = huge_pte_offset(walk->mm,
- addr & huge_page_mask(hs));
- if (pte && !huge_pte_none(huge_ptep_get(pte))
- && walk->hugetlb_entry)
- err = walk->hugetlb_entry(pte, addr,
- next, walk);
+ /*
+ * Hugepage is very tightly coupled with vma, so
+ * walk through hugetlb entries within a given vma.
+ */
+ err = walk_hugetlb_range(vma, addr, next, walk);
if (err)
break;
+ pgd = pgd_offset(walk->mm, next);
continue;
}
#endif
diff --git a/mm/readahead.c b/mm/readahead.c
index 999b54bb462f..dfa9a1a03a11 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -503,7 +503,7 @@ void page_cache_sync_readahead(struct address_space *mapping,
return;
/* be dumb */
- if (filp->f_mode & FMODE_RANDOM) {
+ if (filp && (filp->f_mode & FMODE_RANDOM)) {
force_page_cache_readahead(mapping, filp, offset, req_size);
return;
}
diff --git a/mm/rmap.c b/mm/rmap.c
index eaa7a09eb72e..0feeef860a8f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -133,8 +133,8 @@ int anon_vma_prepare(struct vm_area_struct *vma)
goto out_enomem_free_avc;
allocated = anon_vma;
}
- spin_lock(&anon_vma->lock);
+ spin_lock(&anon_vma->lock);
/* page_table_lock to protect against threads */
spin_lock(&mm->page_table_lock);
if (likely(!vma->anon_vma)) {
@@ -144,14 +144,15 @@ int anon_vma_prepare(struct vm_area_struct *vma)
list_add(&avc->same_vma, &vma->anon_vma_chain);
list_add(&avc->same_anon_vma, &anon_vma->head);
allocated = NULL;
+ avc = NULL;
}
spin_unlock(&mm->page_table_lock);
-
spin_unlock(&anon_vma->lock);
- if (unlikely(allocated)) {
+
+ if (unlikely(allocated))
anon_vma_free(allocated);
+ if (unlikely(avc))
anon_vma_chain_free(avc);
- }
}
return 0;
@@ -182,7 +183,7 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
struct anon_vma_chain *avc, *pavc;
- list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
+ list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
avc = anon_vma_chain_alloc();
if (!avc)
goto enomem_failure;
@@ -335,14 +336,13 @@ vma_address(struct page *page, struct vm_area_struct *vma)
/*
* At what user virtual address is page expected in vma?
- * checking that the page matches the vma.
+ * Caller should check the page is actually part of the vma.
*/
unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)
{
- if (PageAnon(page)) {
- if (vma->anon_vma != page_anon_vma(page))
- return -EFAULT;
- } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
+ if (PageAnon(page))
+ ;
+ else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) {
if (!vma->vm_file ||
vma->vm_file->f_mapping != page->mapping)
return -EFAULT;
@@ -730,13 +730,29 @@ void page_move_anon_rmap(struct page *page,
* @page: the page to add the mapping to
* @vma: the vm area in which the mapping is added
* @address: the user virtual address mapped
+ * @exclusive: the page is exclusively owned by the current process
*/
static void __page_set_anon_rmap(struct page *page,
- struct vm_area_struct *vma, unsigned long address)
+ struct vm_area_struct *vma, unsigned long address, int exclusive)
{
struct anon_vma *anon_vma = vma->anon_vma;
BUG_ON(!anon_vma);
+
+ /*
+ * If the page isn't exclusively mapped into this vma,
+ * we must use the _oldest_ possible anon_vma for the
+ * page mapping!
+ *
+ * So take the last AVC chain entry in the vma, which is
+ * the deepest ancestor, and use the anon_vma from that.
+ */
+ if (!exclusive) {
+ struct anon_vma_chain *avc;
+ avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma);
+ anon_vma = avc->anon_vma;
+ }
+
anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
page->mapping = (struct address_space *) anon_vma;
page->index = linear_page_index(vma, address);
@@ -791,7 +807,7 @@ void page_add_anon_rmap(struct page *page,
VM_BUG_ON(!PageLocked(page));
VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
if (first)
- __page_set_anon_rmap(page, vma, address);
+ __page_set_anon_rmap(page, vma, address, 0);
else
__page_check_anon_rmap(page, vma, address);
}
@@ -813,7 +829,7 @@ void page_add_new_anon_rmap(struct page *page,
SetPageSwapBacked(page);
atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
__inc_zone_page_state(page, NR_ANON_PAGES);
- __page_set_anon_rmap(page, vma, address);
+ __page_set_anon_rmap(page, vma, address, 1);
if (page_evictable(page, vma))
lru_cache_add_lru(page, LRU_ACTIVE_ANON);
else
diff --git a/mm/slab.c b/mm/slab.c
index a9f325b28bed..bac0f4fcc216 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3602,21 +3602,10 @@ EXPORT_SYMBOL(kmem_cache_alloc_notrace);
*/
int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
{
- unsigned long addr = (unsigned long)ptr;
- unsigned long min_addr = PAGE_OFFSET;
- unsigned long align_mask = BYTES_PER_WORD - 1;
unsigned long size = cachep->buffer_size;
struct page *page;
- if (unlikely(addr < min_addr))
- goto out;
- if (unlikely(addr > (unsigned long)high_memory - size))
- goto out;
- if (unlikely(addr & align_mask))
- goto out;
- if (unlikely(!kern_addr_valid(addr)))
- goto out;
- if (unlikely(!kern_addr_valid(addr + size - 1)))
+ if (unlikely(!kern_ptr_validate(ptr, size)))
goto out;
page = virt_to_page(ptr);
if (unlikely(!PageSlab(page)))
diff --git a/mm/slub.c b/mm/slub.c
index b364844a1068..d2a54fe71ea2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2153,7 +2153,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
int local_node;
if (slab_state >= UP && (s < kmalloc_caches ||
- s > kmalloc_caches + KMALLOC_CACHES))
+ s >= kmalloc_caches + KMALLOC_CACHES))
local_node = page_to_nid(virt_to_page(s));
else
local_node = 0;
@@ -2386,6 +2386,9 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object)
{
struct page *page;
+ if (!kern_ptr_validate(object, s->size))
+ return 0;
+
page = get_object_page(object);
if (!page || s != page->slab)
diff --git a/mm/util.c b/mm/util.c
index 834db7be240f..f5712e8964be 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -186,6 +186,27 @@ void kzfree(const void *p)
}
EXPORT_SYMBOL(kzfree);
+int kern_ptr_validate(const void *ptr, unsigned long size)
+{
+ unsigned long addr = (unsigned long)ptr;
+ unsigned long min_addr = PAGE_OFFSET;
+ unsigned long align_mask = sizeof(void *) - 1;
+
+ if (unlikely(addr < min_addr))
+ goto out;
+ if (unlikely(addr > (unsigned long)high_memory - size))
+ goto out;
+ if (unlikely(addr & align_mask))
+ goto out;
+ if (unlikely(!kern_addr_valid(addr)))
+ goto out;
+ if (unlikely(!kern_addr_valid(addr + size - 1)))
+ goto out;
+ return 1;
+out:
+ return 0;
+}
+
/*
* strndup_user - duplicate an existing string from user space
* @s: The string to duplicate
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e0e5f15bb726..3ff3311447f5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1535,13 +1535,6 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
unsigned long ap, fp;
struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
- /* If we have no swap space, do not bother scanning anon pages. */
- if (!sc->may_swap || (nr_swap_pages <= 0)) {
- percent[0] = 0;
- percent[1] = 100;
- return;
- }
-
anon = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
file = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
@@ -1639,20 +1632,22 @@ static void shrink_zone(int priority, struct zone *zone,
unsigned long nr_reclaimed = sc->nr_reclaimed;
unsigned long nr_to_reclaim = sc->nr_to_reclaim;
struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+ int noswap = 0;
- get_scan_ratio(zone, sc, percent);
+ /* If we have no swap space, do not bother scanning anon pages. */
+ if (!sc->may_swap || (nr_swap_pages <= 0)) {
+ noswap = 1;
+ percent[0] = 0;
+ percent[1] = 100;
+ } else
+ get_scan_ratio(zone, sc, percent);
for_each_evictable_lru(l) {
int file = is_file_lru(l);
unsigned long scan;
- if (percent[file] == 0) {
- nr[l] = 0;
- continue;
- }
-
scan = zone_nr_lru_pages(zone, sc, l);
- if (priority) {
+ if (priority || noswap) {
scan >>= priority;
scan = (scan * percent[file]) / 100;
}