summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/bootmem.c24
-rw-r--r--mm/compaction.c98
-rw-r--r--mm/huge_memory.c19
-rw-r--r--mm/hugetlb.c1
-rw-r--r--mm/internal.h1
-rw-r--r--mm/memblock.c3
-rw-r--r--mm/memcontrol.c4
-rw-r--r--mm/migrate.c18
-rw-r--r--mm/mlock.c6
-rw-r--r--mm/mmap.c4
-rw-r--r--mm/page_alloc.c56
11 files changed, 99 insertions, 135 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 1324cd74faec..b93376c39b61 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -185,10 +185,23 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
while (start < end) {
unsigned long *map, idx, vec;
+ unsigned shift;
map = bdata->node_bootmem_map;
idx = start - bdata->node_min_pfn;
+ shift = idx & (BITS_PER_LONG - 1);
+ /*
+ * vec holds at most BITS_PER_LONG map bits,
+ * bit 0 corresponds to start.
+ */
vec = ~map[idx / BITS_PER_LONG];
+
+ if (shift) {
+ vec >>= shift;
+ if (end - start >= BITS_PER_LONG)
+ vec |= ~map[idx / BITS_PER_LONG + 1] <<
+ (BITS_PER_LONG - shift);
+ }
/*
* If we have a properly aligned and fully unreserved
* BITS_PER_LONG block of pages in front of us, free
@@ -201,19 +214,18 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
count += BITS_PER_LONG;
start += BITS_PER_LONG;
} else {
- unsigned long off = 0;
+ unsigned long cur = start;
- vec >>= start & (BITS_PER_LONG - 1);
- while (vec) {
+ start = ALIGN(start + 1, BITS_PER_LONG);
+ while (vec && cur != start) {
if (vec & 1) {
- page = pfn_to_page(start + off);
+ page = pfn_to_page(cur);
__free_pages_bootmem(page, 0);
count++;
}
vec >>= 1;
- off++;
+ ++cur;
}
- start = ALIGN(start + 1, BITS_PER_LONG);
}
}
diff --git a/mm/compaction.c b/mm/compaction.c
index 6b807e466497..c62bd063d766 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -816,6 +816,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone,
static int compact_finished(struct zone *zone,
struct compact_control *cc)
{
+ unsigned int order;
unsigned long watermark;
if (fatal_signal_pending(current))
@@ -850,22 +851,16 @@ static int compact_finished(struct zone *zone,
return COMPACT_CONTINUE;
/* Direct compactor: Is a suitable page free? */
- if (cc->page) {
- /* Was a suitable page captured? */
- if (*cc->page)
+ for (order = cc->order; order < MAX_ORDER; order++) {
+ struct free_area *area = &zone->free_area[order];
+
+ /* Job done if page is free of the right migratetype */
+ if (!list_empty(&area->free_list[cc->migratetype]))
+ return COMPACT_PARTIAL;
+
+ /* Job done if allocation would set block type */
+ if (cc->order >= pageblock_order && area->nr_free)
return COMPACT_PARTIAL;
- } else {
- unsigned int order;
- for (order = cc->order; order < MAX_ORDER; order++) {
- struct free_area *area = &zone->free_area[cc->order];
- /* Job done if page is free of the right migratetype */
- if (!list_empty(&area->free_list[cc->migratetype]))
- return COMPACT_PARTIAL;
-
- /* Job done if allocation would set block type */
- if (cc->order >= pageblock_order && area->nr_free)
- return COMPACT_PARTIAL;
- }
}
return COMPACT_CONTINUE;
@@ -921,60 +916,6 @@ unsigned long compaction_suitable(struct zone *zone, int order)
return COMPACT_CONTINUE;
}
-static void compact_capture_page(struct compact_control *cc)
-{
- unsigned long flags;
- int mtype, mtype_low, mtype_high;
-
- if (!cc->page || *cc->page)
- return;
-
- /*
- * For MIGRATE_MOVABLE allocations we capture a suitable page ASAP
- * regardless of the migratetype of the freelist is is captured from.
- * This is fine because the order for a high-order MIGRATE_MOVABLE
- * allocation is typically at least a pageblock size and overall
- * fragmentation is not impaired. Other allocation types must
- * capture pages from their own migratelist because otherwise they
- * could pollute other pageblocks like MIGRATE_MOVABLE with
- * difficult to move pages and making fragmentation worse overall.
- */
- if (cc->migratetype == MIGRATE_MOVABLE) {
- mtype_low = 0;
- mtype_high = MIGRATE_PCPTYPES;
- } else {
- mtype_low = cc->migratetype;
- mtype_high = cc->migratetype + 1;
- }
-
- /* Speculatively examine the free lists without zone lock */
- for (mtype = mtype_low; mtype < mtype_high; mtype++) {
- int order;
- for (order = cc->order; order < MAX_ORDER; order++) {
- struct page *page;
- struct free_area *area;
- area = &(cc->zone->free_area[order]);
- if (list_empty(&area->free_list[mtype]))
- continue;
-
- /* Take the lock and attempt capture of the page */
- if (!compact_trylock_irqsave(&cc->zone->lock, &flags, cc))
- return;
- if (!list_empty(&area->free_list[mtype])) {
- page = list_entry(area->free_list[mtype].next,
- struct page, lru);
- if (capture_free_page(page, cc->order, mtype)) {
- spin_unlock_irqrestore(&cc->zone->lock,
- flags);
- *cc->page = page;
- return;
- }
- }
- spin_unlock_irqrestore(&cc->zone->lock, flags);
- }
- }
-}
-
static int compact_zone(struct zone *zone, struct compact_control *cc)
{
int ret;
@@ -1054,9 +995,6 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
goto out;
}
}
-
- /* Capture a page now if it is a suitable size */
- compact_capture_page(cc);
}
out:
@@ -1069,8 +1007,7 @@ out:
static unsigned long compact_zone_order(struct zone *zone,
int order, gfp_t gfp_mask,
- bool sync, bool *contended,
- struct page **page)
+ bool sync, bool *contended)
{
unsigned long ret;
struct compact_control cc = {
@@ -1080,7 +1017,6 @@ static unsigned long compact_zone_order(struct zone *zone,
.migratetype = allocflags_to_migratetype(gfp_mask),
.zone = zone,
.sync = sync,
- .page = page,
};
INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages);
@@ -1110,7 +1046,7 @@ int sysctl_extfrag_threshold = 500;
*/
unsigned long try_to_compact_pages(struct zonelist *zonelist,
int order, gfp_t gfp_mask, nodemask_t *nodemask,
- bool sync, bool *contended, struct page **page)
+ bool sync, bool *contended)
{
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
int may_enter_fs = gfp_mask & __GFP_FS;
@@ -1136,7 +1072,7 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
int status;
status = compact_zone_order(zone, order, gfp_mask, sync,
- contended, page);
+ contended);
rc = max(status, rc);
/* If a normal allocation would succeed, stop compacting */
@@ -1192,7 +1128,6 @@ int compact_pgdat(pg_data_t *pgdat, int order)
struct compact_control cc = {
.order = order,
.sync = false,
- .page = NULL,
};
return __compact_pgdat(pgdat, &cc);
@@ -1203,14 +1138,13 @@ static int compact_node(int nid)
struct compact_control cc = {
.order = -1,
.sync = true,
- .page = NULL,
};
return __compact_pgdat(NODE_DATA(nid), &cc);
}
/* Compact all nodes in the system */
-static int compact_nodes(void)
+static void compact_nodes(void)
{
int nid;
@@ -1219,8 +1153,6 @@ static int compact_nodes(void)
for_each_online_node(nid)
compact_node(nid);
-
- return COMPACT_COMPLETE;
}
/* The written value is actually unused, all memory is compacted */
@@ -1231,7 +1163,7 @@ int sysctl_compaction_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
if (write)
- return compact_nodes();
+ compact_nodes();
return 0;
}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9e894edc7811..b5783d81eda9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1257,6 +1257,10 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
if (flags & FOLL_WRITE && !pmd_write(*pmd))
goto out;
+ /* Avoid dumping huge zero page */
+ if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
+ return ERR_PTR(-EFAULT);
+
page = pmd_page(*pmd);
VM_BUG_ON(!PageHead(page));
if (flags & FOLL_TOUCH) {
@@ -1819,9 +1823,19 @@ int split_huge_page(struct page *page)
BUG_ON(is_huge_zero_pfn(page_to_pfn(page)));
BUG_ON(!PageAnon(page));
- anon_vma = page_lock_anon_vma_read(page);
+
+ /*
+ * The caller does not necessarily hold an mmap_sem that would prevent
+ * the anon_vma disappearing so we first we take a reference to it
+ * and then lock the anon_vma for write. This is similar to
+ * page_lock_anon_vma_read except the write lock is taken to serialise
+ * against parallel split or collapse operations.
+ */
+ anon_vma = page_get_anon_vma(page);
if (!anon_vma)
goto out;
+ anon_vma_lock_write(anon_vma);
+
ret = 0;
if (!PageCompound(page))
goto out_unlock;
@@ -1832,7 +1846,8 @@ int split_huge_page(struct page *page)
BUG_ON(PageCompound(page));
out_unlock:
- page_unlock_anon_vma_read(anon_vma);
+ anon_vma_unlock(anon_vma);
+ put_anon_vma(anon_vma);
out:
return ret;
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4f3ea0b1e57c..546db81820e4 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3033,6 +3033,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
if (!huge_pte_none(huge_ptep_get(ptep))) {
pte = huge_ptep_get_and_clear(mm, address, ptep);
pte = pte_mkhuge(pte_modify(pte, newprot));
+ pte = arch_make_huge_pte(pte, vma, NULL, 0);
set_huge_pte_at(mm, address, ptep, pte);
pages++;
}
diff --git a/mm/internal.h b/mm/internal.h
index d597f94cc205..9ba21100ebf3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -135,7 +135,6 @@ struct compact_control {
int migratetype; /* MOVABLE, RECLAIMABLE etc */
struct zone *zone;
bool contended; /* True if a lock was contended */
- struct page **page; /* Page captured of requested size */
};
unsigned long
diff --git a/mm/memblock.c b/mm/memblock.c
index 625905523c2a..88adc8afb610 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -314,7 +314,8 @@ static void __init_memblock memblock_merge_regions(struct memblock_type *type)
}
this->size += next->size;
- memmove(next, next + 1, (type->cnt - (i + 1)) * sizeof(*next));
+ /* move forward from next + 1, index of which is i + 2 */
+ memmove(next, next + 1, (type->cnt - (i + 2)) * sizeof(*next));
type->cnt--;
}
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 09255ec8159c..fbb60b103e64 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3030,7 +3030,9 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
if (memcg) {
s->memcg_params->memcg = memcg;
s->memcg_params->root_cache = root_cache;
- }
+ } else
+ s->memcg_params->is_root_cache = true;
+
return 0;
}
diff --git a/mm/migrate.c b/mm/migrate.c
index 3b676b0c5c3e..2fd8b4af4744 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -160,8 +160,10 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
if (is_write_migration_entry(entry))
pte = pte_mkwrite(pte);
#ifdef CONFIG_HUGETLB_PAGE
- if (PageHuge(new))
+ if (PageHuge(new)) {
pte = pte_mkhuge(pte);
+ pte = arch_make_huge_pte(pte, vma, new, 0);
+ }
#endif
flush_cache_page(vma, addr, pte_pfn(pte));
set_pte_at(mm, addr, ptep, pte);
@@ -1679,9 +1681,21 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
page_xchg_last_nid(new_page, page_last_nid(page));
isolated = numamigrate_isolate_page(pgdat, page);
- if (!isolated) {
+
+ /*
+ * Failing to isolate or a GUP pin prevents migration. The expected
+ * page count is 2. 1 for anonymous pages without a mapping and 1
+ * for the callers pin. If the page was isolated, the page will
+ * need to be put back on the LRU.
+ */
+ if (!isolated || page_count(page) != 2) {
count_vm_events(PGMIGRATE_FAIL, HPAGE_PMD_NR);
put_page(new_page);
+ if (isolated) {
+ putback_lru_page(page);
+ isolated = 0;
+ goto out;
+ }
goto out_keep_locked;
}
diff --git a/mm/mlock.c b/mm/mlock.c
index f0b9ce572fc7..c9bd528b01d2 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -517,11 +517,11 @@ SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len)
static int do_mlockall(int flags)
{
struct vm_area_struct * vma, * prev = NULL;
- unsigned int def_flags = 0;
if (flags & MCL_FUTURE)
- def_flags = VM_LOCKED;
- current->mm->def_flags = def_flags;
+ current->mm->def_flags |= VM_LOCKED;
+ else
+ current->mm->def_flags &= ~VM_LOCKED;
if (flags == MCL_FUTURE)
goto out;
diff --git a/mm/mmap.c b/mm/mmap.c
index f54b235f29a9..d1e4124f3d0e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2886,7 +2886,7 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
* The LSB of head.next can't change from under us
* because we hold the mm_all_locks_mutex.
*/
- down_write(&anon_vma->root->rwsem);
+ down_write_nest_lock(&anon_vma->root->rwsem, &mm->mmap_sem);
/*
* We can safely modify head.next after taking the
* anon_vma->root->rwsem. If some other vma in this mm shares
@@ -2943,7 +2943,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
* vma in this mm is backed by the same anon_vma or address_space.
*
* We can take all the locks in random order because the VM code
- * taking i_mmap_mutex or anon_vma->mutex outside the mmap_sem never
+ * taking i_mmap_mutex or anon_vma->rwsem outside the mmap_sem never
* takes more than one of them in a row. Secondly we're protected
* against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bc6cc0e913bd..6a83cd35cfde 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -773,6 +773,10 @@ void __init init_cma_reserved_pageblock(struct page *page)
set_pageblock_migratetype(page, MIGRATE_CMA);
__free_pages(page, pageblock_order);
totalram_pages += pageblock_nr_pages;
+#ifdef CONFIG_HIGHMEM
+ if (PageHighMem(page))
+ totalhigh_pages += pageblock_nr_pages;
+#endif
}
#endif
@@ -1384,14 +1388,8 @@ void split_page(struct page *page, unsigned int order)
set_page_refcounted(page + i);
}
-/*
- * Similar to the split_page family of functions except that the page
- * required at the given order and being isolated now to prevent races
- * with parallel allocators
- */
-int capture_free_page(struct page *page, int alloc_order, int migratetype)
+static int __isolate_free_page(struct page *page, unsigned int order)
{
- unsigned int order;
unsigned long watermark;
struct zone *zone;
int mt;
@@ -1399,7 +1397,6 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
BUG_ON(!PageBuddy(page));
zone = page_zone(page);
- order = page_order(page);
mt = get_pageblock_migratetype(page);
if (mt != MIGRATE_ISOLATE) {
@@ -1408,7 +1405,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
return 0;
- __mod_zone_freepage_state(zone, -(1UL << alloc_order), mt);
+ __mod_zone_freepage_state(zone, -(1UL << order), mt);
}
/* Remove page from free list */
@@ -1416,11 +1413,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
zone->free_area[order].nr_free--;
rmv_page_order(page);
- if (alloc_order != order)
- expand(zone, page, alloc_order, order,
- &zone->free_area[order], migratetype);
-
- /* Set the pageblock if the captured page is at least a pageblock */
+ /* Set the pageblock if the isolated page is at least a pageblock */
if (order >= pageblock_order - 1) {
struct page *endpage = page + (1 << order) - 1;
for (; page < endpage; page += pageblock_nr_pages) {
@@ -1431,7 +1424,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
}
}
- return 1UL << alloc_order;
+ return 1UL << order;
}
/*
@@ -1449,10 +1442,9 @@ int split_free_page(struct page *page)
unsigned int order;
int nr_pages;
- BUG_ON(!PageBuddy(page));
order = page_order(page);
- nr_pages = capture_free_page(page, order, 0);
+ nr_pages = __isolate_free_page(page, order);
if (!nr_pages)
return 0;
@@ -2136,8 +2128,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
bool *contended_compaction, bool *deferred_compaction,
unsigned long *did_some_progress)
{
- struct page *page = NULL;
-
if (!order)
return NULL;
@@ -2149,16 +2139,12 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
current->flags |= PF_MEMALLOC;
*did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
nodemask, sync_migration,
- contended_compaction, &page);
+ contended_compaction);
current->flags &= ~PF_MEMALLOC;
- /* If compaction captured a page, prep and use it */
- if (page) {
- prep_new_page(page, order, gfp_mask);
- goto got_page;
- }
-
if (*did_some_progress != COMPACT_SKIPPED) {
+ struct page *page;
+
/* Page migration frees to the PCP lists but we want merging */
drain_pages(get_cpu());
put_cpu();
@@ -2168,7 +2154,6 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
alloc_flags & ~ALLOC_NO_WATERMARKS,
preferred_zone, migratetype);
if (page) {
-got_page:
preferred_zone->compact_blockskip_flush = false;
preferred_zone->compact_considered = 0;
preferred_zone->compact_defer_shift = 0;
@@ -4435,10 +4420,11 @@ static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
* round what is now in bits to nearest long in bits, then return it in
* bytes.
*/
-static unsigned long __init usemap_size(unsigned long zonesize)
+static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned long zonesize)
{
unsigned long usemapsize;
+ zonesize += zone_start_pfn & (pageblock_nr_pages-1);
usemapsize = roundup(zonesize, pageblock_nr_pages);
usemapsize = usemapsize >> pageblock_order;
usemapsize *= NR_PAGEBLOCK_BITS;
@@ -4448,17 +4434,19 @@ static unsigned long __init usemap_size(unsigned long zonesize)
}
static void __init setup_usemap(struct pglist_data *pgdat,
- struct zone *zone, unsigned long zonesize)
+ struct zone *zone,
+ unsigned long zone_start_pfn,
+ unsigned long zonesize)
{
- unsigned long usemapsize = usemap_size(zonesize);
+ unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize);
zone->pageblock_flags = NULL;
if (usemapsize)
zone->pageblock_flags = alloc_bootmem_node_nopanic(pgdat,
usemapsize);
}
#else
-static inline void setup_usemap(struct pglist_data *pgdat,
- struct zone *zone, unsigned long zonesize) {}
+static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
+ unsigned long zone_start_pfn, unsigned long zonesize) {}
#endif /* CONFIG_SPARSEMEM */
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -4609,7 +4597,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
continue;
set_pageblock_order();
- setup_usemap(pgdat, zone, size);
+ setup_usemap(pgdat, zone, zone_start_pfn, size);
ret = init_currently_empty_zone(zone, zone_start_pfn,
size, MEMMAP_EARLY);
BUG_ON(ret);
@@ -5604,7 +5592,7 @@ static inline int pfn_to_bitidx(struct zone *zone, unsigned long pfn)
pfn &= (PAGES_PER_SECTION-1);
return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
#else
- pfn = pfn - zone->zone_start_pfn;
+ pfn = pfn - round_down(zone->zone_start_pfn, pageblock_nr_pages);
return (pfn >> pageblock_order) * NR_PAGEBLOCK_BITS;
#endif /* CONFIG_SPARSEMEM */
}