diff options
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r-- | mm/swapfile.c | 259 |
1 files changed, 190 insertions, 69 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c index 4f6cba1b6632..984f0dd94948 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -199,7 +199,11 @@ static void discard_swap_cluster(struct swap_info_struct *si, } } +#ifdef CONFIG_THP_SWAP +#define SWAPFILE_CLUSTER HPAGE_PMD_NR +#else #define SWAPFILE_CLUSTER 256 +#endif #define LATENCY_LIMIT 256 static inline void cluster_set_flag(struct swap_cluster_info *info, @@ -374,6 +378,14 @@ static void swap_cluster_schedule_discard(struct swap_info_struct *si, schedule_work(&si->discard_work); } +static void __free_cluster(struct swap_info_struct *si, unsigned long idx) +{ + struct swap_cluster_info *ci = si->cluster_info; + + cluster_set_flag(ci + idx, CLUSTER_FLAG_FREE); + cluster_list_add_tail(&si->free_clusters, ci, idx); +} + /* * Doing discard actually. After a cluster discard is finished, the cluster * will be added to free cluster list. caller should hold si->lock. @@ -394,10 +406,7 @@ static void swap_do_scheduled_discard(struct swap_info_struct *si) spin_lock(&si->lock); ci = lock_cluster(si, idx * SWAPFILE_CLUSTER); - cluster_set_flag(ci, CLUSTER_FLAG_FREE); - unlock_cluster(ci); - cluster_list_add_tail(&si->free_clusters, info, idx); - ci = lock_cluster(si, idx * SWAPFILE_CLUSTER); + __free_cluster(si, idx); memset(si->swap_map + idx * SWAPFILE_CLUSTER, 0, SWAPFILE_CLUSTER); unlock_cluster(ci); @@ -415,6 +424,34 @@ static void swap_discard_work(struct work_struct *work) spin_unlock(&si->lock); } +static void alloc_cluster(struct swap_info_struct *si, unsigned long idx) +{ + struct swap_cluster_info *ci = si->cluster_info; + + VM_BUG_ON(cluster_list_first(&si->free_clusters) != idx); + cluster_list_del_first(&si->free_clusters, ci); + cluster_set_count_flag(ci + idx, 0, 0); +} + +static void free_cluster(struct swap_info_struct *si, unsigned long idx) +{ + struct swap_cluster_info *ci = si->cluster_info + idx; + + VM_BUG_ON(cluster_count(ci) != 0); + /* + * If the swap is discardable, prepare discard the cluster + * instead of free it immediately. The cluster will be freed + * after discard. + */ + if ((si->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) == + (SWP_WRITEOK | SWP_PAGE_DISCARD)) { + swap_cluster_schedule_discard(si, idx); + return; + } + + __free_cluster(si, idx); +} + /* * The cluster corresponding to page_nr will be used. The cluster will be * removed from free cluster list and its usage counter will be increased. @@ -426,11 +463,8 @@ static void inc_cluster_info_page(struct swap_info_struct *p, if (!cluster_info) return; - if (cluster_is_free(&cluster_info[idx])) { - VM_BUG_ON(cluster_list_first(&p->free_clusters) != idx); - cluster_list_del_first(&p->free_clusters, cluster_info); - cluster_set_count_flag(&cluster_info[idx], 0, 0); - } + if (cluster_is_free(&cluster_info[idx])) + alloc_cluster(p, idx); VM_BUG_ON(cluster_count(&cluster_info[idx]) >= SWAPFILE_CLUSTER); cluster_set_count(&cluster_info[idx], @@ -454,21 +488,8 @@ static void dec_cluster_info_page(struct swap_info_struct *p, cluster_set_count(&cluster_info[idx], cluster_count(&cluster_info[idx]) - 1); - if (cluster_count(&cluster_info[idx]) == 0) { - /* - * If the swap is discardable, prepare discard the cluster - * instead of free it immediately. The cluster will be freed - * after discard. - */ - if ((p->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) == - (SWP_WRITEOK | SWP_PAGE_DISCARD)) { - swap_cluster_schedule_discard(p, idx); - return; - } - - cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE); - cluster_list_add_tail(&p->free_clusters, cluster_info, idx); - } + if (cluster_count(&cluster_info[idx]) == 0) + free_cluster(p, idx); } /* @@ -558,6 +579,60 @@ new_cluster: return found_free; } +static void swap_range_alloc(struct swap_info_struct *si, unsigned long offset, + unsigned int nr_entries) +{ + unsigned int end = offset + nr_entries - 1; + + if (offset == si->lowest_bit) + si->lowest_bit += nr_entries; + if (end == si->highest_bit) + si->highest_bit -= nr_entries; + si->inuse_pages += nr_entries; + if (si->inuse_pages == si->pages) { + si->lowest_bit = si->max; + si->highest_bit = 0; + spin_lock(&swap_avail_lock); + plist_del(&si->avail_list, &swap_avail_head); + spin_unlock(&swap_avail_lock); + } +} + +static void swap_range_free(struct swap_info_struct *si, unsigned long offset, + unsigned int nr_entries) +{ + unsigned long end = offset + nr_entries - 1; + void (*swap_slot_free_notify)(struct block_device *, unsigned long); + + if (offset < si->lowest_bit) + si->lowest_bit = offset; + if (end > si->highest_bit) { + bool was_full = !si->highest_bit; + + si->highest_bit = end; + if (was_full && (si->flags & SWP_WRITEOK)) { + spin_lock(&swap_avail_lock); + WARN_ON(!plist_node_empty(&si->avail_list)); + if (plist_node_empty(&si->avail_list)) + plist_add(&si->avail_list, &swap_avail_head); + spin_unlock(&swap_avail_lock); + } + } + atomic_long_add(nr_entries, &nr_swap_pages); + si->inuse_pages -= nr_entries; + if (si->flags & SWP_BLKDEV) + swap_slot_free_notify = + si->bdev->bd_disk->fops->swap_slot_free_notify; + else + swap_slot_free_notify = NULL; + while (offset <= end) { + frontswap_invalidate_page(si->type, offset); + if (swap_slot_free_notify) + swap_slot_free_notify(si->bdev, offset); + offset++; + } +} + static int scan_swap_map_slots(struct swap_info_struct *si, unsigned char usage, int nr, swp_entry_t slots[]) @@ -676,18 +751,7 @@ checks: inc_cluster_info_page(si, si->cluster_info, offset); unlock_cluster(ci); - if (offset == si->lowest_bit) - si->lowest_bit++; - if (offset == si->highest_bit) - si->highest_bit--; - si->inuse_pages++; - if (si->inuse_pages == si->pages) { - si->lowest_bit = si->max; - si->highest_bit = 0; - spin_lock(&swap_avail_lock); - plist_del(&si->avail_list, &swap_avail_head); - spin_unlock(&swap_avail_lock); - } + swap_range_alloc(si, offset, 1); si->cluster_next = offset + 1; slots[n_ret++] = swp_entry(si->type, offset); @@ -766,6 +830,52 @@ no_page: return n_ret; } +#ifdef CONFIG_THP_SWAP +static int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot) +{ + unsigned long idx; + struct swap_cluster_info *ci; + unsigned long offset, i; + unsigned char *map; + + if (cluster_list_empty(&si->free_clusters)) + return 0; + + idx = cluster_list_first(&si->free_clusters); + offset = idx * SWAPFILE_CLUSTER; + ci = lock_cluster(si, offset); + alloc_cluster(si, idx); + cluster_set_count_flag(ci, SWAPFILE_CLUSTER, 0); + + map = si->swap_map + offset; + for (i = 0; i < SWAPFILE_CLUSTER; i++) + map[i] = SWAP_HAS_CACHE; + unlock_cluster(ci); + swap_range_alloc(si, offset, SWAPFILE_CLUSTER); + *slot = swp_entry(si->type, offset); + + return 1; +} + +static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx) +{ + unsigned long offset = idx * SWAPFILE_CLUSTER; + struct swap_cluster_info *ci; + + ci = lock_cluster(si, offset); + cluster_set_count_flag(ci, 0, 0); + free_cluster(si, idx); + unlock_cluster(ci); + swap_range_free(si, offset, SWAPFILE_CLUSTER); +} +#else +static int swap_alloc_cluster(struct swap_info_struct *si, swp_entry_t *slot) +{ + VM_WARN_ON_ONCE(1); + return 0; +} +#endif /* CONFIG_THP_SWAP */ + static unsigned long scan_swap_map(struct swap_info_struct *si, unsigned char usage) { @@ -781,13 +891,17 @@ static unsigned long scan_swap_map(struct swap_info_struct *si, } -int get_swap_pages(int n_goal, swp_entry_t swp_entries[]) +int get_swap_pages(int n_goal, bool cluster, swp_entry_t swp_entries[]) { + unsigned long nr_pages = cluster ? SWAPFILE_CLUSTER : 1; struct swap_info_struct *si, *next; long avail_pgs; int n_ret = 0; - avail_pgs = atomic_long_read(&nr_swap_pages); + /* Only single cluster request supported */ + WARN_ON_ONCE(n_goal > 1 && cluster); + + avail_pgs = atomic_long_read(&nr_swap_pages) / nr_pages; if (avail_pgs <= 0) goto noswap; @@ -797,7 +911,7 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[]) if (n_goal > avail_pgs) n_goal = avail_pgs; - atomic_long_sub(n_goal, &nr_swap_pages); + atomic_long_sub(n_goal * nr_pages, &nr_swap_pages); spin_lock(&swap_avail_lock); @@ -823,10 +937,13 @@ start_over: spin_unlock(&si->lock); goto nextsi; } - n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE, - n_goal, swp_entries); + if (cluster) + n_ret = swap_alloc_cluster(si, swp_entries); + else + n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE, + n_goal, swp_entries); spin_unlock(&si->lock); - if (n_ret) + if (n_ret || cluster) goto check_out; pr_debug("scan_swap_map of si %d failed to find offset\n", si->type); @@ -852,7 +969,8 @@ nextsi: check_out: if (n_ret < n_goal) - atomic_long_add((long) (n_goal-n_ret), &nr_swap_pages); + atomic_long_add((long)(n_goal - n_ret) * nr_pages, + &nr_swap_pages); noswap: return n_ret; } @@ -1008,32 +1126,8 @@ static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry) dec_cluster_info_page(p, p->cluster_info, offset); unlock_cluster(ci); - mem_cgroup_uncharge_swap(entry); - if (offset < p->lowest_bit) - p->lowest_bit = offset; - if (offset > p->highest_bit) { - bool was_full = !p->highest_bit; - - p->highest_bit = offset; - if (was_full && (p->flags & SWP_WRITEOK)) { - spin_lock(&swap_avail_lock); - WARN_ON(!plist_node_empty(&p->avail_list)); - if (plist_node_empty(&p->avail_list)) - plist_add(&p->avail_list, - &swap_avail_head); - spin_unlock(&swap_avail_lock); - } - } - atomic_long_inc(&nr_swap_pages); - p->inuse_pages--; - frontswap_invalidate_page(p->type, offset); - if (p->flags & SWP_BLKDEV) { - struct gendisk *disk = p->bdev->bd_disk; - - if (disk->fops->swap_slot_free_notify) - disk->fops->swap_slot_free_notify(p->bdev, - offset); - } + mem_cgroup_uncharge_swap(entry, 1); + swap_range_free(p, offset, 1); } /* @@ -1065,6 +1159,33 @@ void swapcache_free(swp_entry_t entry) } } +#ifdef CONFIG_THP_SWAP +void swapcache_free_cluster(swp_entry_t entry) +{ + unsigned long offset = swp_offset(entry); + unsigned long idx = offset / SWAPFILE_CLUSTER; + struct swap_cluster_info *ci; + struct swap_info_struct *si; + unsigned char *map; + unsigned int i; + + si = swap_info_get(entry); + if (!si) + return; + + ci = lock_cluster(si, offset); + map = si->swap_map + offset; + for (i = 0; i < SWAPFILE_CLUSTER; i++) { + VM_BUG_ON(map[i] != SWAP_HAS_CACHE); + map[i] = 0; + } + unlock_cluster(ci); + mem_cgroup_uncharge_swap(entry, SWAPFILE_CLUSTER); + swap_free_cluster(si, idx); + spin_unlock(&si->lock); +} +#endif /* CONFIG_THP_SWAP */ + void swapcache_free_entries(swp_entry_t *entries, int n) { struct swap_info_struct *p, *prev; |