summaryrefslogtreecommitdiffstats
path: root/drivers/hv/hv_balloon.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/hv/hv_balloon.c')
-rw-r--r--drivers/hv/hv_balloon.c143
1 files changed, 73 insertions, 70 deletions
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index ff169386b2c7..cb5b7dc9797f 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -428,14 +428,13 @@ struct dm_info_msg {
* currently hot added. We hot add in multiples of 128M
* chunks; it is possible that we may not be able to bring
* online all the pages in the region. The range
- * covered_start_pfn : covered_end_pfn defines the pages that can
+ * covered_end_pfn defines the pages that can
* be brough online.
*/
struct hv_hotadd_state {
struct list_head list;
unsigned long start_pfn;
- unsigned long covered_start_pfn;
unsigned long covered_end_pfn;
unsigned long ha_end_pfn;
unsigned long end_pfn;
@@ -503,6 +502,8 @@ struct hv_dynmem_device {
* Number of pages we have currently ballooned out.
*/
unsigned int num_pages_ballooned;
+ unsigned int num_pages_onlined;
+ unsigned int num_pages_added;
/*
* State to manage the ballooning (up) operation.
@@ -534,7 +535,6 @@ struct hv_dynmem_device {
struct task_struct *thread;
struct mutex ha_region_mutex;
- struct completion waiter_event;
/*
* A list of hot-add regions.
@@ -554,46 +554,32 @@ static struct hv_dynmem_device dm_device;
static void post_status(struct hv_dynmem_device *dm);
#ifdef CONFIG_MEMORY_HOTPLUG
-static void acquire_region_mutex(bool trylock)
-{
- if (trylock) {
- reinit_completion(&dm_device.waiter_event);
- while (!mutex_trylock(&dm_device.ha_region_mutex))
- wait_for_completion(&dm_device.waiter_event);
- } else {
- mutex_lock(&dm_device.ha_region_mutex);
- }
-}
-
-static void release_region_mutex(bool trylock)
-{
- if (trylock) {
- mutex_unlock(&dm_device.ha_region_mutex);
- } else {
- mutex_unlock(&dm_device.ha_region_mutex);
- complete(&dm_device.waiter_event);
- }
-}
-
static int hv_memory_notifier(struct notifier_block *nb, unsigned long val,
void *v)
{
+ struct memory_notify *mem = (struct memory_notify *)v;
+
switch (val) {
case MEM_GOING_ONLINE:
- acquire_region_mutex(true);
+ mutex_lock(&dm_device.ha_region_mutex);
break;
case MEM_ONLINE:
+ dm_device.num_pages_onlined += mem->nr_pages;
case MEM_CANCEL_ONLINE:
- release_region_mutex(true);
+ mutex_unlock(&dm_device.ha_region_mutex);
if (dm_device.ha_waiting) {
dm_device.ha_waiting = false;
complete(&dm_device.ol_waitevent);
}
break;
- case MEM_GOING_OFFLINE:
case MEM_OFFLINE:
+ mutex_lock(&dm_device.ha_region_mutex);
+ dm_device.num_pages_onlined -= mem->nr_pages;
+ mutex_unlock(&dm_device.ha_region_mutex);
+ break;
+ case MEM_GOING_OFFLINE:
case MEM_CANCEL_OFFLINE:
break;
}
@@ -646,7 +632,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
init_completion(&dm_device.ol_waitevent);
dm_device.ha_waiting = true;
- release_region_mutex(false);
+ mutex_unlock(&dm_device.ha_region_mutex);
nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
ret = add_memory(nid, PFN_PHYS((start_pfn)),
(HA_CHUNK << PAGE_SHIFT));
@@ -665,6 +651,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
}
has->ha_end_pfn -= HA_CHUNK;
has->covered_end_pfn -= processed_pfn;
+ mutex_lock(&dm_device.ha_region_mutex);
break;
}
@@ -675,7 +662,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
* have not been "onlined" within the allowed time.
*/
wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ);
- acquire_region_mutex(false);
+ mutex_lock(&dm_device.ha_region_mutex);
post_status(&dm_device);
}
@@ -691,8 +678,7 @@ static void hv_online_page(struct page *pg)
list_for_each(cur, &dm_device.ha_region_list) {
has = list_entry(cur, struct hv_hotadd_state, list);
- cur_start_pgp = (unsigned long)
- pfn_to_page(has->covered_start_pfn);
+ cur_start_pgp = (unsigned long)pfn_to_page(has->start_pfn);
cur_end_pgp = (unsigned long)pfn_to_page(has->covered_end_pfn);
if (((unsigned long)pg >= cur_start_pgp) &&
@@ -704,7 +690,6 @@ static void hv_online_page(struct page *pg)
__online_page_set_limits(pg);
__online_page_increment_counters(pg);
__online_page_free(pg);
- has->covered_start_pfn++;
}
}
}
@@ -748,10 +733,9 @@ static bool pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt)
* is, update it.
*/
- if (has->covered_end_pfn != start_pfn) {
+ if (has->covered_end_pfn != start_pfn)
has->covered_end_pfn = start_pfn;
- has->covered_start_pfn = start_pfn;
- }
+
return true;
}
@@ -794,9 +778,18 @@ static unsigned long handle_pg_range(unsigned long pg_start,
pgs_ol = has->ha_end_pfn - start_pfn;
if (pgs_ol > pfn_cnt)
pgs_ol = pfn_cnt;
- hv_bring_pgs_online(start_pfn, pgs_ol);
+
+ /*
+ * Check if the corresponding memory block is already
+ * online by checking its last previously backed page.
+ * In case it is we need to bring rest (which was not
+ * backed previously) online too.
+ */
+ if (start_pfn > has->start_pfn &&
+ !PageReserved(pfn_to_page(start_pfn - 1)))
+ hv_bring_pgs_online(start_pfn, pgs_ol);
+
has->covered_end_pfn += pgs_ol;
- has->covered_start_pfn += pgs_ol;
pfn_cnt -= pgs_ol;
}
@@ -857,7 +850,6 @@ static unsigned long process_hot_add(unsigned long pg_start,
list_add_tail(&ha_region->list, &dm_device.ha_region_list);
ha_region->start_pfn = rg_start;
ha_region->ha_end_pfn = rg_start;
- ha_region->covered_start_pfn = pg_start;
ha_region->covered_end_pfn = pg_start;
ha_region->end_pfn = rg_start + rg_size;
}
@@ -886,7 +878,7 @@ static void hot_add_req(struct work_struct *dummy)
resp.hdr.size = sizeof(struct dm_hot_add_response);
#ifdef CONFIG_MEMORY_HOTPLUG
- acquire_region_mutex(false);
+ mutex_lock(&dm_device.ha_region_mutex);
pg_start = dm->ha_wrk.ha_page_range.finfo.start_page;
pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt;
@@ -918,7 +910,9 @@ static void hot_add_req(struct work_struct *dummy)
if (do_hot_add)
resp.page_count = process_hot_add(pg_start, pfn_cnt,
rg_start, rg_sz);
- release_region_mutex(false);
+
+ dm->num_pages_added += resp.page_count;
+ mutex_unlock(&dm_device.ha_region_mutex);
#endif
/*
* The result field of the response structure has the
@@ -982,8 +976,8 @@ static unsigned long compute_balloon_floor(void)
* 128 72 (1/2)
* 512 168 (1/4)
* 2048 360 (1/8)
- * 8192 768 (1/16)
- * 32768 1536 (1/32)
+ * 8192 744 (1/16)
+ * 32768 1512 (1/32)
*/
if (totalram_pages < MB2PAGES(128))
min_pages = MB2PAGES(8) + (totalram_pages >> 1);
@@ -992,9 +986,9 @@ static unsigned long compute_balloon_floor(void)
else if (totalram_pages < MB2PAGES(2048))
min_pages = MB2PAGES(104) + (totalram_pages >> 3);
else if (totalram_pages < MB2PAGES(8192))
- min_pages = MB2PAGES(256) + (totalram_pages >> 4);
+ min_pages = MB2PAGES(232) + (totalram_pages >> 4);
else
- min_pages = MB2PAGES(512) + (totalram_pages >> 5);
+ min_pages = MB2PAGES(488) + (totalram_pages >> 5);
#undef MB2PAGES
return min_pages;
}
@@ -1031,17 +1025,21 @@ static void post_status(struct hv_dynmem_device *dm)
status.hdr.trans_id = atomic_inc_return(&trans_id);
/*
- * The host expects the guest to report free memory.
- * Further, the host expects the pressure information to
- * include the ballooned out pages.
- * For a given amount of memory that we are managing, we
- * need to compute a floor below which we should not balloon.
- * Compute this and add it to the pressure report.
+ * The host expects the guest to report free and committed memory.
+ * Furthermore, the host expects the pressure information to include
+ * the ballooned out pages. For a given amount of memory that we are
+ * managing we need to compute a floor below which we should not
+ * balloon. Compute this and add it to the pressure report.
+ * We also need to report all offline pages (num_pages_added -
+ * num_pages_onlined) as committed to the host, otherwise it can try
+ * asking us to balloon them out.
*/
status.num_avail = val.freeram;
status.num_committed = vm_memory_committed() +
- dm->num_pages_ballooned +
- compute_balloon_floor();
+ dm->num_pages_ballooned +
+ (dm->num_pages_added > dm->num_pages_onlined ?
+ dm->num_pages_added - dm->num_pages_onlined : 0) +
+ compute_balloon_floor();
/*
* If our transaction ID is no longer current, just don't
@@ -1083,11 +1081,12 @@ static void free_balloon_pages(struct hv_dynmem_device *dm,
-static int alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
- struct dm_balloon_response *bl_resp, int alloc_unit,
- bool *alloc_error)
+static unsigned int alloc_balloon_pages(struct hv_dynmem_device *dm,
+ unsigned int num_pages,
+ struct dm_balloon_response *bl_resp,
+ int alloc_unit)
{
- int i = 0;
+ unsigned int i = 0;
struct page *pg;
if (num_pages < alloc_unit)
@@ -1106,11 +1105,8 @@ static int alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
__GFP_NOMEMALLOC | __GFP_NOWARN,
get_order(alloc_unit << PAGE_SHIFT));
- if (!pg) {
- *alloc_error = true;
+ if (!pg)
return i * alloc_unit;
- }
-
dm->num_pages_ballooned += alloc_unit;
@@ -1137,14 +1133,15 @@ static int alloc_balloon_pages(struct hv_dynmem_device *dm, int num_pages,
static void balloon_up(struct work_struct *dummy)
{
- int num_pages = dm_device.balloon_wrk.num_pages;
- int num_ballooned = 0;
+ unsigned int num_pages = dm_device.balloon_wrk.num_pages;
+ unsigned int num_ballooned = 0;
struct dm_balloon_response *bl_resp;
int alloc_unit;
int ret;
- bool alloc_error;
bool done = false;
int i;
+ struct sysinfo val;
+ unsigned long floor;
/* The host balloons pages in 2M granularity. */
WARN_ON_ONCE(num_pages % PAGES_IN_2M != 0);
@@ -1155,6 +1152,15 @@ static void balloon_up(struct work_struct *dummy)
*/
alloc_unit = 512;
+ si_meminfo(&val);
+ floor = compute_balloon_floor();
+
+ /* Refuse to balloon below the floor, keep the 2M granularity. */
+ if (val.freeram < num_pages || val.freeram - num_pages < floor) {
+ num_pages = val.freeram > floor ? (val.freeram - floor) : 0;
+ num_pages -= num_pages % PAGES_IN_2M;
+ }
+
while (!done) {
bl_resp = (struct dm_balloon_response *)send_buffer;
memset(send_buffer, 0, PAGE_SIZE);
@@ -1164,18 +1170,15 @@ static void balloon_up(struct work_struct *dummy)
num_pages -= num_ballooned;
- alloc_error = false;
num_ballooned = alloc_balloon_pages(&dm_device, num_pages,
- bl_resp, alloc_unit,
- &alloc_error);
+ bl_resp, alloc_unit);
if (alloc_unit != 1 && num_ballooned == 0) {
alloc_unit = 1;
continue;
}
- if ((alloc_unit == 1 && alloc_error) ||
- (num_ballooned == num_pages)) {
+ if (num_ballooned == 0 || num_ballooned == num_pages) {
bl_resp->more_pages = 0;
done = true;
dm_device.state = DM_INITIALIZED;
@@ -1414,7 +1417,8 @@ static void balloon_onchannelcallback(void *context)
static int balloon_probe(struct hv_device *dev,
const struct hv_vmbus_device_id *dev_id)
{
- int ret, t;
+ int ret;
+ unsigned long t;
struct dm_version_request version_req;
struct dm_capabilities cap_msg;
@@ -1439,7 +1443,6 @@ static int balloon_probe(struct hv_device *dev,
dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7;
init_completion(&dm_device.host_event);
init_completion(&dm_device.config_event);
- init_completion(&dm_device.waiter_event);
INIT_LIST_HEAD(&dm_device.ha_region_list);
mutex_init(&dm_device.ha_region_mutex);
INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);