From d608715d4771cf2d63de07a5d7b026b6f52a70a5 Mon Sep 17 00:00:00 2001
From: Michael Kelley <mikelley@microsoft.com>
Date: Tue, 2 Mar 2021 13:38:18 -0800
Subject: Drivers: hv: vmbus: Move handling of VMbus interrupts

VMbus interrupts are most naturally modelled as per-cpu IRQs.  But
because x86/x64 doesn't have per-cpu IRQs, the core VMbus interrupt
handling machinery is done in code under arch/x86 and Linux IRQs are
not used.  Adding support for ARM64 means adding equivalent code
using per-cpu IRQs under arch/arm64.

A better model is to treat per-cpu IRQs as the normal path (which it is
for modern architectures), and the x86/x64 path as the exception.  Do this
by incorporating standard Linux per-cpu IRQ allocation into the main VMbus
driver, and bypassing it in the x86/x64 exception case. For x86/x64,
special case code is retained under arch/x86, but no VMbus interrupt
handling code is needed under arch/arm64.

No functional change.

Signed-off-by: Michael Kelley <mikelley@microsoft.com>
Reviewed-by: Boqun Feng <boqun.feng@gmail.com>
Link: https://lore.kernel.org/r/1614721102-2241-7-git-send-email-mikelley@microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 arch/x86/kernel/cpu/mshyperv.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'arch/x86/kernel/cpu/mshyperv.c')

diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index e88bc296afca..41fd84a88783 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -60,23 +60,18 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
 	set_irq_regs(old_regs);
 }
 
-int hv_setup_vmbus_irq(int irq, void (*handler)(void))
+void hv_setup_vmbus_handler(void (*handler)(void))
 {
-	/*
-	 * The 'irq' argument is ignored on x86/x64 because a hard-coded
-	 * interrupt vector is used for Hyper-V interrupts.
-	 */
 	vmbus_handler = handler;
-	return 0;
 }
+EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler);
 
-void hv_remove_vmbus_irq(void)
+void hv_remove_vmbus_handler(void)
 {
 	/* We have no way to deallocate the interrupt gate */
 	vmbus_handler = NULL;
 }
-EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq);
-EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq);
+EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler);
 
 /*
  * Routines to do per-architecture handling of stimer0
-- 
cgit v1.2.3


From ec866be6ec547c9e1cc4451f04250e08b5fe67c7 Mon Sep 17 00:00:00 2001
From: Michael Kelley <mikelley@microsoft.com>
Date: Tue, 2 Mar 2021 13:38:22 -0800
Subject: clocksource/drivers/hyper-v: Move handling of STIMER0 interrupts

STIMER0 interrupts are most naturally modeled as per-cpu IRQs. But
because x86/x64 doesn't have per-cpu IRQs, the core STIMER0 interrupt
handling machinery is done in code under arch/x86 and Linux IRQs are
not used. Adding support for ARM64 means adding equivalent code
using per-cpu IRQs under arch/arm64.

A better model is to treat per-cpu IRQs as the normal path (which it is
for modern architectures), and the x86/x64 path as the exception. Do this
by incorporating standard Linux per-cpu IRQ allocation into the main
SITMER0 driver code, and bypass it in the x86/x64 exception case. For
x86/x64, special case code is retained under arch/x86, but no STIMER0
interrupt handling code is needed under arch/arm64.

No functional change.

Signed-off-by: Michael Kelley <mikelley@microsoft.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Link: https://lore.kernel.org/r/1614721102-2241-11-git-send-email-mikelley@microsoft.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 arch/x86/hyperv/hv_init.c          |   2 +-
 arch/x86/include/asm/mshyperv.h    |   4 -
 arch/x86/kernel/cpu/mshyperv.c     |  10 +--
 drivers/clocksource/hyperv_timer.c | 168 +++++++++++++++++++++++++------------
 include/asm-generic/mshyperv.h     |   5 --
 include/clocksource/hyperv_timer.h |   3 +-
 6 files changed, 120 insertions(+), 72 deletions(-)

(limited to 'arch/x86/kernel/cpu/mshyperv.c')

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 9af4f8a57f36..9d100257b3af 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -327,7 +327,7 @@ static void __init hv_stimer_setup_percpu_clockev(void)
 	 * Ignore any errors in setting up stimer clockevents
 	 * as we can run with the LAPIC timer as a fallback.
 	 */
-	(void)hv_stimer_alloc();
+	(void)hv_stimer_alloc(false);
 
 	/*
 	 * Still register the LAPIC timer, because the direct-mode STIMER is
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 5433312c9041..6d4891bced80 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -31,10 +31,6 @@ static inline u64 hv_get_register(unsigned int reg)
 
 void hyperv_vector_handler(struct pt_regs *regs);
 
-static inline void hv_enable_stimer0_percpu_irq(int irq) {}
-static inline void hv_disable_stimer0_percpu_irq(int irq) {}
-
-
 #if IS_ENABLED(CONFIG_HYPERV)
 extern int hyperv_init_cpuhp;
 
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 41fd84a88783..cebed535ec56 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -90,21 +90,17 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0)
 	set_irq_regs(old_regs);
 }
 
-int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void))
+/* For x86/x64, override weak placeholders in hyperv_timer.c */
+void hv_setup_stimer0_handler(void (*handler)(void))
 {
-	*vector = HYPERV_STIMER0_VECTOR;
-	*irq = -1;   /* Unused on x86/x64 */
 	hv_stimer0_handler = handler;
-	return 0;
 }
-EXPORT_SYMBOL_GPL(hv_setup_stimer0_irq);
 
-void hv_remove_stimer0_irq(int irq)
+void hv_remove_stimer0_handler(void)
 {
 	/* We have no way to deallocate the interrupt gate */
 	hv_stimer0_handler = NULL;
 }
-EXPORT_SYMBOL_GPL(hv_remove_stimer0_irq);
 
 void hv_setup_kexec_handler(void (*handler)(void))
 {
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index 7a9030ca68c4..ce94f78eb851 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -18,6 +18,9 @@
 #include <linux/sched_clock.h>
 #include <linux/mm.h>
 #include <linux/cpuhotplug.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/acpi.h>
 #include <clocksource/hyperv_timer.h>
 #include <asm/hyperv-tlfs.h>
 #include <asm/mshyperv.h>
@@ -43,14 +46,13 @@ static u64 hv_sched_clock_offset __ro_after_init;
  */
 static bool direct_mode_enabled;
 
-static int stimer0_irq;
-static int stimer0_vector;
+static int stimer0_irq = -1;
 static int stimer0_message_sint;
+static DEFINE_PER_CPU(long, stimer0_evt);
 
 /*
- * ISR for when stimer0 is operating in Direct Mode.  Direct Mode
- * does not use VMbus or any VMbus messages, so process here and not
- * in the VMbus driver code.
+ * Common code for stimer0 interrupts coming via Direct Mode or
+ * as a VMbus message.
  */
 void hv_stimer0_isr(void)
 {
@@ -61,6 +63,16 @@ void hv_stimer0_isr(void)
 }
 EXPORT_SYMBOL_GPL(hv_stimer0_isr);
 
+/*
+ * stimer0 interrupt handler for architectures that support
+ * per-cpu interrupts, which also implies Direct Mode.
+ */
+static irqreturn_t hv_stimer0_percpu_isr(int irq, void *dev_id)
+{
+	hv_stimer0_isr();
+	return IRQ_HANDLED;
+}
+
 static int hv_ce_set_next_event(unsigned long delta,
 				struct clock_event_device *evt)
 {
@@ -76,8 +88,8 @@ static int hv_ce_shutdown(struct clock_event_device *evt)
 {
 	hv_set_register(HV_REGISTER_STIMER0_COUNT, 0);
 	hv_set_register(HV_REGISTER_STIMER0_CONFIG, 0);
-	if (direct_mode_enabled)
-		hv_disable_stimer0_percpu_irq(stimer0_irq);
+	if (direct_mode_enabled && stimer0_irq >= 0)
+		disable_percpu_irq(stimer0_irq);
 
 	return 0;
 }
@@ -95,8 +107,9 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt)
 		 * on the specified hardware vector/IRQ.
 		 */
 		timer_cfg.direct_mode = 1;
-		timer_cfg.apic_vector = stimer0_vector;
-		hv_enable_stimer0_percpu_irq(stimer0_irq);
+		timer_cfg.apic_vector = HYPERV_STIMER0_VECTOR;
+		if (stimer0_irq >= 0)
+			enable_percpu_irq(stimer0_irq, IRQ_TYPE_NONE);
 	} else {
 		/*
 		 * When it expires, the timer will generate a VMbus message,
@@ -169,10 +182,58 @@ int hv_stimer_cleanup(unsigned int cpu)
 }
 EXPORT_SYMBOL_GPL(hv_stimer_cleanup);
 
+/*
+ * These placeholders are overridden by arch specific code on
+ * architectures that need special setup of the stimer0 IRQ because
+ * they don't support per-cpu IRQs (such as x86/x64).
+ */
+void __weak hv_setup_stimer0_handler(void (*handler)(void))
+{
+};
+
+void __weak hv_remove_stimer0_handler(void)
+{
+};
+
+/* Called only on architectures with per-cpu IRQs (i.e., not x86/x64) */
+static int hv_setup_stimer0_irq(void)
+{
+	int ret;
+
+	ret = acpi_register_gsi(NULL, HYPERV_STIMER0_VECTOR,
+			ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_HIGH);
+	if (ret < 0) {
+		pr_err("Can't register Hyper-V stimer0 GSI. Error %d", ret);
+		return ret;
+	}
+	stimer0_irq = ret;
+
+	ret = request_percpu_irq(stimer0_irq, hv_stimer0_percpu_isr,
+		"Hyper-V stimer0", &stimer0_evt);
+	if (ret) {
+		pr_err("Can't request Hyper-V stimer0 IRQ %d. Error %d",
+			stimer0_irq, ret);
+		acpi_unregister_gsi(stimer0_irq);
+		stimer0_irq = -1;
+	}
+	return ret;
+}
+
+static void hv_remove_stimer0_irq(void)
+{
+	if (stimer0_irq == -1) {
+		hv_remove_stimer0_handler();
+	} else {
+		free_percpu_irq(stimer0_irq, &stimer0_evt);
+		acpi_unregister_gsi(stimer0_irq);
+		stimer0_irq = -1;
+	}
+}
+
 /* hv_stimer_alloc - Global initialization of the clockevent and stimer0 */
-int hv_stimer_alloc(void)
+int hv_stimer_alloc(bool have_percpu_irqs)
 {
-	int ret = 0;
+	int ret;
 
 	/*
 	 * Synthetic timers are always available except on old versions of
@@ -188,29 +249,37 @@ int hv_stimer_alloc(void)
 
 	direct_mode_enabled = ms_hyperv.misc_features &
 			HV_STIMER_DIRECT_MODE_AVAILABLE;
-	if (direct_mode_enabled) {
-		ret = hv_setup_stimer0_irq(&stimer0_irq, &stimer0_vector,
-				hv_stimer0_isr);
+
+	/*
+	 * If Direct Mode isn't enabled, the remainder of the initialization
+	 * is done later by hv_stimer_legacy_init()
+	 */
+	if (!direct_mode_enabled)
+		return 0;
+
+	if (have_percpu_irqs) {
+		ret = hv_setup_stimer0_irq();
 		if (ret)
-			goto free_percpu;
+			goto free_clock_event;
+	} else {
+		hv_setup_stimer0_handler(hv_stimer0_isr);
+	}
 
-		/*
-		 * Since we are in Direct Mode, stimer initialization
-		 * can be done now with a CPUHP value in the same range
-		 * as other clockevent devices.
-		 */
-		ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING,
-				"clockevents/hyperv/stimer:starting",
-				hv_stimer_init, hv_stimer_cleanup);
-		if (ret < 0)
-			goto free_stimer0_irq;
+	/*
+	 * Since we are in Direct Mode, stimer initialization
+	 * can be done now with a CPUHP value in the same range
+	 * as other clockevent devices.
+	 */
+	ret = cpuhp_setup_state(CPUHP_AP_HYPERV_TIMER_STARTING,
+			"clockevents/hyperv/stimer:starting",
+			hv_stimer_init, hv_stimer_cleanup);
+	if (ret < 0) {
+		hv_remove_stimer0_irq();
+		goto free_clock_event;
 	}
 	return ret;
 
-free_stimer0_irq:
-	hv_remove_stimer0_irq(stimer0_irq);
-	stimer0_irq = 0;
-free_percpu:
+free_clock_event:
 	free_percpu(hv_clock_event);
 	hv_clock_event = NULL;
 	return ret;
@@ -254,23 +323,6 @@ void hv_stimer_legacy_cleanup(unsigned int cpu)
 }
 EXPORT_SYMBOL_GPL(hv_stimer_legacy_cleanup);
 
-
-/* hv_stimer_free - Free global resources allocated by hv_stimer_alloc() */
-void hv_stimer_free(void)
-{
-	if (!hv_clock_event)
-		return;
-
-	if (direct_mode_enabled) {
-		cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING);
-		hv_remove_stimer0_irq(stimer0_irq);
-		stimer0_irq = 0;
-	}
-	free_percpu(hv_clock_event);
-	hv_clock_event = NULL;
-}
-EXPORT_SYMBOL_GPL(hv_stimer_free);
-
 /*
  * Do a global cleanup of clockevents for the cases of kexec and
  * vmbus exit
@@ -287,12 +339,17 @@ void hv_stimer_global_cleanup(void)
 		hv_stimer_legacy_cleanup(cpu);
 	}
 
-	/*
-	 * If Direct Mode is enabled, the cpuhp teardown callback
-	 * (hv_stimer_cleanup) will be run on all CPUs to stop the
-	 * stimers.
-	 */
-	hv_stimer_free();
+	if (!hv_clock_event)
+		return;
+
+	if (direct_mode_enabled) {
+		cpuhp_remove_state(CPUHP_AP_HYPERV_TIMER_STARTING);
+		hv_remove_stimer0_irq();
+		stimer0_irq = -1;
+	}
+	free_percpu(hv_clock_event);
+	hv_clock_event = NULL;
+
 }
 EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup);
 
@@ -457,9 +514,14 @@ static bool __init hv_init_tsc_clocksource(void)
 	 * Hyper-V Reference TSC rating, causing the generic TSC to be used.
 	 * TSC_INVARIANT is not offered on ARM64, so the Hyper-V Reference
 	 * TSC will be preferred over the virtualized ARM64 arch counter.
+	 * While the Hyper-V MSR clocksource won't be used since the
+	 * Reference TSC clocksource is present, change its rating as
+	 * well for consistency.
 	 */
-	if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)
+	if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) {
 		hyperv_cs_tsc.rating = 250;
+		hyperv_cs_msr.rating = 250;
+	}
 
 	hv_read_reference_counter = read_hv_clock_tsc;
 	phys_addr = virt_to_phys(hv_get_tsc_page());
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 43dc3715a98a..69e7fe0254cf 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -183,9 +183,4 @@ static inline bool hv_is_hibernation_supported(void) { return false; }
 static inline void hyperv_cleanup(void) {}
 #endif /* CONFIG_HYPERV */
 
-#if IS_ENABLED(CONFIG_HYPERV)
-extern int hv_setup_stimer0_irq(int *irq, int *vector, void (*handler)(void));
-extern void hv_remove_stimer0_irq(int irq);
-#endif
-
 #endif
diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h
index 34eef083c988..b6774aa5a4b8 100644
--- a/include/clocksource/hyperv_timer.h
+++ b/include/clocksource/hyperv_timer.h
@@ -21,8 +21,7 @@
 #define HV_MIN_DELTA_TICKS 1
 
 /* Routines called by the VMbus driver */
-extern int hv_stimer_alloc(void);
-extern void hv_stimer_free(void);
+extern int hv_stimer_alloc(bool have_percpu_irqs);
 extern int hv_stimer_cleanup(unsigned int cpu);
 extern void hv_stimer_legacy_init(unsigned int cpu, int sint);
 extern void hv_stimer_legacy_cleanup(unsigned int cpu);
-- 
cgit v1.2.3


From 6dc2a774cb4fdb524b7eb0b8db74198a1b4815ea Mon Sep 17 00:00:00 2001
From: Sunil Muthuswamy <sunilmut@microsoft.com>
Date: Tue, 23 Mar 2021 18:47:16 +0000
Subject: x86/Hyper-V: Support for free page reporting

Linux has support for free page reporting now (36e66c554b5c) for
virtualized environment. On Hyper-V when virtually backed VMs are
configured, Hyper-V will advertise cold memory discard capability,
when supported. This patch adds the support to hook into the free
page reporting infrastructure and leverage the Hyper-V cold memory
discard hint hypercall to report/free these pages back to the host.

Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>
Tested-by: Matheus Castello <matheus@castello.eng.br>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/SN4PR2101MB0880121FA4E2FEC67F35C1DCC0649@SN4PR2101MB0880.namprd21.prod.outlook.com
Signed-off-by: Wei Liu <wei.liu@kernel.org>
---
 arch/x86/hyperv/hv_init.c         | 51 +++++++++++++++++++++-
 arch/x86/kernel/cpu/mshyperv.c    |  9 ++--
 drivers/hv/Kconfig                |  1 +
 drivers/hv/hv_balloon.c           | 89 +++++++++++++++++++++++++++++++++++++++
 include/asm-generic/hyperv-tlfs.h | 35 ++++++++++++++-
 include/asm-generic/mshyperv.h    |  3 +-
 6 files changed, 180 insertions(+), 8 deletions(-)

(limited to 'arch/x86/kernel/cpu/mshyperv.c')

diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 9d100257b3af..7c9da3f65afa 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -498,6 +498,8 @@ void __init hyperv_init(void)
 		x86_init.irqs.create_pci_msi_domain = hv_create_pci_msi_domain;
 #endif
 
+	/* Query the VMs extended capability once, so that it can be cached. */
+	hv_query_ext_cap(0);
 	return;
 
 remove_cpuhp_state:
@@ -601,7 +603,7 @@ EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
 
 enum hv_isolation_type hv_get_isolation_type(void)
 {
-	if (!(ms_hyperv.features_b & HV_ISOLATION))
+	if (!(ms_hyperv.priv_high & HV_ISOLATION))
 		return HV_ISOLATION_TYPE_NONE;
 	return FIELD_GET(HV_ISOLATION_TYPE, ms_hyperv.isolation_config_b);
 }
@@ -612,3 +614,50 @@ bool hv_is_isolation_supported(void)
 	return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE;
 }
 EXPORT_SYMBOL_GPL(hv_is_isolation_supported);
+
+/* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */
+bool hv_query_ext_cap(u64 cap_query)
+{
+	/*
+	 * The address of the 'hv_extended_cap' variable will be used as an
+	 * output parameter to the hypercall below and so it should be
+	 * compatible with 'virt_to_phys'. Which means, it's address should be
+	 * directly mapped. Use 'static' to keep it compatible; stack variables
+	 * can be virtually mapped, making them imcompatible with
+	 * 'virt_to_phys'.
+	 * Hypercall input/output addresses should also be 8-byte aligned.
+	 */
+	static u64 hv_extended_cap __aligned(8);
+	static bool hv_extended_cap_queried;
+	u64 status;
+
+	/*
+	 * Querying extended capabilities is an extended hypercall. Check if the
+	 * partition supports extended hypercall, first.
+	 */
+	if (!(ms_hyperv.priv_high & HV_ENABLE_EXTENDED_HYPERCALLS))
+		return false;
+
+	/* Extended capabilities do not change at runtime. */
+	if (hv_extended_cap_queried)
+		return hv_extended_cap & cap_query;
+
+	status = hv_do_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, NULL,
+				 &hv_extended_cap);
+
+	/*
+	 * The query extended capabilities hypercall should not fail under
+	 * any normal circumstances. Avoid repeatedly making the hypercall, on
+	 * error.
+	 */
+	hv_extended_cap_queried = true;
+	status &= HV_HYPERCALL_RESULT_MASK;
+	if (status != HV_STATUS_SUCCESS) {
+		pr_err("Hyper-V: Extended query capabilities hypercall failed 0x%llx\n",
+		       status);
+		return false;
+	}
+
+	return hv_extended_cap & cap_query;
+}
+EXPORT_SYMBOL_GPL(hv_query_ext_cap);
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index cebed535ec56..3546d3e21787 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -265,12 +265,13 @@ static void __init ms_hyperv_init_platform(void)
 	 * Extract the features and hints
 	 */
 	ms_hyperv.features = cpuid_eax(HYPERV_CPUID_FEATURES);
-	ms_hyperv.features_b = cpuid_ebx(HYPERV_CPUID_FEATURES);
+	ms_hyperv.priv_high = cpuid_ebx(HYPERV_CPUID_FEATURES);
 	ms_hyperv.misc_features = cpuid_edx(HYPERV_CPUID_FEATURES);
 	ms_hyperv.hints    = cpuid_eax(HYPERV_CPUID_ENLIGHTMENT_INFO);
 
-	pr_info("Hyper-V: features 0x%x, hints 0x%x, misc 0x%x\n",
-		ms_hyperv.features, ms_hyperv.hints, ms_hyperv.misc_features);
+	pr_info("Hyper-V: privilege flags low 0x%x, high 0x%x, hints 0x%x, misc 0x%x\n",
+		ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints,
+		ms_hyperv.misc_features);
 
 	ms_hyperv.max_vp_index = cpuid_eax(HYPERV_CPUID_IMPLEMENT_LIMITS);
 	ms_hyperv.max_lp_index = cpuid_ebx(HYPERV_CPUID_IMPLEMENT_LIMITS);
@@ -316,7 +317,7 @@ static void __init ms_hyperv_init_platform(void)
 		x86_platform.calibrate_cpu = hv_get_tsc_khz;
 	}
 
-	if (ms_hyperv.features_b & HV_ISOLATION) {
+	if (ms_hyperv.priv_high & HV_ISOLATION) {
 		ms_hyperv.isolation_config_a = cpuid_eax(HYPERV_CPUID_ISOLATION_CONFIG);
 		ms_hyperv.isolation_config_b = cpuid_ebx(HYPERV_CPUID_ISOLATION_CONFIG);
 
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 79e5356a737a..66c794d92391 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -23,6 +23,7 @@ config HYPERV_UTILS
 config HYPERV_BALLOON
 	tristate "Microsoft Hyper-V Balloon driver"
 	depends on HYPERV
+	select PAGE_REPORTING
 	help
 	  Select this option to enable Hyper-V Balloon driver.
 
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 2f776d78e3c1..58af84e30144 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -21,6 +21,7 @@
 #include <linux/memory.h>
 #include <linux/notifier.h>
 #include <linux/percpu_counter.h>
+#include <linux/page_reporting.h>
 
 #include <linux/hyperv.h>
 #include <asm/hyperv-tlfs.h>
@@ -563,6 +564,8 @@ struct hv_dynmem_device {
 	 * The negotiated version agreed by host.
 	 */
 	__u32 version;
+
+	struct page_reporting_dev_info pr_dev_info;
 };
 
 static struct hv_dynmem_device dm_device;
@@ -1568,6 +1571,89 @@ static void balloon_onchannelcallback(void *context)
 
 }
 
+/* Hyper-V only supports reporting 2MB pages or higher */
+#define HV_MIN_PAGE_REPORTING_ORDER	9
+#define HV_MIN_PAGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << HV_MIN_PAGE_REPORTING_ORDER)
+static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info,
+		    struct scatterlist *sgl, unsigned int nents)
+{
+	unsigned long flags;
+	struct hv_memory_hint *hint;
+	int i;
+	u64 status;
+	struct scatterlist *sg;
+
+	WARN_ON_ONCE(nents > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES);
+	WARN_ON_ONCE(sgl->length < HV_MIN_PAGE_REPORTING_LEN);
+	local_irq_save(flags);
+	hint = *(struct hv_memory_hint **)this_cpu_ptr(hyperv_pcpu_input_arg);
+	if (!hint) {
+		local_irq_restore(flags);
+		return -ENOSPC;
+	}
+
+	hint->type = HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD;
+	hint->reserved = 0;
+	for_each_sg(sgl, sg, nents, i) {
+		union hv_gpa_page_range *range;
+
+		range = &hint->ranges[i];
+		range->address_space = 0;
+		/* page reporting only reports 2MB pages or higher */
+		range->page.largepage = 1;
+		range->page.additional_pages =
+			(sg->length / HV_MIN_PAGE_REPORTING_LEN) - 1;
+		range->page_size = HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB;
+		range->base_large_pfn =
+			page_to_hvpfn(sg_page(sg)) >> HV_MIN_PAGE_REPORTING_ORDER;
+	}
+
+	status = hv_do_rep_hypercall(HV_EXT_CALL_MEMORY_HEAT_HINT, nents, 0,
+				     hint, NULL);
+	local_irq_restore(flags);
+	if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) {
+		pr_err("Cold memory discard hypercall failed with status %llx\n",
+			status);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void enable_page_reporting(void)
+{
+	int ret;
+
+	/* Essentially, validating 'PAGE_REPORTING_MIN_ORDER' is big enough. */
+	if (pageblock_order < HV_MIN_PAGE_REPORTING_ORDER) {
+		pr_debug("Cold memory discard is only supported on 2MB pages and above\n");
+		return;
+	}
+
+	if (!hv_query_ext_cap(HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT)) {
+		pr_debug("Cold memory discard hint not supported by Hyper-V\n");
+		return;
+	}
+
+	BUILD_BUG_ON(PAGE_REPORTING_CAPACITY > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES);
+	dm_device.pr_dev_info.report = hv_free_page_report;
+	ret = page_reporting_register(&dm_device.pr_dev_info);
+	if (ret < 0) {
+		dm_device.pr_dev_info.report = NULL;
+		pr_err("Failed to enable cold memory discard: %d\n", ret);
+	} else {
+		pr_info("Cold memory discard hint enabled\n");
+	}
+}
+
+static void disable_page_reporting(void)
+{
+	if (dm_device.pr_dev_info.report) {
+		page_reporting_unregister(&dm_device.pr_dev_info);
+		dm_device.pr_dev_info.report = NULL;
+	}
+}
+
 static int balloon_connect_vsp(struct hv_device *dev)
 {
 	struct dm_version_request version_req;
@@ -1713,6 +1799,7 @@ static int balloon_probe(struct hv_device *dev,
 	if (ret != 0)
 		return ret;
 
+	enable_page_reporting();
 	dm_device.state = DM_INITIALIZED;
 
 	dm_device.thread =
@@ -1727,6 +1814,7 @@ static int balloon_probe(struct hv_device *dev,
 probe_error:
 	dm_device.state = DM_INIT_ERROR;
 	dm_device.thread  = NULL;
+	disable_page_reporting();
 	vmbus_close(dev->channel);
 #ifdef CONFIG_MEMORY_HOTPLUG
 	unregister_memory_notifier(&hv_memory_nb);
@@ -1749,6 +1837,7 @@ static int balloon_remove(struct hv_device *dev)
 	cancel_work_sync(&dm->ha_wrk.wrk);
 
 	kthread_stop(dm->thread);
+	disable_page_reporting();
 	vmbus_close(dev->channel);
 #ifdef CONFIG_MEMORY_HOTPLUG
 	unregister_memory_notifier(&hv_memory_nb);
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index 9cf10837d005..515c3fb06ab3 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -89,9 +89,9 @@
 #define HV_ACCESS_STATS				BIT(8)
 #define HV_DEBUGGING				BIT(11)
 #define HV_CPU_MANAGEMENT			BIT(12)
+#define HV_ENABLE_EXTENDED_HYPERCALLS		BIT(20)
 #define HV_ISOLATION				BIT(22)
 
-
 /*
  * TSC page layout.
  */
@@ -159,11 +159,18 @@ struct ms_hyperv_tsc_page {
 #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
 #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
 
+/* Extended hypercalls */
+#define HV_EXT_CALL_QUERY_CAPABILITIES		0x8001
+#define HV_EXT_CALL_MEMORY_HEAT_HINT		0x8003
+
 #define HV_FLUSH_ALL_PROCESSORS			BIT(0)
 #define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES	BIT(1)
 #define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY	BIT(2)
 #define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT	BIT(3)
 
+/* Extended capability bits */
+#define HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT BIT(8)
+
 enum HV_GENERIC_SET_FORMAT {
 	HV_GENERIC_SET_SPARSE_4K,
 	HV_GENERIC_SET_ALL,
@@ -408,8 +415,10 @@ struct hv_guest_mapping_flush {
  *  by the bitwidth of "additional_pages" in union hv_gpa_page_range.
  */
 #define HV_MAX_FLUSH_PAGES (2048)
+#define HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB		0
+#define HV_GPA_PAGE_RANGE_PAGE_SIZE_1GB		1
 
-/* HvFlushGuestPhysicalAddressList hypercall */
+/* HvFlushGuestPhysicalAddressList, HvExtCallMemoryHeatHint hypercall */
 union hv_gpa_page_range {
 	u64 address_space;
 	struct {
@@ -417,6 +426,12 @@ union hv_gpa_page_range {
 		u64 largepage:1;
 		u64 basepfn:52;
 	} page;
+	struct {
+		u64 reserved:12;
+		u64 page_size:1;
+		u64 reserved1:8;
+		u64 base_large_pfn:43;
+	};
 };
 
 /*
@@ -774,4 +789,20 @@ struct hv_input_unmap_device_interrupt {
 #define HV_SOURCE_SHADOW_NONE               0x0
 #define HV_SOURCE_SHADOW_BRIDGE_BUS_RANGE   0x1
 
+/*
+ * The whole argument should fit in a page to be able to pass to the hypervisor
+ * in one hypercall.
+ */
+#define HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES  \
+	((HV_HYP_PAGE_SIZE - sizeof(struct hv_memory_hint)) / \
+		sizeof(union hv_gpa_page_range))
+
+/* HvExtCallMemoryHeatHint hypercall */
+#define HV_EXT_MEMORY_HEAT_HINT_TYPE_COLD_DISCARD	2
+struct hv_memory_hint {
+	u64 type:2;
+	u64 reserved:62;
+	union hv_gpa_page_range ranges[];
+} __packed;
+
 #endif
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 2d1b6cd9f000..63c0e579bf6d 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -27,7 +27,7 @@
 
 struct ms_hyperv_info {
 	u32 features;
-	u32 features_b;
+	u32 priv_high;
 	u32 misc_features;
 	u32 hints;
 	u32 nested_features;
@@ -179,6 +179,7 @@ bool hv_is_hibernation_supported(void);
 enum hv_isolation_type hv_get_isolation_type(void);
 bool hv_is_isolation_supported(void);
 void hyperv_cleanup(void);
+bool hv_query_ext_cap(u64 cap_query);
 #else /* CONFIG_HYPERV */
 static inline bool hv_is_hyperv_initialized(void) { return false; }
 static inline bool hv_is_hibernation_supported(void) { return false; }
-- 
cgit v1.2.3