539 files changed, 11648 insertions, 4307 deletions
diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c
index a1d177d58254..21932d640a41 100644
--- a/drivers/acpi/acpica/exconfig.c
+++ b/drivers/acpi/acpica/exconfig.c
@@ -108,7 +108,9 @@ acpi_ex_add_table(u32 table_index,
 
 	/* Add the table to the namespace */
 
+	acpi_ex_exit_interpreter();
 	status = acpi_ns_load_table(table_index, parent_node);
+	acpi_ex_enter_interpreter();
 	if (ACPI_FAILURE(status)) {
 		acpi_ut_remove_reference(obj_desc);
 		*ddb_handle = NULL;
diff --git a/drivers/acpi/acpica/hwregs.c b/drivers/acpi/acpica/hwregs.c
index daceb80022b0..3b7fb99362b6 100644
--- a/drivers/acpi/acpica/hwregs.c
+++ b/drivers/acpi/acpica/hwregs.c
@@ -306,12 +306,6 @@ acpi_status acpi_hw_read(u32 *value, struct acpi_generic_address *reg)
 acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg)
 {
 	u64 address;
-	u8 access_width;
-	u32 bit_width;
-	u8 bit_offset;
-	u64 value64;
-	u32 new_value32, old_value32;
-	u8 index;
 	acpi_status status;
 
 	ACPI_FUNCTION_NAME(hw_write);
@@ -323,145 +317,23 @@ acpi_status acpi_hw_write(u32 value, struct acpi_generic_address *reg)
 		return (status);
 	}
 
-	/* Convert access_width into number of bits based */
-
-	access_width = acpi_hw_get_access_bit_width(reg, 32);
-	bit_width = reg->bit_offset + reg->bit_width;
-	bit_offset = reg->bit_offset;
-
 	/*
 	 * Two address spaces supported: Memory or IO. PCI_Config is
 	 * not supported here because the GAS structure is insufficient
 	 */
-	index = 0;
-	while (bit_width) {
-		/*
-		 * Use offset style bit reads because "Index * AccessWidth" is
-		 * ensured to be less than 32-bits by acpi_hw_validate_register().
-		 */
-		new_value32 = ACPI_GET_BITS(&value, index * access_width,
-					    ACPI_MASK_BITS_ABOVE_32
-					    (access_width));
-
-		if (bit_offset >= access_width) {
-			bit_offset -= access_width;
-		} else {
-			/*
-			 * Use offset style bit masks because access_width is ensured
-			 * to be less than 32-bits by acpi_hw_validate_register() and
-			 * bit_offset/bit_width is less than access_width here.
-			 */
-			if (bit_offset) {
-				new_value32 &= ACPI_MASK_BITS_BELOW(bit_offset);
-			}
-			if (bit_width < access_width) {
-				new_value32 &= ACPI_MASK_BITS_ABOVE(bit_width);
-			}
-
-			if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
-				if (bit_offset || bit_width < access_width) {
-					/*
-					 * Read old values in order not to modify the bits that
-					 * are beyond the register bit_width/bit_offset setting.
-					 */
-					status =
-					    acpi_os_read_memory((acpi_physical_address)
-								address +
-								index *
-								ACPI_DIV_8
-								(access_width),
-								&value64,
-								access_width);
-					old_value32 = (u32)value64;
-
-					/*
-					 * Use offset style bit masks because access_width is
-					 * ensured to be less than 32-bits by
-					 * acpi_hw_validate_register() and bit_offset/bit_width is
-					 * less than access_width here.
-					 */
-					if (bit_offset) {
-						old_value32 &=
-						    ACPI_MASK_BITS_ABOVE
-						    (bit_offset);
-						bit_offset = 0;
-					}
-					if (bit_width < access_width) {
-						old_value32 &=
-						    ACPI_MASK_BITS_BELOW
-						    (bit_width);
-					}
-
-					new_value32 |= old_value32;
-				}
-
-				value64 = (u64)new_value32;
-				status =
-				    acpi_os_write_memory((acpi_physical_address)
-							 address +
-							 index *
-							 ACPI_DIV_8
-							 (access_width),
-							 value64, access_width);
-			} else {	/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
-
-				if (bit_offset || bit_width < access_width) {
-					/*
-					 * Read old values in order not to modify the bits that
-					 * are beyond the register bit_width/bit_offset setting.
-					 */
-					status =
-					    acpi_hw_read_port((acpi_io_address)
-							      address +
-							      index *
-							      ACPI_DIV_8
-							      (access_width),
-							      &old_value32,
-							      access_width);
-
-					/*
-					 * Use offset style bit masks because access_width is
-					 * ensured to be less than 32-bits by
-					 * acpi_hw_validate_register() and bit_offset/bit_width is
-					 * less than access_width here.
-					 */
-					if (bit_offset) {
-						old_value32 &=
-						    ACPI_MASK_BITS_ABOVE
-						    (bit_offset);
-						bit_offset = 0;
-					}
-					if (bit_width < access_width) {
-						old_value32 &=
-						    ACPI_MASK_BITS_BELOW
-						    (bit_width);
-					}
-
-					new_value32 |= old_value32;
-				}
-
-				status = acpi_hw_write_port((acpi_io_address)
-							    address +
-							    index *
-							    ACPI_DIV_8
-							    (access_width),
-							    new_value32,
-							    access_width);
-			}
-		}
-
-		/*
-		 * Index * access_width is ensured to be less than 32-bits by
-		 * acpi_hw_validate_register().
-		 */
-		bit_width -=
-		    bit_width > access_width ? access_width : bit_width;
-		index++;
+	if (reg->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) {
+		status = acpi_os_write_memory((acpi_physical_address)
+					      address, (u64)value,
+					      reg->bit_width);
+	} else {		/* ACPI_ADR_SPACE_SYSTEM_IO, validated earlier */
+
+		status = acpi_hw_write_port((acpi_io_address)
+					    address, value, reg->bit_width);
 	}
 
 	ACPI_DEBUG_PRINT((ACPI_DB_IO,
 			  "Wrote: %8.8X width %2d   to %8.8X%8.8X (%s)\n",
-			  value, access_width, ACPI_FORMAT_UINT64(address),
+			  value, reg->bit_width, ACPI_FORMAT_UINT64(address),
 			  acpi_ut_get_region_name(reg->space_id)));
 
 	return (status);
diff --git a/drivers/acpi/acpica/nsparse.c b/drivers/acpi/acpica/nsparse.c
index f631a47724f0..1783cd7e1446 100644
--- a/drivers/acpi/acpica/nsparse.c
+++ b/drivers/acpi/acpica/nsparse.c
@@ -47,6 +47,7 @@
 #include "acparser.h"
 #include "acdispat.h"
 #include "actables.h"
+#include "acinterp.h"
 
 #define _COMPONENT          ACPI_NAMESPACE
 ACPI_MODULE_NAME("nsparse")
@@ -170,6 +171,8 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node)
 
 	ACPI_FUNCTION_TRACE(ns_parse_table);
 
+	acpi_ex_enter_interpreter();
+
 	/*
 	 * AML Parse, pass 1
 	 *
@@ -185,7 +188,7 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node)
 	status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS1,
 					    table_index, start_node);
 	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
+		goto error_exit;
 	}
 
 	/*
@@ -201,8 +204,10 @@ acpi_ns_parse_table(u32 table_index, struct acpi_namespace_node *start_node)
 	status = acpi_ns_one_complete_parse(ACPI_IMODE_LOAD_PASS2,
 					    table_index, start_node);
 	if (ACPI_FAILURE(status)) {
-		return_ACPI_STATUS(status);
+		goto error_exit;
 	}
 
+error_exit:
+	acpi_ex_exit_interpreter();
 	return_ACPI_STATUS(status);
 }
diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 31e8da648fff..262ca31b86d9 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1051,7 +1051,7 @@ static int __init acpi_bus_init(void)
 	 * Maybe EC region is required at bus_scan/acpi_get_devices. So it
 	 * is necessary to enable it as early as possible.
 	 */
-	acpi_boot_ec_enable();
+	acpi_ec_dsdt_probe();
 
 	printk(KERN_INFO PREFIX "Interpreter enabled\n");
 
diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 0e70181f150c..73c76d646064 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1446,10 +1446,30 @@ ec_parse_io_ports(struct acpi_resource *resource, void *context)
 	return AE_OK;
 }
 
-int __init acpi_boot_ec_enable(void)
+static const struct acpi_device_id ec_device_ids[] = {
+	{"PNP0C09", 0},
+	{"", 0},
+};
+
+int __init acpi_ec_dsdt_probe(void)
 {
-	if (!boot_ec)
+	acpi_status status;
+
+	if (boot_ec)
 		return 0;
+
+	/*
+	 * Find the EC in the DSDT if no ECDT EC is available. By the time
+	 * this function is invoked, the ACPI tables have been fully loaded,
+	 * so the namespace can be walked.
+	 */
+	boot_ec = make_acpi_ec();
+	if (!boot_ec)
+		return -ENOMEM;
+	status = acpi_get_devices(ec_device_ids[0].id,
+				  ec_parse_device, boot_ec, NULL);
+	if (ACPI_FAILURE(status) || !boot_ec->handle)
+		return -ENODEV;
 	if (!ec_install_handlers(boot_ec)) {
 		first_ec = boot_ec;
 		return 0;
@@ -1457,11 +1477,6 @@ int __init acpi_boot_ec_enable(void)
 	return -EFAULT;
 }
 
-static const struct acpi_device_id ec_device_ids[] = {
-	{"PNP0C09", 0},
-	{"", 0},
-};
-
 #if 0
 /*
  * Some EC firmware variations refuses to respond QR_EC when SCI_EVT is not
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 9bb0773d39bf..27cc7feabfe4 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -181,7 +181,7 @@ typedef int (*acpi_ec_query_func) (void *data);
 
 int acpi_ec_init(void);
 int acpi_ec_ecdt_probe(void);
-int acpi_boot_ec_enable(void);
+int acpi_ec_dsdt_probe(void);
 void acpi_ec_block_transactions(void);
 void acpi_ec_unblock_transactions(void);
 void acpi_ec_unblock_transactions_early(void);
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 61dc7a99e89a..c6f017458958 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -606,7 +606,7 @@ void ata_scsi_error(struct Scsi_Host *host)
 	ata_scsi_port_error_handler(host, ap);
 
 	/* finish or retry handled scmd's and clean up */
-	WARN_ON(host->host_failed || !list_empty(&eh_work_q));
+	WARN_ON(!list_empty(&eh_work_q));
 
 	DPRINTK("EXIT\n");
 }
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 6b2a84e7f2be..2609ba20b396 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -10,7 +10,7 @@ obj-$(CONFIG_DMA_CMA) += dma-contiguous.o
 obj-y			+= power/
 obj-$(CONFIG_HAS_DMA)	+= dma-mapping.o
 obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
-obj-$(CONFIG_ISA)	+= isa.o
+obj-$(CONFIG_ISA_BUS_API)	+= isa.o
 obj-$(CONFIG_FW_LOADER)	+= firmware_class.o
 obj-$(CONFIG_NUMA)	+= node.o
 obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o
diff --git a/drivers/base/isa.c b/drivers/base/isa.c
index 91dba65d7264..cd6ccdcf9df0 100644
--- a/drivers/base/isa.c
+++ b/drivers/base/isa.c
@@ -180,4 +180,4 @@ static int __init isa_bus_init(void)
 	return error;
 }
 
-device_initcall(isa_bus_init);
+postcore_initcall(isa_bus_init);
diff --git a/drivers/base/module.c b/drivers/base/module.c
index db930d3ee312..2a215780eda2 100644
--- a/drivers/base/module.c
+++ b/drivers/base/module.c
@@ -24,10 +24,12 @@ static char *make_driver_name(struct device_driver *drv)
 
 static void module_create_drivers_dir(struct module_kobject *mk)
 {
-	if (!mk || mk->drivers_dir)
-		return;
+	static DEFINE_MUTEX(drivers_dir_mutex);
 
-	mk->drivers_dir = kobject_create_and_add("drivers", &mk->kobj);
+	mutex_lock(&drivers_dir_mutex);
+	if (mk && !mk->drivers_dir)
+		mk->drivers_dir = kobject_create_and_add("drivers", &mk->kobj);
+	mutex_unlock(&drivers_dir_mutex);
 }
 
 void module_add_driver(struct module *mod, struct device_driver *drv)
diff --git a/drivers/base/power/opp/cpu.c b/drivers/base/power/opp/cpu.c
index 83d6e7ba1a34..8c3434bdb26d 100644
--- a/drivers/base/power/opp/cpu.c
+++ b/drivers/base/power/opp/cpu.c
@@ -211,7 +211,7 @@ int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev,
 		}
 
 		/* Mark opp-table as multiple CPUs are sharing it now */
-		opp_table->shared_opp = true;
+		opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED;
 	}
 unlock:
 	mutex_unlock(&opp_table_lock);
@@ -227,7 +227,8 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_set_sharing_cpus);
  *
  * This updates the @cpumask with CPUs that are sharing OPPs with @cpu_dev.
  *
- * Returns -ENODEV if OPP table isn't already present.
+ * Returns -ENODEV if OPP table isn't already present and -EINVAL if the OPP
+ * table's status is access-unknown.
  *
  * Locking: The internal opp_table and opp structures are RCU protected.
  * Hence this function internally uses RCU updater strategy with mutex locks
@@ -249,9 +250,14 @@ int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
 		goto unlock;
 	}
 
+	if (opp_table->shared_opp == OPP_TABLE_ACCESS_UNKNOWN) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
 	cpumask_clear(cpumask);
 
-	if (opp_table->shared_opp) {
+	if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED) {
 		list_for_each_entry(opp_dev, &opp_table->dev_list, node)
 			cpumask_set_cpu(opp_dev->dev->id, cpumask);
 	} else {
diff --git a/drivers/base/power/opp/of.c b/drivers/base/power/opp/of.c
index 94d2010558e3..1dfd3dd92624 100644
--- a/drivers/base/power/opp/of.c
+++ b/drivers/base/power/opp/of.c
@@ -34,7 +34,10 @@ static struct opp_table *_managed_opp(const struct device_node *np)
 			 * But the OPPs will be considered as shared only if the
 			 * OPP table contains a "opp-shared" property.
 			 */
-			return opp_table->shared_opp ? opp_table : NULL;
+			if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED)
+				return opp_table;
+
+			return NULL;
 		}
 	}
 
@@ -353,7 +356,10 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
 	}
 
 	opp_table->np = opp_np;
-	opp_table->shared_opp = of_property_read_bool(opp_np, "opp-shared");
+	if (of_property_read_bool(opp_np, "opp-shared"))
+		opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED;
+	else
+		opp_table->shared_opp = OPP_TABLE_ACCESS_EXCLUSIVE;
 
 	mutex_unlock(&opp_table_lock);
 
diff --git a/drivers/base/power/opp/opp.h b/drivers/base/power/opp/opp.h
index 20f3be22e060..fabd5ca1a083 100644
--- a/drivers/base/power/opp/opp.h
+++ b/drivers/base/power/opp/opp.h
@@ -119,6 +119,12 @@ struct opp_device {
 #endif
 };
 
+enum opp_table_access {
+	OPP_TABLE_ACCESS_UNKNOWN = 0,
+	OPP_TABLE_ACCESS_EXCLUSIVE = 1,
+	OPP_TABLE_ACCESS_SHARED = 2,
+};
+
 /**
  * struct opp_table - Device opp structure
  * @node:	table node - contains the devices with OPPs that
@@ -166,7 +172,7 @@ struct opp_table {
 	/* For backward compatibility with v1 bindings */
 	unsigned int voltage_tolerance_v1;
 
-	bool shared_opp;
+	enum opp_table_access shared_opp;
 	struct dev_pm_opp *suspend_opp;
 
 	unsigned int *supported_hw;
diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
index d597e432e195..ab19adb07a12 100644
--- a/drivers/block/aoe/aoecmd.c
+++ b/drivers/block/aoe/aoecmd.c
@@ -1750,7 +1750,7 @@ aoecmd_init(void)
 	int ret;
 
 	/* get_zeroed_page returns page with ref count 1 */
-	p = (void *) get_zeroed_page(GFP_KERNEL | __GFP_REPEAT);
+	p = (void *) get_zeroed_page(GFP_KERNEL);
 	if (!p)
 		return -ENOMEM;
 	empty_page = virt_to_page(p);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 31e73a7a40f2..6a48ed41963f 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -941,7 +941,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
 	debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
 	debugfs_create_u32("timeout", 0444, dir, &nbd->xmit_timeout);
 	debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize);
-	debugfs_create_file("flags", 0444, dir, &nbd, &nbd_dbg_flags_ops);
+	debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);
 
 	return 0;
 }
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index ca13df854639..2e6d1e9c3345 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -874,8 +874,12 @@ static int blkif_queue_rq(struct blk_mq_hw_ctx *hctx,
 			  const struct blk_mq_queue_data *qd)
 {
 	unsigned long flags;
-	struct blkfront_ring_info *rinfo = (struct blkfront_ring_info *)hctx->driver_data;
+	int qid = hctx->queue_num;
+	struct blkfront_info *info = hctx->queue->queuedata;
+	struct blkfront_ring_info *rinfo = NULL;
 
+	BUG_ON(info->nr_rings <= qid);
+	rinfo = &info->rinfo[qid];
 	blk_mq_start_request(qd->rq);
 	spin_lock_irqsave(&rinfo->ring_lock, flags);
 	if (RING_FULL(&rinfo->ring))
@@ -901,20 +905,9 @@ out_busy:
 	return BLK_MQ_RQ_QUEUE_BUSY;
 }
 
-static int blk_mq_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
-			    unsigned int index)
-{
-	struct blkfront_info *info = (struct blkfront_info *)data;
-
-	BUG_ON(info->nr_rings <= index);
-	hctx->driver_data = &info->rinfo[index];
-	return 0;
-}
-
 static struct blk_mq_ops blkfront_mq_ops = {
 	.queue_rq = blkif_queue_rq,
 	.map_queue = blk_mq_map_queue,
-	.init_hctx = blk_mq_init_hctx,
 };
 
 static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
@@ -950,6 +943,7 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size,
 		return PTR_ERR(rq);
 	}
 
+	rq->queuedata = info;
 	queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
 
 	if (info->feature_discard) {
@@ -2149,6 +2143,8 @@ static int blkfront_resume(struct xenbus_device *dev)
 		return err;
 
 	err = talk_to_blkback(dev, info);
+	if (!err)
+		blk_mq_update_nr_hw_queues(&info->tag_set, info->nr_rings);
 
 	/*
 	 * We have to wait for the backend to switch to
@@ -2485,10 +2481,23 @@ static void blkback_changed(struct xenbus_device *dev,
 		break;
 
 	case XenbusStateConnected:
-		if (dev->state != XenbusStateInitialised) {
+		/*
+		 * talk_to_blkback sets state to XenbusStateInitialised
+		 * and blkfront_connect sets it to XenbusStateConnected
+		 * (if connection went OK).
+		 *
+		 * If the backend (or toolstack) decides to poke at backend
+		 * state (and re-trigger the watch by setting the state repeatedly
+		 * to XenbusStateConnected (4)) we need to deal with this.
+		 * This is allowed as this is used to communicate to the guest
+		 * that the size of disk has changed!
+		 */
+		if ((dev->state != XenbusStateInitialised) &&
+		    (dev->state != XenbusStateConnected)) {
 			if (talk_to_blkback(dev, info))
 				break;
 		}
+
 		blkfront_connect(info);
 		break;
 
diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 94fb407d8561..44b1bd6baa38 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c
@@ -3820,6 +3820,7 @@ static void handle_new_recv_msgs(ipmi_smi_t intf)
 	while (!list_empty(&intf->waiting_rcv_msgs)) {
 		smi_msg = list_entry(intf->waiting_rcv_msgs.next,
 				     struct ipmi_smi_msg, link);
+		list_del(&smi_msg->link);
 		if (!run_to_completion)
 			spin_unlock_irqrestore(&intf->waiting_rcv_msgs_lock,
 					       flags);
@@ -3829,11 +3830,14 @@ static void handle_new_recv_msgs(ipmi_smi_t intf)
 		if (rv > 0) {
 			/*
 			 * To preserve message order, quit if we
-			 * can't handle a message.
+			 * can't handle a message.  Add the message
+			 * back at the head, this is safe because this
+			 * tasklet is the only thing that pulls the
+			 * messages.
 			 */
+			list_add(&smi_msg->link, &intf->waiting_rcv_msgs);
 			break;
 		} else {
-			list_del(&smi_msg->link);
 			if (rv == 0)
 				/* Message handled */
 				ipmi_free_smi_msg(smi_msg);
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index 53ddba26578c..98efbfcdb503 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -175,6 +175,7 @@ config COMMON_CLK_KEYSTONE
 config COMMON_CLK_NXP
 	def_bool COMMON_CLK && (ARCH_LPC18XX || ARCH_LPC32XX)
 	select REGMAP_MMIO if ARCH_LPC32XX
+	select MFD_SYSCON if ARCH_LPC18XX
 	---help---
 	  Support for clock providers on NXP platforms.
 
diff --git a/drivers/clk/microchip/clk-pic32mzda.c b/drivers/clk/microchip/clk-pic32mzda.c
index 020a29acc5b0..51f54380474b 100644
--- a/drivers/clk/microchip/clk-pic32mzda.c
+++ b/drivers/clk/microchip/clk-pic32mzda.c
@@ -180,15 +180,15 @@ static int pic32mzda_clk_probe(struct platform_device *pdev)
 
 	/* register fixed rate clocks */
 	clks[POSCCLK] = clk_register_fixed_rate(&pdev->dev, "posc_clk", NULL,
-						CLK_IS_ROOT, 24000000);
+						0, 24000000);
 	clks[FRCCLK] =  clk_register_fixed_rate(&pdev->dev, "frc_clk", NULL,
-						CLK_IS_ROOT, 8000000);
+						0, 8000000);
 	clks[BFRCCLK] = clk_register_fixed_rate(&pdev->dev, "bfrc_clk", NULL,
-						CLK_IS_ROOT, 8000000);
+						0, 8000000);
 	clks[LPRCCLK] = clk_register_fixed_rate(&pdev->dev, "lprc_clk", NULL,
-						CLK_IS_ROOT, 32000);
+						0, 32000);
 	clks[UPLLCLK] = clk_register_fixed_rate(&pdev->dev, "usbphy_clk", NULL,
-						CLK_IS_ROOT, 24000000);
+						0, 24000000);
 	/* fixed rate (optional) clock */
 	if (of_find_property(np, "microchip,pic32mzda-sosc", NULL)) {
 		pr_info("pic32-clk: dt requests SOSC.\n");
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 0d159b513469..fe9dc17ea873 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -372,26 +372,9 @@ static bool intel_pstate_get_ppc_enable_status(void)
 	return acpi_ppc;
 }
 
-/*
- * The max target pstate ratio is a 8 bit value in both PLATFORM_INFO MSR and
- * in TURBO_RATIO_LIMIT MSR, which pstate driver stores in max_pstate and
- * max_turbo_pstate fields. The PERF_CTL MSR contains 16 bit value for P state
- * ratio, out of it only high 8 bits are used. For example 0x1700 is setting
- * target ratio 0x17. The _PSS control value stores in a format which can be
- * directly written to PERF_CTL MSR. But in intel_pstate driver this shift
- * occurs during write to PERF_CTL (E.g. for cores core_set_pstate()).
- * This function converts the _PSS control value to intel pstate driver format
- * for comparison and assignment.
- */
-static int convert_to_native_pstate_format(struct cpudata *cpu, int index)
-{
-	return cpu->acpi_perf_data.states[index].control >> 8;
-}
-
 static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 {
 	struct cpudata *cpu;
-	int turbo_pss_ctl;
 	int ret;
 	int i;
 
@@ -441,11 +424,10 @@ static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 	 * max frequency, which will cause a reduced performance as
 	 * this driver uses real max turbo frequency as the max
 	 * frequency. So correct this frequency in _PSS table to
-	 * correct max turbo frequency based on the turbo ratio.
+	 * correct max turbo frequency based on the turbo state.
 	 * Also need to convert to MHz as _PSS freq is in MHz.
 	 */
-	turbo_pss_ctl = convert_to_native_pstate_format(cpu, 0);
-	if (turbo_pss_ctl > cpu->pstate.max_pstate)
+	if (!limits->turbo_disabled)
 		cpu->acpi_perf_data.states[0].core_frequency =
 					policy->cpuinfo.max_freq / 1000;
 	cpu->valid_pss_table = true;
@@ -1460,6 +1442,9 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 
 	intel_pstate_clear_update_util_hook(policy->cpu);
 
+	pr_debug("set_policy cpuinfo.max %u policy->max %u\n",
+		 policy->cpuinfo.max_freq, policy->max);
+
 	cpu = all_cpu_data[0];
 	if (cpu->pstate.max_pstate_physical > cpu->pstate.max_pstate &&
 	    policy->max < policy->cpuinfo.max_freq &&
@@ -1495,13 +1480,13 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 				   limits->max_sysfs_pct);
 	limits->max_perf_pct = max(limits->min_policy_pct,
 				   limits->max_perf_pct);
-	limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
 
 	/* Make sure min_perf_pct <= max_perf_pct */
 	limits->min_perf_pct = min(limits->max_perf_pct, limits->min_perf_pct);
 
 	limits->min_perf = div_fp(limits->min_perf_pct, 100);
 	limits->max_perf = div_fp(limits->max_perf_pct, 100);
+	limits->max_perf = round_up(limits->max_perf, FRAC_BITS);
 
  out:
 	intel_pstate_set_update_util_hook(policy->cpu);
@@ -1558,8 +1543,11 @@ static int intel_pstate_cpu_init(struct cpufreq_policy *policy)
 
 	/* cpuinfo and default policy values */
 	policy->cpuinfo.min_freq = cpu->pstate.min_pstate * cpu->pstate.scaling;
-	policy->cpuinfo.max_freq =
-		cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+	update_turbo_state();
+	policy->cpuinfo.max_freq = limits->turbo_disabled ?
+			cpu->pstate.max_pstate : cpu->pstate.turbo_pstate;
+	policy->cpuinfo.max_freq *= cpu->pstate.scaling;
+
 	intel_pstate_init_acpi_perf_limits(policy);
 	policy->cpuinfo.transition_latency = CPUFREQ_ETERNAL;
 	cpumask_set_cpu(policy->cpu, policy->cpus);
diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
index 808a320e9d5d..a7ecb9a84c15 100644
--- a/drivers/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -487,7 +487,7 @@ static int __init pcc_cpufreq_probe(void)
 	doorbell.space_id = reg_resource->space_id;
 	doorbell.bit_width = reg_resource->bit_width;
 	doorbell.bit_offset = reg_resource->bit_offset;
-	doorbell.access_width = 64;
+	doorbell.access_width = 4;
 	doorbell.address = reg_resource->address;
 
 	pr_debug("probe: doorbell: space_id is %d, bit_width is %d, "
diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 1d6c803804d5..e92418facc92 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -268,8 +268,11 @@ int update_devfreq(struct devfreq *devfreq)
 	devfreq_notify_transition(devfreq, &freqs, DEVFREQ_PRECHANGE);
 
 	err = devfreq->profile->target(devfreq->dev.parent, &freq, flags);
-	if (err)
+	if (err) {
+		freqs.new = cur_freq;
+		devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE);
 		return err;
+	}
 
 	freqs.new = freq;
 	devfreq_notify_transition(devfreq, &freqs, DEVFREQ_POSTCHANGE);
@@ -552,6 +555,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
 	devfreq->profile = profile;
 	strncpy(devfreq->governor_name, governor_name, DEVFREQ_NAME_LEN);
 	devfreq->previous_freq = profile->initial_freq;
+	devfreq->last_status.current_frequency = profile->initial_freq;
 	devfreq->data = data;
 	devfreq->nb.notifier_call = devfreq_notifier_call;
 
@@ -561,23 +565,22 @@ struct devfreq *devfreq_add_device(struct device *dev,
 		mutex_lock(&devfreq->lock);
 	}
 
-	devfreq->trans_table =	devm_kzalloc(dev, sizeof(unsigned int) *
-						devfreq->profile->max_state *
-						devfreq->profile->max_state,
-						GFP_KERNEL);
-	devfreq->time_in_state = devm_kzalloc(dev, sizeof(unsigned long) *
-						devfreq->profile->max_state,
-						GFP_KERNEL);
-	devfreq->last_stat_updated = jiffies;
-
 	dev_set_name(&devfreq->dev, "%s", dev_name(dev));
 	err = device_register(&devfreq->dev);
 	if (err) {
-		put_device(&devfreq->dev);
 		mutex_unlock(&devfreq->lock);
 		goto err_out;
 	}
 
+	devfreq->trans_table =	devm_kzalloc(&devfreq->dev, sizeof(unsigned int) *
+						devfreq->profile->max_state *
+						devfreq->profile->max_state,
+						GFP_KERNEL);
+	devfreq->time_in_state = devm_kzalloc(&devfreq->dev, sizeof(unsigned long) *
+						devfreq->profile->max_state,
+						GFP_KERNEL);
+	devfreq->last_stat_updated = jiffies;
+
 	srcu_init_notifier_head(&devfreq->transition_notifier_list);
 
 	mutex_unlock(&devfreq->lock);
@@ -603,7 +606,6 @@ struct devfreq *devfreq_add_device(struct device *dev,
 err_init:
 	list_del(&devfreq->node);
 	device_unregister(&devfreq->dev);
-	kfree(devfreq);
 err_out:
 	return ERR_PTR(err);
 }
@@ -621,7 +623,6 @@ int devfreq_remove_device(struct devfreq *devfreq)
 		return -EINVAL;
 
 	device_unregister(&devfreq->dev);
-	put_device(&devfreq->dev);
 
 	return 0;
 }
diff --git a/drivers/devfreq/event/exynos-nocp.c b/drivers/devfreq/event/exynos-nocp.c
index 6b6a5f310486..a5841403bde8 100644
--- a/drivers/devfreq/event/exynos-nocp.c
+++ b/drivers/devfreq/event/exynos-nocp.c
@@ -220,9 +220,6 @@ static int exynos_nocp_parse_dt(struct platform_device *pdev,
 
 	/* Maps the memory mapped IO to control nocp register */
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (IS_ERR(res))
-		return PTR_ERR(res);
-
 	base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
diff --git a/drivers/dma/at_xdmac.c b/drivers/dma/at_xdmac.c
index 8e304b1befc5..75bd6621dc5d 100644
--- a/drivers/dma/at_xdmac.c
+++ b/drivers/dma/at_xdmac.c
@@ -242,7 +242,7 @@ struct at_xdmac_lld {
 	u32		mbr_dus;	/* Destination Microblock Stride Register */
 };
 
-
+/* 64-bit alignment needed to update CNDA and CUBC registers in an atomic way. */
 struct at_xdmac_desc {
 	struct at_xdmac_lld		lld;
 	enum dma_transfer_direction	direction;
@@ -253,7 +253,7 @@ struct at_xdmac_desc {
 	unsigned int			xfer_size;
 	struct list_head		descs_list;
 	struct list_head		xfer_node;
-};
+} __aligned(sizeof(u64));
 
 static inline void __iomem *at_xdmac_chan_reg_base(struct at_xdmac *atxdmac, unsigned int chan_nb)
 {
@@ -1400,6 +1400,7 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	u32			cur_nda, check_nda, cur_ubc, mask, value;
 	u8			dwidth = 0;
 	unsigned long		flags;
+	bool			initd;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
 	if (ret == DMA_COMPLETE)
@@ -1424,7 +1425,16 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	residue = desc->xfer_size;
 	/*
 	 * Flush FIFO: only relevant when the transfer is source peripheral
-	 * synchronized.
+	 * synchronized. Flush is needed before reading CUBC because data in
+	 * the FIFO are not reported by CUBC. Reporting a residue of the
+	 * transfer length while data is still in the FIFO can cause issues.
+	 * Usecase: atmel USART has a timeout which means I have received
+	 * characters but there is no more character received for a while. On
+	 * timeout, it requests the residue. If the data are in the DMA FIFO,
+	 * we will return a residue of the transfer length. It means no data
+	 * received. If an application is waiting for these data, it will hang
+	 * since we won't have another USART timeout without receiving new
+	 * data.
 	 */
 	mask = AT_XDMAC_CC_TYPE | AT_XDMAC_CC_DSYNC;
 	value = AT_XDMAC_CC_TYPE_PER_TRAN | AT_XDMAC_CC_DSYNC_PER2MEM;
@@ -1435,34 +1445,43 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	}
 
 	/*
-	 * When processing the residue, we need to read two registers but we
-	 * can't do it in an atomic way. AT_XDMAC_CNDA is used to find where
-	 * we stand in the descriptor list and AT_XDMAC_CUBC is used
-	 * to know how many data are remaining for the current descriptor.
-	 * Since the dma channel is not paused to not loose data, between the
-	 * AT_XDMAC_CNDA and AT_XDMAC_CUBC read, we may have change of
-	 * descriptor.
-	 * For that reason, after reading AT_XDMAC_CUBC, we check if we are
-	 * still using the same descriptor by reading a second time
-	 * AT_XDMAC_CNDA. If AT_XDMAC_CNDA has changed, it means we have to
-	 * read again AT_XDMAC_CUBC.
+	 * The easiest way to compute the residue would be to pause the DMA,
+	 * but doing so can lead to missing some data, as some devices don't
+	 * have a FIFO.
+	 * We need to read several registers because:
+	 * - DMA is running therefore a descriptor change is possible while
+	 * reading these registers
+	 * - When the block transfer is done, the value of the CUBC register
+	 * is set to its initial value until the fetch of the next descriptor.
+	 * This value will corrupt the residue calculation so we have to skip
+	 * it.
+	 *
+	 * INITD --------                    ------------
+	 *              |____________________|
+	 *       _______________________  _______________
+	 * NDA       @desc2             \/   @desc3
+	 *       _______________________/\_______________
+	 *       __________  ___________  _______________
+	 * CUBC       0    \/ MAX desc1 \/  MAX desc2
+	 *       __________/\___________/\_______________
+	 *
+	 * Since descriptors are aligned on 64 bits, we can assume that
+	 * the update of NDA and CUBC is atomic.
 	 * Memory barriers are used to ensure the read order of the registers.
-	 * A max number of retries is set because unlikely it can never ends if
-	 * we are transferring a lot of data with small buffers.
+	 * Retries are capped because, however unlikely, it could never end.
 	 */
-	cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
-	rmb();
-	cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
 	for (retry = 0; retry < AT_XDMAC_RESIDUE_MAX_RETRIES; retry++) {
-		rmb();
 		check_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
-
-		if (likely(cur_nda == check_nda))
-			break;
-
-		cur_nda = check_nda;
+		rmb();
+		initd = !!(at_xdmac_chan_read(atchan, AT_XDMAC_CC) & AT_XDMAC_CC_INITD);
 		rmb();
 		cur_ubc = at_xdmac_chan_read(atchan, AT_XDMAC_CUBC);
+		rmb();
+		cur_nda = at_xdmac_chan_read(atchan, AT_XDMAC_CNDA) & 0xfffffffc;
+		rmb();
+
+		if ((check_nda == cur_nda) && initd)
+			break;
 	}
 
 	if (unlikely(retry >= AT_XDMAC_RESIDUE_MAX_RETRIES)) {
@@ -1471,6 +1490,19 @@ at_xdmac_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 	}
 
 	/*
+	 * Flush FIFO: only relevant when the transfer is source peripheral
+	 * synchronized. Another flush is needed here because CUBC is updated
+	 * when the controller sends the data write command. This can lead to
+	 * reporting data that are not yet written to memory or the device.
+	 * The FIFO flush ensures that data are really written.
+	 */
+	if ((desc->lld.mbr_cfg & mask) == value) {
+		at_xdmac_write(atxdmac, AT_XDMAC_GSWF, atchan->mask);
+		while (!(at_xdmac_chan_read(atchan, AT_XDMAC_CIS) & AT_XDMAC_CIS_FIS))
+			cpu_relax();
+	}
+
+	/*
 	 * Remove size of all microblocks already transferred and the current
 	 * one. Then add the remaining size to transfer of the current
 	 * microblock.
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 25d1dadcddd1..d0446a75990a 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -703,8 +703,9 @@ static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan)
 		goto free_resources;
 	}
 
-	src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src), 0,
-				 PAGE_SIZE, DMA_TO_DEVICE);
+	src_dma = dma_map_page(dma_chan->device->dev, virt_to_page(src),
+			       (size_t)src & ~PAGE_MASK, PAGE_SIZE,
+			       DMA_TO_DEVICE);
 	unmap->addr[0] = src_dma;
 
 	ret = dma_mapping_error(dma_chan->device->dev, src_dma);
@@ -714,8 +715,9 @@ static int mv_chan_memcpy_self_test(struct mv_xor_chan *mv_chan)
 	}
 	unmap->to_cnt = 1;
 
-	dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest), 0,
-				  PAGE_SIZE, DMA_FROM_DEVICE);
+	dest_dma = dma_map_page(dma_chan->device->dev, virt_to_page(dest),
+				(size_t)dest & ~PAGE_MASK, PAGE_SIZE,
+				DMA_FROM_DEVICE);
 	unmap->addr[1] = dest_dma;
 
 	ret = dma_mapping_error(dma_chan->device->dev, dest_dma);
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 6aa256b0a1ed..c3ee3ad98a63 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -565,7 +565,8 @@ void edac_mc_reset_delay_period(unsigned long value)
 	list_for_each(item, &mc_devices) {
 		mci = list_entry(item, struct mem_ctl_info, link);
 
-		edac_mod_work(&mci->work, value);
+		if (mci->op_state == OP_RUNNING_POLL)
+			edac_mod_work(&mci->work, value);
 	}
 	mutex_unlock(&mem_ctls_mutex);
 }
diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c
index b4d0bf6534cf..6744d88bdea8 100644
--- a/drivers/edac/sb_edac.c
+++ b/drivers/edac/sb_edac.c
@@ -239,8 +239,11 @@ static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = {
 	{ 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc },
 };
 
-#define RIR_RNK_TGT(reg)		GET_BITFIELD(reg, 16, 19)
-#define RIR_OFFSET(reg)		GET_BITFIELD(reg,  2, 14)
+#define RIR_RNK_TGT(type, reg) (((type) == BROADWELL) ? \
+	GET_BITFIELD(reg, 20, 23) : GET_BITFIELD(reg, 16, 19))
+
+#define RIR_OFFSET(type, reg) (((type) == HASWELL || (type) == BROADWELL) ? \
+	GET_BITFIELD(reg,  2, 15) : GET_BITFIELD(reg,  2, 14))
 
 /* Device 16, functions 2-7 */
 
@@ -326,6 +329,7 @@ struct pci_id_descr {
 struct pci_id_table {
 	const struct pci_id_descr	*descr;
 	int				n_devs;
+	enum type			type;
 };
 
 struct sbridge_dev {
@@ -394,9 +398,14 @@ static const struct pci_id_descr pci_dev_descr_sbridge[] = {
 	{ PCI_DESCR(PCI_DEVICE_ID_INTEL_SBRIDGE_BR, 0)		},
 };
 
-#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
+#define PCI_ID_TABLE_ENTRY(A, T) {	\
+	.descr = A,			\
+	.n_devs = ARRAY_SIZE(A),	\
+	.type = T			\
+}
+
 static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge, SANDY_BRIDGE),
 	{0,}			/* 0 terminated list. */
 };
 
@@ -463,7 +472,7 @@ static const struct pci_id_descr pci_dev_descr_ibridge[] = {
 };
 
 static const struct pci_id_table pci_dev_descr_ibridge_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_ibridge, IVY_BRIDGE),
 	{0,}			/* 0 terminated list. */
 };
 
@@ -536,7 +545,7 @@ static const struct pci_id_descr pci_dev_descr_haswell[] = {
 };
 
 static const struct pci_id_table pci_dev_descr_haswell_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_haswell, HASWELL),
 	{0,}			/* 0 terminated list. */
 };
 
@@ -580,7 +589,7 @@ static const struct pci_id_descr pci_dev_descr_knl[] = {
 };
 
 static const struct pci_id_table pci_dev_descr_knl_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_knl),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_knl, KNIGHTS_LANDING),
 	{0,}
 };
 
@@ -648,7 +657,7 @@ static const struct pci_id_descr pci_dev_descr_broadwell[] = {
 };
 
 static const struct pci_id_table pci_dev_descr_broadwell_table[] = {
-	PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell),
+	PCI_ID_TABLE_ENTRY(pci_dev_descr_broadwell, BROADWELL),
 	{0,}			/* 0 terminated list. */
 };
 
@@ -1894,14 +1903,14 @@ static void get_memory_layout(const struct mem_ctl_info *mci)
 				pci_read_config_dword(pvt->pci_tad[i],
 						      rir_offset[j][k],
 						      &reg);
-				tmp_mb = RIR_OFFSET(reg) << 6;
+				tmp_mb = RIR_OFFSET(pvt->info.type, reg) << 6;
 
 				gb = div_u64_rem(tmp_mb, 1024, &mb);
 				edac_dbg(0, "CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
 					 i, j, k,
 					 gb, (mb*1000)/1024,
 					 ((u64)tmp_mb) << 20L,
-					 (u32)RIR_RNK_TGT(reg),
+					 (u32)RIR_RNK_TGT(pvt->info.type, reg),
 					 reg);
 			}
 		}
@@ -2234,7 +2243,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci,
 	pci_read_config_dword(pvt->pci_tad[ch_add + base_ch],
 			      rir_offset[n_rir][idx],
 			      &reg);
-	*rank = RIR_RNK_TGT(reg);
+	*rank = RIR_RNK_TGT(pvt->info.type, reg);
 
 	edac_dbg(0, "RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
 		 n_rir,
@@ -3357,12 +3366,12 @@ fail0:
 #define ICPU(model, table) \
 	{ X86_VENDOR_INTEL, 6, model, 0, (unsigned long)&table }
 
-/* Order here must match "enum type" */
 static const struct x86_cpu_id sbridge_cpuids[] = {
 	ICPU(0x2d, pci_dev_descr_sbridge_table),	/* SANDY_BRIDGE */
 	ICPU(0x3e, pci_dev_descr_ibridge_table),	/* IVY_BRIDGE */
 	ICPU(0x3f, pci_dev_descr_haswell_table),	/* HASWELL */
 	ICPU(0x4f, pci_dev_descr_broadwell_table),	/* BROADWELL */
+	ICPU(0x56, pci_dev_descr_broadwell_table),	/* BROADWELL-DE */
 	ICPU(0x57, pci_dev_descr_knl_table),		/* KNIGHTS_LANDING */
 	{ }
 };
@@ -3398,7 +3407,7 @@ static int sbridge_probe(const struct x86_cpu_id *id)
 			 mc, mc + 1, num_mc);
 
 		sbridge_dev->mc = mc++;
-		rc = sbridge_register_mci(sbridge_dev, id - sbridge_cpuids);
+		rc = sbridge_register_mci(sbridge_dev, ptable->type);
 		if (unlikely(rc < 0))
 			goto fail1;
 	}
diff --git a/drivers/extcon/extcon-palmas.c b/drivers/extcon/extcon-palmas.c
index 8b3226dca1d9..caff46c0e214 100644
--- a/drivers/extcon/extcon-palmas.c
+++ b/drivers/extcon/extcon-palmas.c
@@ -360,6 +360,8 @@ static int palmas_usb_probe(struct platform_device *pdev)
 
 	palmas_enable_irq(palmas_usb);
 	/* perform initial detection */
+	if (palmas_usb->enable_gpio_vbus_detection)
+		palmas_vbus_irq_handler(palmas_usb->gpio_vbus_irq, palmas_usb);
 	palmas_gpio_id_detect(&palmas_usb->wq_detectid.work);
 	device_set_wakeup_capable(&pdev->dev, true);
 	return 0;
diff --git a/drivers/firmware/efi/arm-init.c b/drivers/firmware/efi/arm-init.c
index a850cbc48d8d..c49d50e68aee 100644
--- a/drivers/firmware/efi/arm-init.c
+++ b/drivers/firmware/efi/arm-init.c
@@ -174,6 +174,7 @@ static __init void reserve_regions(void)
 {
 	efi_memory_desc_t *md;
 	u64 paddr, npages, size;
+	int resv;
 
 	if (efi_enabled(EFI_DBG))
 		pr_info("Processing EFI memory map:\n");
@@ -190,12 +191,14 @@ static __init void reserve_regions(void)
 		paddr = md->phys_addr;
 		npages = md->num_pages;
 
+		resv = is_reserve_region(md);
 		if (efi_enabled(EFI_DBG)) {
 			char buf[64];
 
-			pr_info("  0x%012llx-0x%012llx %s",
+			pr_info("  0x%012llx-0x%012llx %s%s\n",
 				paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1,
-				efi_md_typeattr_format(buf, sizeof(buf), md));
+				efi_md_typeattr_format(buf, sizeof(buf), md),
+				resv ? "*" : "");
 		}
 
 		memrange_efi_to_native(&paddr, &npages);
@@ -204,14 +207,9 @@ static __init void reserve_regions(void)
 		if (is_normal_ram(md))
 			early_init_dt_add_memory_arch(paddr, size);
 
-		if (is_reserve_region(md)) {
+		if (resv)
 			memblock_mark_nomap(paddr, size);
-			if (efi_enabled(EFI_DBG))
-				pr_cont("*");
-		}
 
-		if (efi_enabled(EFI_DBG))
-			pr_cont("\n");
 	}
 
 	set_bit(EFI_MEMMAP, &efi.flags);
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 48da857f4774..cebcb405812e 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -33,6 +33,7 @@ config ARCH_REQUIRE_GPIOLIB
 
 menuconfig GPIOLIB
 	bool "GPIO Support"
+	select ANON_INODES
 	help
 	  This enables GPIO support through the generic GPIO library.
 	  You only need to enable this, if you also want to enable
@@ -530,7 +531,7 @@ menu "Port-mapped I/O GPIO drivers"
 
 config GPIO_104_DIO_48E
 	tristate "ACCES 104-DIO-48E GPIO support"
-	depends on ISA
+	depends on ISA_BUS_API
 	select GPIOLIB_IRQCHIP
 	help
 	  Enables GPIO support for the ACCES 104-DIO-48E series (104-DIO-48E,
@@ -540,7 +541,7 @@ config GPIO_104_DIO_48E
 
 config GPIO_104_IDIO_16
 	tristate "ACCES 104-IDIO-16 GPIO support"
-	depends on ISA
+	depends on ISA_BUS_API
 	select GPIOLIB_IRQCHIP
 	help
 	  Enables GPIO support for the ACCES 104-IDIO-16 family (104-IDIO-16,
@@ -551,7 +552,7 @@ config GPIO_104_IDIO_16
 
 config GPIO_104_IDI_48
 	tristate "ACCES 104-IDI-48 GPIO support"
-	depends on ISA
+	depends on ISA_BUS_API
 	select GPIOLIB_IRQCHIP
 	help
 	  Enables GPIO support for the ACCES 104-IDI-48 family (104-IDI-48A,
@@ -627,7 +628,7 @@ config GPIO_TS5500
 
 config GPIO_WS16C48
 	tristate "WinSystems WS16C48 GPIO support"
-	depends on ISA
+	depends on ISA_BUS_API
 	select GPIOLIB_IRQCHIP
 	help
 	  Enables GPIO support for the WinSystems WS16C48. The base port
diff --git a/drivers/gpio/gpio-104-dio-48e.c b/drivers/gpio/gpio-104-dio-48e.c
index 1a647c07be67..fcf776971ca9 100644
--- a/drivers/gpio/gpio-104-dio-48e.c
+++ b/drivers/gpio/gpio-104-dio-48e.c
@@ -75,7 +75,7 @@ static int dio48e_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
 {
 	struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
 	const unsigned io_port = offset / 8;
-	const unsigned control_port = io_port / 2;
+	const unsigned int control_port = io_port / 3;
 	const unsigned control_addr = dio48egpio->base + 3 + control_port*4;
 	unsigned long flags;
 	unsigned control;
@@ -115,7 +115,7 @@ static int dio48e_gpio_direction_output(struct gpio_chip *chip, unsigned offset,
 {
 	struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
 	const unsigned io_port = offset / 8;
-	const unsigned control_port = io_port / 2;
+	const unsigned int control_port = io_port / 3;
 	const unsigned mask = BIT(offset % 8);
 	const unsigned control_addr = dio48egpio->base + 3 + control_port*4;
 	const unsigned out_port = (io_port > 2) ? io_port + 1 : io_port;
diff --git a/drivers/gpio/gpio-104-idi-48.c b/drivers/gpio/gpio-104-idi-48.c
index 6c75c83baf5a..2d2763ea1a68 100644
--- a/drivers/gpio/gpio-104-idi-48.c
+++ b/drivers/gpio/gpio-104-idi-48.c
@@ -247,6 +247,7 @@ static int idi_48_probe(struct device *dev, unsigned int id)
 	idi48gpio->irq = irq[id];
 
 	spin_lock_init(&idi48gpio->lock);
+	spin_lock_init(&idi48gpio->ack_lock);
 
 	dev_set_drvdata(dev, idi48gpio);
 
diff --git a/drivers/gpio/gpio-bcm-kona.c b/drivers/gpio/gpio-bcm-kona.c
index 9aabc48ff5de..953e4b829e32 100644
--- a/drivers/gpio/gpio-bcm-kona.c
+++ b/drivers/gpio/gpio-bcm-kona.c
@@ -547,11 +547,11 @@ static void bcm_kona_gpio_reset(struct bcm_kona_gpio *kona_gpio)
 	/* disable interrupts and clear status */
 	for (i = 0; i < kona_gpio->num_bank; i++) {
 		/* Unlock the entire bank first */
-		bcm_kona_gpio_write_lock_regs(kona_gpio, i, UNLOCK_CODE);
+		bcm_kona_gpio_write_lock_regs(reg_base, i, UNLOCK_CODE);
 		writel(0xffffffff, reg_base + GPIO_INT_MASK(i));
 		writel(0xffffffff, reg_base + GPIO_INT_STATUS(i));
 		/* Now re-lock the bank */
-		bcm_kona_gpio_write_lock_regs(kona_gpio, i, LOCK_CODE);
+		bcm_kona_gpio_write_lock_regs(reg_base, i, LOCK_CODE);
 	}
 }
 
diff --git a/drivers/gpio/gpio-zynq.c b/drivers/gpio/gpio-zynq.c
index 75c6355b018d..e72794e463aa 100644
--- a/drivers/gpio/gpio-zynq.c
+++ b/drivers/gpio/gpio-zynq.c
@@ -709,7 +709,13 @@ static int zynq_gpio_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "input clock not found.\n");
 		return PTR_ERR(gpio->clk);
 	}
+	ret = clk_prepare_enable(gpio->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "Unable to enable clock.\n");
+		return ret;
+	}
 
+	pm_runtime_set_active(&pdev->dev);
 	pm_runtime_enable(&pdev->dev);
 	ret = pm_runtime_get_sync(&pdev->dev);
 	if (ret < 0)
@@ -747,6 +753,7 @@ err_pm_put:
 	pm_runtime_put(&pdev->dev);
 err_pm_dis:
 	pm_runtime_disable(&pdev->dev);
+	clk_disable_unprepare(gpio->clk);
 
 	return ret;
 }
diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index d22dcc38179d..4aabddb38b59 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -16,6 +16,7 @@
 #include <linux/errno.h>
 #include <linux/module.h>
 #include <linux/io.h>
+#include <linux/io-mapping.h>
 #include <linux/gpio/consumer.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 24f60d28f0c0..570771ed19e6 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -449,7 +449,6 @@ static void gpiodevice_release(struct device *dev)
 {
 	struct gpio_device *gdev = dev_get_drvdata(dev);
 
-	cdev_del(&gdev->chrdev);
 	list_del(&gdev->list);
 	ida_simple_remove(&gpio_ida, gdev->id);
 	kfree(gdev->label);
@@ -482,7 +481,6 @@ static int gpiochip_setup_dev(struct gpio_device *gdev)
 
 	/* From this point, the .release() function cleans up gpio_device */
 	gdev->dev.release = gpiodevice_release;
-	get_device(&gdev->dev);
 	pr_debug("%s: registered GPIOs %d to %d on device: %s (%s)\n",
 		 __func__, gdev->base, gdev->base + gdev->ngpio - 1,
 		 dev_name(&gdev->dev), gdev->chip->label ? : "generic");
@@ -770,6 +768,8 @@ void gpiochip_remove(struct gpio_chip *chip)
 	 * be removed, else it will be dangling until the last user is
 	 * gone.
 	 */
+	cdev_del(&gdev->chrdev);
+	device_del(&gdev->dev);
 	put_device(&gdev->dev);
 }
 EXPORT_SYMBOL_GPL(gpiochip_remove);
@@ -869,7 +869,7 @@ struct gpio_chip *gpiochip_find(void *data,
 
 	spin_lock_irqsave(&gpio_lock, flags);
 	list_for_each_entry(gdev, &gpio_devices, list)
-		if (match(gdev->chip, data))
+		if (gdev->chip && match(gdev->chip, data))
 			break;
 
 	/* No match? */
@@ -1373,8 +1373,12 @@ done:
 #define VALIDATE_DESC(desc) do { \
 	if (!desc) \
 		return 0; \
+	if (IS_ERR(desc)) {						\
+		pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \
+		return PTR_ERR(desc); \
+	} \
 	if (!desc->gdev) { \
-		pr_warn("%s: invalid GPIO\n", __func__); \
+		pr_warn("%s: invalid GPIO (no device)\n", __func__); \
 		return -EINVAL; \
 	} \
 	if ( !desc->gdev->chip ) { \
@@ -1386,8 +1390,12 @@ done:
 #define VALIDATE_DESC_VOID(desc) do { \
 	if (!desc) \
 		return; \
+	if (IS_ERR(desc)) {						\
+		pr_warn("%s: invalid GPIO (errorpointer)\n", __func__); \
+		return; \
+	} \
 	if (!desc->gdev) { \
-		pr_warn("%s: invalid GPIO\n", __func__); \
+		pr_warn("%s: invalid GPIO (no device)\n", __func__); \
 		return; \
 	} \
 	if (!desc->gdev->chip) { \
@@ -2056,7 +2064,14 @@ int gpiod_to_irq(const struct gpio_desc *desc)
 	struct gpio_chip *chip;
 	int offset;
 
-	VALIDATE_DESC(desc);
+	/*
+	 * Cannot VALIDATE_DESC() here as gpiod_to_irq() consumer semantics
+	 * requires this function to not return zero on an invalid descriptor
+	 * but rather a negative error number.
+	 */
+	if (!desc || IS_ERR(desc) || !desc->gdev || !desc->gdev->chip)
+		return -EINVAL;
+
 	chip = desc->gdev->chip;
 	offset = gpio_chip_hwgpio(desc);
 	if (chip->to_irq) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index da3d02154fa6..eb09037a7161 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -85,8 +85,12 @@ extern int amdgpu_vm_debug;
 extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
 extern int amdgpu_powerplay;
+extern int amdgpu_powercontainment;
 extern unsigned amdgpu_pcie_gen_cap;
 extern unsigned amdgpu_pcie_lane_cap;
+extern unsigned amdgpu_cg_mask;
+extern unsigned amdgpu_pg_mask;
+extern char *amdgpu_disable_cu;
 
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
 #define AMDGPU_MAX_USEC_TIMEOUT			100000	/* 100 ms */
@@ -183,6 +187,10 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
 int amdgpu_set_powergating_state(struct amdgpu_device *adev,
 				  enum amd_ip_block_type block_type,
 				  enum amd_powergating_state state);
+int amdgpu_wait_for_idle(struct amdgpu_device *adev,
+			 enum amd_ip_block_type block_type);
+bool amdgpu_is_idle(struct amdgpu_device *adev,
+		    enum amd_ip_block_type block_type);
 
 struct amdgpu_ip_block_version {
 	enum amd_ip_block_type type;
@@ -594,11 +602,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct reservation_object *resv,
 		     void *owner);
-bool amdgpu_sync_is_idle(struct amdgpu_sync *sync);
-int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
-			     struct fence *fence);
+struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+				     struct amdgpu_ring *ring);
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
-int amdgpu_sync_wait(struct amdgpu_sync *sync);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
 int amdgpu_sync_init(void);
 void amdgpu_sync_fini(void);
@@ -754,12 +760,11 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 			     struct amdgpu_job **job);
 
+void amdgpu_job_free_resources(struct amdgpu_job *job);
 void amdgpu_job_free(struct amdgpu_job *job);
-void amdgpu_job_free_func(struct kref *refcount);
 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct amd_sched_entity *entity, void *owner,
 		      struct fence **f);
-void amdgpu_job_timeout_func(struct work_struct *work);
 
 struct amdgpu_ring {
 	struct amdgpu_device		*adev;
@@ -771,8 +776,6 @@ struct amdgpu_ring {
 	struct amdgpu_bo	*ring_obj;
 	volatile uint32_t	*ring;
 	unsigned		rptr_offs;
-	u64			next_rptr_gpu_addr;
-	volatile u32		*next_rptr_cpu_addr;
 	unsigned		wptr;
 	unsigned		wptr_old;
 	unsigned		ring_size;
@@ -791,7 +794,6 @@ struct amdgpu_ring {
 	u32			doorbell_index;
 	bool			use_doorbell;
 	unsigned		wptr_offs;
-	unsigned		next_rptr_offs;
 	unsigned		fence_offs;
 	uint64_t		current_ctx;
 	enum amdgpu_ring_type	type;
@@ -799,6 +801,9 @@ struct amdgpu_ring {
 	unsigned		cond_exe_offs;
 	u64				cond_exe_gpu_addr;
 	volatile u32	*cond_exe_cpu_addr;
+#if defined(CONFIG_DEBUG_FS)
+	struct dentry *ent;
+#endif
 };
 
 /*
@@ -861,6 +866,7 @@ struct amdgpu_vm {
 	struct amdgpu_bo	*page_directory;
 	unsigned		max_pde_used;
 	struct fence		*page_directory_fence;
+	uint64_t		last_eviction_counter;
 
 	/* array of page tables, one for each page directory entry */
 	struct amdgpu_vm_pt	*page_tables;
@@ -883,13 +889,14 @@ struct amdgpu_vm_id {
 	struct fence		*first;
 	struct amdgpu_sync	active;
 	struct fence		*last_flush;
-	struct amdgpu_ring      *last_user;
 	atomic64_t		owner;
 
 	uint64_t		pd_gpu_addr;
 	/* last flushed PD/PT update */
 	struct fence		*flushed_updates;
 
+	uint32_t                current_gpu_reset_count;
+
 	uint32_t		gds_base;
 	uint32_t		gds_size;
 	uint32_t		gws_base;
@@ -905,6 +912,10 @@ struct amdgpu_vm_manager {
 	struct list_head			ids_lru;
 	struct amdgpu_vm_id			ids[AMDGPU_NUM_VM];
 
+	/* Handling of VM fences */
+	u64					fence_context;
+	unsigned				seqno[AMDGPU_MAX_RINGS];
+
 	uint32_t				max_pfn;
 	/* vram base address for page table entry  */
 	u64					vram_base_offset;
@@ -926,17 +937,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);
-void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates);
+void amdgpu_vm_get_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			  struct list_head *duplicates);
 void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm);
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_sync *sync, struct fence *fence,
-		      unsigned *vm_id, uint64_t *vm_pd_addr);
-int amdgpu_vm_flush(struct amdgpu_ring *ring,
-		    unsigned vm_id, uint64_t pd_addr,
-		    uint32_t gds_base, uint32_t gds_size,
-		    uint32_t gws_base, uint32_t gws_size,
-		    uint32_t oa_base, uint32_t oa_size);
+		      struct amdgpu_job *job);
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
@@ -1142,6 +1150,12 @@ struct amdgpu_cu_info {
 	uint32_t bitmap[4][4];
 };
 
+struct amdgpu_gfx_funcs {
+	/* get the gpu clock counter */
+	uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
+	void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
+};
+
 struct amdgpu_gfx {
 	struct mutex			gpu_clock_mutex;
 	struct amdgpu_gca_config	config;
@@ -1178,6 +1192,7 @@ struct amdgpu_gfx {
 	/* ce ram size*/
 	unsigned			ce_ram_size;
 	struct amdgpu_cu_info		cu_info;
+	const struct amdgpu_gfx_funcs	*funcs;
 };
 
 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
@@ -1195,10 +1210,6 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
-unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
-			    uint32_t **data);
-int amdgpu_ring_restore(struct amdgpu_ring *ring,
-			unsigned size, uint32_t *data);
 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, u32 nop, u32 align_mask,
 		     struct amdgpu_irq_src *irq_src, unsigned irq_type,
@@ -1250,6 +1261,7 @@ struct amdgpu_job {
 	uint32_t		num_ibs;
 	void			*owner;
 	uint64_t		ctx;
+	bool                    vm_needs_flush;
 	unsigned		vm_id;
 	uint64_t		vm_pd_addr;
 	uint32_t		gds_base, gds_size;
@@ -1257,8 +1269,7 @@ struct amdgpu_job {
 	uint32_t		oa_base, oa_size;
 
 	/* user fence handling */
-	struct amdgpu_bo	*uf_bo;
-	uint32_t		uf_offset;
+	uint64_t		uf_addr;
 	uint64_t		uf_sequence;
 
 };
@@ -1560,6 +1571,12 @@ struct amdgpu_dpm_funcs {
 	u32 (*get_fan_control_mode)(struct amdgpu_device *adev);
 	int (*set_fan_speed_percent)(struct amdgpu_device *adev, u32 speed);
 	int (*get_fan_speed_percent)(struct amdgpu_device *adev, u32 *speed);
+	int (*force_clock_level)(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t mask);
+	int (*print_clock_levels)(struct amdgpu_device *adev, enum pp_clock_type type, char *buf);
+	int (*get_sclk_od)(struct amdgpu_device *adev);
+	int (*set_sclk_od)(struct amdgpu_device *adev, uint32_t value);
+	int (*get_mclk_od)(struct amdgpu_device *adev);
+	int (*set_mclk_od)(struct amdgpu_device *adev, uint32_t value);
 };
 
 struct amdgpu_dpm {
@@ -1767,6 +1784,8 @@ int amdgpu_debugfs_init(struct drm_minor *minor);
 void amdgpu_debugfs_cleanup(struct drm_minor *minor);
 #endif
 
+int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
+
 /*
  * amdgpu smumgr functions
  */
@@ -1811,15 +1830,13 @@ struct amdgpu_asic_funcs {
 			     u32 sh_num, u32 reg_offset, u32 *value);
 	void (*set_vga_state)(struct amdgpu_device *adev, bool state);
 	int (*reset)(struct amdgpu_device *adev);
-	/* wait for mc_idle */
-	int (*wait_for_mc_idle)(struct amdgpu_device *adev);
 	/* get the reference clock */
 	u32 (*get_xclk)(struct amdgpu_device *adev);
-	/* get the gpu clock counter */
-	uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
 	/* MM block clocks */
 	int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk);
 	int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);
+	/* query virtual capabilities */
+	u32 (*get_virtual_caps)(struct amdgpu_device *adev);
 };
 
 /*
@@ -1914,8 +1931,12 @@ void amdgpu_cgs_destroy_device(struct cgs_device *cgs_device);
 
 
 /* GPU virtualization */
+#define AMDGPU_VIRT_CAPS_SRIOV_EN       (1 << 0)
+#define AMDGPU_VIRT_CAPS_IS_VF          (1 << 1)
 struct amdgpu_virtualization {
 	bool supports_sr_iov;
+	bool is_virtual;
+	u32 caps;
 };
 
 /*
@@ -1997,6 +2018,10 @@ struct amdgpu_device {
 	spinlock_t didt_idx_lock;
 	amdgpu_rreg_t			didt_rreg;
 	amdgpu_wreg_t			didt_wreg;
+	/* protects concurrent gc_cac register access */
+	spinlock_t gc_cac_idx_lock;
+	amdgpu_rreg_t			gc_cac_rreg;
+	amdgpu_wreg_t			gc_cac_wreg;
 	/* protects concurrent ENDPOINT (audio) register access */
 	spinlock_t audio_endpt_idx_lock;
 	amdgpu_block_rreg_t		audio_endpt_rreg;
@@ -2022,6 +2047,7 @@ struct amdgpu_device {
 	atomic64_t			vram_vis_usage;
 	atomic64_t			gtt_usage;
 	atomic64_t			num_bytes_moved;
+	atomic64_t			num_evictions;
 	atomic_t			gpu_reset_counter;
 
 	/* display */
@@ -2125,6 +2151,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
 #define WREG32_UVD_CTX(reg, v) adev->uvd_ctx_wreg(adev, (reg), (v))
 #define RREG32_DIDT(reg) adev->didt_rreg(adev, (reg))
 #define WREG32_DIDT(reg, v) adev->didt_wreg(adev, (reg), (v))
+#define RREG32_GC_CAC(reg) adev->gc_cac_rreg(adev, (reg))
+#define WREG32_GC_CAC(reg, v) adev->gc_cac_wreg(adev, (reg), (v))
 #define RREG32_AUDIO_ENDPT(block, reg) adev->audio_endpt_rreg(adev, (block), (reg))
 #define WREG32_AUDIO_ENDPT(block, reg, v) adev->audio_endpt_wreg(adev, (block), (reg), (v))
 #define WREG32_P(reg, val, mask)				\
@@ -2200,11 +2228,10 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
  */
 #define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state))
 #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
-#define amdgpu_asic_wait_for_mc_idle(adev) (adev)->asic_funcs->wait_for_mc_idle((adev))
 #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
 #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
 #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
-#define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev))
+#define amdgpu_asic_get_virtual_caps(adev) ((adev)->asic_funcs->get_virtual_caps((adev)))
 #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
 #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
 #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
@@ -2257,6 +2284,8 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_print_power_state(adev, ps) (adev)->pm.funcs->print_power_state((adev), (ps))
 #define amdgpu_dpm_vblank_too_short(adev) (adev)->pm.funcs->vblank_too_short((adev))
 #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
+#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
+#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
 
 #define amdgpu_dpm_get_temperature(adev) \
 	((adev)->pp_enabled ?						\
@@ -2335,6 +2364,18 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_force_clock_level(adev, type, level) \
 		(adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level)
 
+#define amdgpu_dpm_get_sclk_od(adev) \
+	(adev)->powerplay.pp_funcs->get_sclk_od((adev)->powerplay.pp_handle)
+
+#define amdgpu_dpm_set_sclk_od(adev, value) \
+	(adev)->powerplay.pp_funcs->set_sclk_od((adev)->powerplay.pp_handle, value)
+
+#define amdgpu_dpm_get_mclk_od(adev) \
+	((adev)->powerplay.pp_funcs->get_mclk_od((adev)->powerplay.pp_handle))
+
+#define amdgpu_dpm_set_mclk_od(adev, value) \
+	((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value))
+
 #define amdgpu_dpm_dispatch_task(adev, event_id, input, output)		\
 	(adev)->powerplay.pp_funcs->dispatch_tasks((adev)->powerplay.pp_handle, (event_id), (input), (output))
 
@@ -2376,9 +2417,13 @@ bool amdgpu_device_is_px(struct drm_device *dev);
 #if defined(CONFIG_VGA_SWITCHEROO)
 void amdgpu_register_atpx_handler(void);
 void amdgpu_unregister_atpx_handler(void);
+bool amdgpu_has_atpx_dgpu_power_cntl(void);
+bool amdgpu_is_atpx_hybrid(void);
 #else
 static inline void amdgpu_register_atpx_handler(void) {}
 static inline void amdgpu_unregister_atpx_handler(void) {}
+static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
+static inline bool amdgpu_is_atpx_hybrid(void) { return false; }
 #endif
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 32809f749903..d080d0807a5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -240,8 +240,8 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;
 
-	if (rdev->asic_funcs->get_gpu_clock_counter)
-		return rdev->asic_funcs->get_gpu_clock_counter(rdev);
+	if (rdev->gfx.funcs->get_gpu_clock_counter)
+		return rdev->gfx.funcs->get_gpu_clock_counter(rdev);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 35a1248aaa77..0494fe7b62c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -10,6 +10,7 @@
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <linux/pci.h>
+#include <linux/delay.h>
 
 #include "amd_acpi.h"
 
@@ -27,6 +28,7 @@ struct amdgpu_atpx_functions {
 struct amdgpu_atpx {
 	acpi_handle handle;
 	struct amdgpu_atpx_functions functions;
+	bool is_hybrid;
 };
 
 static struct amdgpu_atpx_priv {
@@ -63,6 +65,14 @@ bool amdgpu_has_atpx(void) {
 	return amdgpu_atpx_priv.atpx_detected;
 }
 
+bool amdgpu_has_atpx_dgpu_power_cntl(void) {
+	return amdgpu_atpx_priv.atpx.functions.power_cntl;
+}
+
+bool amdgpu_is_atpx_hybrid(void) {
+	return amdgpu_atpx_priv.atpx.is_hybrid;
+}
+
 /**
  * amdgpu_atpx_call - call an ATPX method
  *
@@ -142,18 +152,12 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
  */
 static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 {
-	/* make sure required functions are enabled */
-	/* dGPU power control is required */
-	if (atpx->functions.power_cntl == false) {
-		printk("ATPX dGPU power cntl not present, forcing\n");
-		atpx->functions.power_cntl = true;
-	}
+	u32 valid_bits = 0;
 
 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
 		size_t size;
-		u32 valid_bits;
 
 		info = amdgpu_atpx_call(atpx->handle, ATPX_FUNCTION_GET_PX_PARAMETERS, NULL);
 		if (!info)
@@ -172,19 +176,43 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 		memcpy(&output, info->buffer.pointer, size);
 
 		valid_bits = output.flags & output.valid_flags;
-		/* if separate mux flag is set, mux controls are required */
-		if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
-			atpx->functions.i2c_mux_cntl = true;
-			atpx->functions.disp_mux_cntl = true;
-		}
-		/* if any outputs are muxed, mux controls are required */
-		if (valid_bits & (ATPX_CRT1_RGB_SIGNAL_MUXED |
-				  ATPX_TV_SIGNAL_MUXED |
-				  ATPX_DFP_SIGNAL_MUXED))
-			atpx->functions.disp_mux_cntl = true;
 
 		kfree(info);
 	}
+
+	/* if separate mux flag is set, mux controls are required */
+	if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
+		atpx->functions.i2c_mux_cntl = true;
+		atpx->functions.disp_mux_cntl = true;
+	}
+	/* if any outputs are muxed, mux controls are required */
+	if (valid_bits & (ATPX_CRT1_RGB_SIGNAL_MUXED |
+			  ATPX_TV_SIGNAL_MUXED |
+			  ATPX_DFP_SIGNAL_MUXED))
+		atpx->functions.disp_mux_cntl = true;
+
+
+	/* some bioses set these bits rather than flagging power_cntl as supported */
+	if (valid_bits & (ATPX_DYNAMIC_PX_SUPPORTED |
+			  ATPX_DYNAMIC_DGPU_POWER_OFF_SUPPORTED))
+		atpx->functions.power_cntl = true;
+
+	atpx->is_hybrid = false;
+	if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
+		printk("ATPX Hybrid Graphics\n");
+#if 1
+		/* This is a temporary hack until the D3 cold support
+		 * makes it upstream.  The ATPX power_control method seems
+		 * to still work on even if the system should be using
+		 * the new standardized hybrid D3 cold ACPI interface.
+		 */
+		atpx->functions.power_cntl = true;
+#else
+		atpx->functions.power_cntl = false;
+#endif
+		atpx->is_hybrid = true;
+	}
+
 	return 0;
 }
 
@@ -259,6 +287,10 @@ static int amdgpu_atpx_set_discrete_state(struct amdgpu_atpx *atpx, u8 state)
 		if (!info)
 			return -EIO;
 		kfree(info);
+
+		/* 200ms delay is required after off */
+		if (state == 0)
+			msleep(200);
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 823bf5e0b0c8..651115dcce12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -94,6 +94,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	unsigned last_entry = 0, first_userptr = num_entries;
 	unsigned i;
 	int r;
+	unsigned long total_size = 0;
 
 	array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry));
 	if (!array)
@@ -140,6 +141,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 		if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
 			oa_obj = entry->robj;
 
+		total_size += amdgpu_bo_size(entry->robj);
 		trace_amdgpu_bo_list_set(list, entry->robj);
 	}
 
@@ -155,6 +157,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	list->array = array;
 	list->num_entries = num_entries;
 
+	trace_amdgpu_cs_bo_status(list->num_entries, total_size);
 	return 0;
 
 error_free:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 199f76baf22c..5556ce979199 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -312,6 +312,8 @@ static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
 		return RREG32_UVD_CTX(index);
 	case CGS_IND_REG__DIDT:
 		return RREG32_DIDT(index);
+	case CGS_IND_REG_GC_CAC:
+		return RREG32_GC_CAC(index);
 	case CGS_IND_REG__AUDIO_ENDPT:
 		DRM_ERROR("audio endpt register access not implemented.\n");
 		return 0;
@@ -336,6 +338,8 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
 		return WREG32_UVD_CTX(index, value);
 	case CGS_IND_REG__DIDT:
 		return WREG32_DIDT(index, value);
+	case CGS_IND_REG_GC_CAC:
+		return WREG32_GC_CAC(index, value);
 	case CGS_IND_REG__AUDIO_ENDPT:
 		DRM_ERROR("audio endpt register access not implemented.\n");
 		return;
@@ -696,6 +700,25 @@ static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
 	return result;
 }
 
+/*
+ * Release a firmware image that was loaded through CGS.
+ *
+ * Only the SMU images (CGS_UCODE_ID_SMU, CGS_UCODE_ID_SMU_SK) are handled;
+ * any other type yields -EINVAL.  Returns 0 on success.
+ */
+static int amdgpu_cgs_rel_firmware(struct cgs_device *cgs_device, enum cgs_ucode_id type)
+{
+	CGS_FUNC_ADEV;
+	if ((CGS_UCODE_ID_SMU == type) || (CGS_UCODE_ID_SMU_SK == type)) {
+		release_firmware(adev->pm.fw);
+		/* drop the stale pointer: no reuse after free, no double release */
+		adev->pm.fw = NULL;
+		return 0;
+	}
+	/* cannot release other firmware because they are not created by cgs */
+	return -EINVAL;
+}
+
 static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 					enum cgs_ucode_id type,
 					struct cgs_firmware_info *info)
@@ -776,6 +791,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 		}
 
 		hdr = (const struct smc_firmware_header_v1_0 *)	adev->pm.fw->data;
+		amdgpu_ucode_print_smc_hdr(&hdr->header);
 		adev->pm.fw_version = le32_to_cpu(hdr->header.ucode_version);
 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes);
 		ucode_start_address = le32_to_cpu(hdr->ucode_start_addr);
@@ -819,6 +835,9 @@ static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
 	case CGS_SYSTEM_INFO_GFX_CU_INFO:
 		sys_info->value = adev->gfx.cu_info.number;
 		break;
+	case CGS_SYSTEM_INFO_GFX_SE_INFO:
+		sys_info->value = adev->gfx.config.max_shader_engines;
+		break;
 	default:
 		return -ENODEV;
 	}
@@ -898,7 +917,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 	struct cgs_acpi_method_argument *argument = NULL;
 	uint32_t i, count;
 	acpi_status status;
-	int result;
+	int result = 0;
 	uint32_t func_no = 0xFFFFFFFF;
 
 	handle = ACPI_HANDLE(&adev->pdev->dev);
@@ -961,11 +980,11 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 				params->integer.value = argument->value;
 				break;
 			case ACPI_TYPE_STRING:
-				params->string.length = argument->method_length;
+				params->string.length = argument->data_length;
 				params->string.pointer = argument->pointer;
 				break;
 			case ACPI_TYPE_BUFFER:
-				params->buffer.length = argument->method_length;
+				params->buffer.length = argument->data_length;
 				params->buffer.pointer = argument->pointer;
 				break;
 			default:
@@ -1068,17 +1087,14 @@ int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
 	struct cgs_acpi_method_info info = {0};
 
 	acpi_input[0].type = CGS_ACPI_TYPE_INTEGER;
-	acpi_input[0].method_length = sizeof(uint32_t);
 	acpi_input[0].data_length = sizeof(uint32_t);
 	acpi_input[0].value = acpi_function;
 
 	acpi_input[1].type = CGS_ACPI_TYPE_BUFFER;
-	acpi_input[1].method_length = CGS_ACPI_MAX_BUFFER_SIZE;
 	acpi_input[1].data_length = input_size;
 	acpi_input[1].pointer = pinput;
 
 	acpi_output.type = CGS_ACPI_TYPE_BUFFER;
-	acpi_output.method_length = CGS_ACPI_MAX_BUFFER_SIZE;
 	acpi_output.data_length = output_size;
 	acpi_output.pointer = poutput;
 
@@ -1125,6 +1141,7 @@ static const struct cgs_ops amdgpu_cgs_ops = {
 	amdgpu_cgs_pm_query_clock_limits,
 	amdgpu_cgs_set_camera_voltages,
 	amdgpu_cgs_get_firmware_info,
+	amdgpu_cgs_rel_firmware,
 	amdgpu_cgs_set_powergating_state,
 	amdgpu_cgs_set_clockgating_state,
 	amdgpu_cgs_get_active_displays_info,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index cb07da41152b..ff0b55a65ca3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -1690,7 +1690,6 @@ amdgpu_connector_add(struct amdgpu_device *adev,
 						   DRM_MODE_SCALE_NONE);
 			/* no HPD on analog connectors */
 			amdgpu_connector->hpd.hpd = AMDGPU_HPD_NONE;
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 			connector->interlace_allowed = true;
 			connector->doublescan_allowed = true;
 			break;
@@ -1893,8 +1892,10 @@ amdgpu_connector_add(struct amdgpu_device *adev,
 	}
 
 	if (amdgpu_connector->hpd.hpd == AMDGPU_HPD_NONE) {
-		if (i2c_bus->valid)
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+		if (i2c_bus->valid) {
+			connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+			                    DRM_CONNECTOR_POLL_DISCONNECT;
+		}
 	} else
 		connector->polled = DRM_CONNECTOR_POLL_HPD;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 9bc8f1d99733..0307ff5887c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -216,11 +216,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	if (ret)
 		goto free_all_kdata;
 
-	if (p->uf_entry.robj) {
-		p->job->uf_bo = amdgpu_bo_ref(p->uf_entry.robj);
-		p->job->uf_offset = uf_offset;
-	}
-
+	if (p->uf_entry.robj)
+		p->job->uf_addr = uf_offset;
 	kfree(chunk_array);
 	return 0;
 
@@ -459,7 +456,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		list_splice(&need_pages, &p->validated);
 	}
 
-	amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);
+	amdgpu_vm_get_pt_bos(p->adev, &fpriv->vm, &duplicates);
 
 	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
 	p->bytes_moved = 0;
@@ -472,6 +469,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (r)
 		goto error_validate;
 
+	fpriv->vm.last_eviction_counter =
+		atomic64_read(&p->adev->num_evictions);
+
 	if (p->bo_list) {
 		struct amdgpu_bo *gds = p->bo_list->gds_obj;
 		struct amdgpu_bo *gws = p->bo_list->gws_obj;
@@ -499,6 +499,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		}
 	}
 
+	if (p->uf_entry.robj)
+		p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
+
 error_validate:
 	if (r) {
 		amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm);
@@ -653,18 +656,21 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 
 	/* Only for UVD/VCE VM emulation */
 	if (ring->funcs->parse_cs) {
+		p->job->vm = NULL;
 		for (i = 0; i < p->job->num_ibs; i++) {
 			r = amdgpu_ring_parse_cs(ring, p, i);
 			if (r)
 				return r;
 		}
-	}
+	} else {
+		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 
-	r = amdgpu_bo_vm_update_pte(p, vm);
-	if (!r)
-		amdgpu_cs_sync_rings(p);
+		r = amdgpu_bo_vm_update_pte(p, vm);
+		if (r)
+			return r;
+	}
 
-	return r;
+	return amdgpu_cs_sync_rings(p);
 }
 
 static int amdgpu_cs_handle_lockup(struct amdgpu_device *adev, int r)
@@ -761,7 +767,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	}
 
 	/* UVD & VCE fw doesn't support user fences */
-	if (parser->job->uf_bo && (
+	if (parser->job->uf_addr && (
 	    parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
 	    parser->job->ring->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
@@ -830,17 +836,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 {
 	struct amdgpu_ring *ring = p->job->ring;
 	struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
-	struct fence *fence;
 	struct amdgpu_job *job;
 	int r;
 
 	job = p->job;
 	p->job = NULL;
 
-	r = amd_sched_job_init(&job->base, &ring->sched,
-			       entity, amdgpu_job_timeout_func,
-			       amdgpu_job_free_func,
-			       p->filp, &fence);
+	r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp);
 	if (r) {
 		amdgpu_job_free(job);
 		return r;
@@ -848,9 +850,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 
 	job->owner = p->filp;
 	job->ctx = entity->fence_context;
-	p->fence = fence_get(fence);
-	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, fence);
+	p->fence = fence_get(&job->base.s_fence->finished);
+	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
 	job->uf_sequence = cs->out.handle;
+	amdgpu_job_free_resources(job);
 
 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 2ab5e0b184fb..614fb026436d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -25,6 +25,7 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+#include <linux/kthread.h>
 #include <linux/console.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>
@@ -35,6 +36,7 @@
 #include <linux/vga_switcheroo.h>
 #include <linux/efi.h>
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 #include "amdgpu_i2c.h"
 #include "atom.h"
 #include "amdgpu_atombios.h"
@@ -79,24 +81,27 @@ bool amdgpu_device_is_px(struct drm_device *dev)
 uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 			bool always_indirect)
 {
+	uint32_t ret;
+
 	if ((reg * 4) < adev->rmmio_size && !always_indirect)
-		return readl(((void __iomem *)adev->rmmio) + (reg * 4));
+		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
 	else {
 		unsigned long flags;
-		uint32_t ret;
 
 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
 		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
-
-		return ret;
 	}
+	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
+	return ret;
 }
 
 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 		    bool always_indirect)
 {
+	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
+
 	if ((reg * 4) < adev->rmmio_size && !always_indirect)
 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 	else {
@@ -827,8 +832,10 @@ static uint32_t cail_ioreg_read(struct card_info *info, uint32_t reg)
  */
 static void amdgpu_atombios_fini(struct amdgpu_device *adev)
 {
-	if (adev->mode_info.atom_context)
+	if (adev->mode_info.atom_context) {
 		kfree(adev->mode_info.atom_context->scratch);
+		kfree(adev->mode_info.atom_context->iio);
+	}
 	kfree(adev->mode_info.atom_context);
 	adev->mode_info.atom_context = NULL;
 	kfree(adev->mode_info.atom_card_info);
@@ -1068,11 +1075,14 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
 	int i, r = 0;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
 		if (adev->ip_blocks[i].type == block_type) {
 			r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
 									    state);
 			if (r)
 				return r;
+			break;
 		}
 	}
 	return r;
@@ -1085,16 +1095,53 @@ int amdgpu_set_powergating_state(struct amdgpu_device *adev,
 	int i, r = 0;
 
 	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
 		if (adev->ip_blocks[i].type == block_type) {
 			r = adev->ip_blocks[i].funcs->set_powergating_state((void *)adev,
 									    state);
 			if (r)
 				return r;
+			break;
 		}
 	}
 	return r;
 }
 
+int amdgpu_wait_for_idle(struct amdgpu_device *adev,
+			 enum amd_ip_block_type block_type)
+{
+	int i, r;
+
+	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
+		if (adev->ip_blocks[i].type == block_type) {
+			r = adev->ip_blocks[i].funcs->wait_for_idle((void *)adev);
+			if (r)
+				return r;
+			break;
+		}
+	}
+	return 0;
+
+}
+
+bool amdgpu_is_idle(struct amdgpu_device *adev,
+		    enum amd_ip_block_type block_type)
+{
+	int i;
+
+	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
+		if (adev->ip_blocks[i].type == block_type)
+			return adev->ip_blocks[i].funcs->is_idle((void *)adev);
+	}
+	return true;
+
+}
+
 const struct amdgpu_ip_block_version * amdgpu_get_ip_block(
 					struct amdgpu_device *adev,
 					enum amd_ip_block_type type)
@@ -1207,6 +1254,9 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 		}
 	}
 
+	adev->cg_flags &= amdgpu_cg_mask;
+	adev->pg_flags &= amdgpu_pg_mask;
+
 	return 0;
 }
 
@@ -1325,6 +1375,11 @@ static int amdgpu_fini(struct amdgpu_device *adev)
 		adev->ip_block_status[i].valid = false;
 	}
 
+	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
+		if (adev->ip_blocks[i].funcs->late_fini)
+			adev->ip_blocks[i].funcs->late_fini((void *)adev);
+	}
+
 	return 0;
 }
 
@@ -1378,6 +1433,15 @@ static int amdgpu_resume(struct amdgpu_device *adev)
 	return 0;
 }
 
+static bool amdgpu_device_is_virtual(void)
+{
+#ifdef CONFIG_X86
+	return boot_cpu_has(X86_FEATURE_HYPERVISOR);
+#else
+	return false;
+#endif
+}
+
 /**
  * amdgpu_device_init - initialize the driver
  *
@@ -1424,9 +1488,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
 	adev->didt_rreg = &amdgpu_invalid_rreg;
 	adev->didt_wreg = &amdgpu_invalid_wreg;
+	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
+	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
 
+
 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
@@ -1451,6 +1518,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	spin_lock_init(&adev->pcie_idx_lock);
 	spin_lock_init(&adev->uvd_ctx_idx_lock);
 	spin_lock_init(&adev->didt_idx_lock);
+	spin_lock_init(&adev->gc_cac_idx_lock);
 	spin_lock_init(&adev->audio_endpt_idx_lock);
 
 	adev->rmmio_base = pci_resource_start(adev->pdev, 5);
@@ -1495,29 +1563,38 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);
 
 	/* Read BIOS */
-	if (!amdgpu_get_bios(adev))
-		return -EINVAL;
+	if (!amdgpu_get_bios(adev)) {
+		r = -EINVAL;
+		goto failed;
+	}
 	/* Must be an ATOMBIOS */
 	if (!adev->is_atom_bios) {
 		dev_err(adev->dev, "Expecting atombios for GPU\n");
-		return -EINVAL;
+		r = -EINVAL;
+		goto failed;
 	}
 	r = amdgpu_atombios_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
-		return r;
+		goto failed;
 	}
 
 	/* See if the asic supports SR-IOV */
 	adev->virtualization.supports_sr_iov =
 		amdgpu_atombios_has_gpu_virtualization_table(adev);
 
+	/* Check if we are executing in a virtualized environment */
+	adev->virtualization.is_virtual = amdgpu_device_is_virtual();
+	adev->virtualization.caps = amdgpu_asic_get_virtual_caps(adev);
+
 	/* Post card if necessary */
 	if (!amdgpu_card_posted(adev) ||
-	    adev->virtualization.supports_sr_iov) {
+	    (adev->virtualization.is_virtual &&
+	     !(adev->virtualization.caps & AMDGPU_VIRT_CAPS_SRIOV_EN))) {
 		if (!adev->bios) {
 			dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
-			return -EINVAL;
+			r = -EINVAL;
+			goto failed;
 		}
 		DRM_INFO("GPU not posted. posting now...\n");
 		amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -1527,7 +1604,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_atombios_get_clock_info(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
-		return r;
+		goto failed;
 	}
 	/* init i2c buses */
 	amdgpu_atombios_i2c_init(adev);
@@ -1536,7 +1613,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_fence_driver_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
-		return r;
+		goto failed;
 	}
 
 	/* init the mode config */
@@ -1546,7 +1623,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (r) {
 		dev_err(adev->dev, "amdgpu_init failed\n");
 		amdgpu_fini(adev);
-		return r;
+		goto failed;
 	}
 
 	adev->accel_working = true;
@@ -1556,7 +1633,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_ib_pool_init(adev);
 	if (r) {
 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
-		return r;
+		goto failed;
 	}
 
 	r = amdgpu_ib_ring_tests(adev);
@@ -1573,6 +1650,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		DRM_ERROR("registering register debugfs failed (%d).\n", r);
 	}
 
+	r = amdgpu_debugfs_firmware_init(adev);
+	if (r) {
+		DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
+		goto failed;
+	}
+
 	if ((amdgpu_testing & 1)) {
 		if (adev->accel_working)
 			amdgpu_test_moves(adev);
@@ -1598,10 +1681,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_late_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_late_init failed\n");
-		return r;
+		goto failed;
 	}
 
 	return 0;
+
+failed:
+	if (runtime)
+		vga_switcheroo_fini_domain_pm_ops(adev->dev);
+	return r;
 }
 
 static void amdgpu_debugfs_remove_files(struct amdgpu_device *adev);
@@ -1636,6 +1724,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	kfree(adev->bios);
 	adev->bios = NULL;
 	vga_switcheroo_unregister_client(adev->pdev);
+	if (adev->flags & AMD_IS_PX)
+		vga_switcheroo_fini_domain_pm_ops(adev->dev);
 	vga_client_register(adev->pdev, NULL, NULL, NULL);
 	if (adev->rio_mem)
 		pci_iounmap(adev->pdev, adev->rio_mem);
@@ -1841,11 +1931,6 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
  */
 int amdgpu_gpu_reset(struct amdgpu_device *adev)
 {
-	unsigned ring_sizes[AMDGPU_MAX_RINGS];
-	uint32_t *ring_data[AMDGPU_MAX_RINGS];
-
-	bool saved = false;
-
 	int i, r;
 	int resched;
 
@@ -1854,22 +1939,30 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
 
-	r = amdgpu_suspend(adev);
-
+	/* block scheduler */
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
+
 		if (!ring)
 			continue;
-
-		ring_sizes[i] = amdgpu_ring_backup(ring, &ring_data[i]);
-		if (ring_sizes[i]) {
-			saved = true;
-			dev_info(adev->dev, "Saved %d dwords of commands "
-				 "on ring %d.\n", ring_sizes[i], i);
-		}
+		kthread_park(ring->sched.thread);
+		amd_sched_hw_job_reset(&ring->sched);
 	}
+	/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
+	amdgpu_fence_driver_force_completion(adev);
+
+	/* save scratch */
+	amdgpu_atombios_scratch_regs_save(adev);
+	r = amdgpu_suspend(adev);
 
 retry:
+	/* Disable fb access */
+	if (adev->mode_info.num_crtc) {
+		struct amdgpu_mode_mc_save save;
+		amdgpu_display_stop_mc_access(adev, &save);
+		amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
+	}
+
 	r = amdgpu_asic_reset(adev);
 	/* post card */
 	amdgpu_atom_asic_init(adev->mode_info.atom_context);
@@ -1878,32 +1971,29 @@ retry:
 		dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
 		r = amdgpu_resume(adev);
 	}
-
+	/* restore scratch */
+	amdgpu_atombios_scratch_regs_restore(adev);
 	if (!r) {
+		r = amdgpu_ib_ring_tests(adev);
+		if (r) {
+			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
+			r = amdgpu_suspend(adev);
+			goto retry;
+		}
+
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = adev->rings[i];
 			if (!ring)
 				continue;
-
-			amdgpu_ring_restore(ring, ring_sizes[i], ring_data[i]);
-			ring_sizes[i] = 0;
-			ring_data[i] = NULL;
-		}
-
-		r = amdgpu_ib_ring_tests(adev);
-		if (r) {
-			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
-			if (saved) {
-				saved = false;
-				r = amdgpu_suspend(adev);
-				goto retry;
-			}
+			amd_sched_job_recovery(&ring->sched);
+			kthread_unpark(ring->sched.thread);
 		}
 	} else {
-		amdgpu_fence_driver_force_completion(adev);
+		dev_err(adev->dev, "asic resume failed (%d).\n", r);
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			if (adev->rings[i])
-				kfree(ring_data[i]);
+			if (adev->rings[i]) {
+				kthread_unpark(adev->rings[i]->sched.thread);
+			}
 		}
 	}
 
@@ -1914,13 +2004,11 @@ retry:
 		/* bad news, how to tell it to userspace ? */
 		dev_info(adev->dev, "GPU reset failed\n");
 	}
+	amdgpu_irq_gpu_reset_resume_helper(adev);
 
 	return r;
 }
 
-#define AMDGPU_DEFAULT_PCIE_GEN_MASK 0x30007  /* gen: chipset 1/2, asic 1/2/3 */
-#define AMDGPU_DEFAULT_PCIE_MLW_MASK 0x2f0000 /* 1/2/4/8/16 lanes */
-
 void amdgpu_get_pcie_info(struct amdgpu_device *adev)
 {
 	u32 mask;
@@ -2074,20 +2162,43 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 	struct amdgpu_device *adev = f->f_inode->i_private;
 	ssize_t result = 0;
 	int r;
+	bool use_bank;
+	unsigned instance_bank, sh_bank, se_bank;
 
 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;
 
+	if (*pos & (1ULL << 62)) {
+		se_bank = (*pos >> 24) & 0x3FF;
+		sh_bank = (*pos >> 34) & 0x3FF;
+		instance_bank = (*pos >> 44) & 0x3FF;
+		use_bank = 1;
+		*pos &= 0xFFFFFF;
+	} else {
+		use_bank = 0;
+	}
+
+	if (use_bank) {
+		if (sh_bank >= adev->gfx.config.max_sh_per_se ||
+		    se_bank >= adev->gfx.config.max_shader_engines)
+			return -EINVAL;
+		mutex_lock(&adev->grbm_idx_mutex);
+		amdgpu_gfx_select_se_sh(adev, se_bank,
+					sh_bank, instance_bank);
+	}
+
 	while (size) {
 		uint32_t value;
 
 		if (*pos > adev->rmmio_size)
-			return result;
+			goto end;
 
 		value = RREG32(*pos >> 2);
 		r = put_user(value, (uint32_t *)buf);
-		if (r)
-			return r;
+		if (r) {
+			result = r;
+			goto end;
+		}
 
 		result += 4;
 		buf += 4;
@@ -2095,6 +2206,12 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 		size -= 4;
 	}
 
+end:
+	if (use_bank) {
+		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+		mutex_unlock(&adev->grbm_idx_mutex);
+	}
+
 	return result;
 }
 
@@ -2294,6 +2411,68 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 	return result;
 }
 
+static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = f->f_inode->i_private;
+	ssize_t result = 0;
+	int r;
+	uint32_t *config, no_regs = 0;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	config = kmalloc(256 * sizeof(*config), GFP_KERNEL);
+	if (!config)
+		return -ENOMEM;
+
+	/* version, increment each time something is added */
+	config[no_regs++] = 0;
+	config[no_regs++] = adev->gfx.config.max_shader_engines;
+	config[no_regs++] = adev->gfx.config.max_tile_pipes;
+	config[no_regs++] = adev->gfx.config.max_cu_per_sh;
+	config[no_regs++] = adev->gfx.config.max_sh_per_se;
+	config[no_regs++] = adev->gfx.config.max_backends_per_se;
+	config[no_regs++] = adev->gfx.config.max_texture_channel_caches;
+	config[no_regs++] = adev->gfx.config.max_gprs;
+	config[no_regs++] = adev->gfx.config.max_gs_threads;
+	config[no_regs++] = adev->gfx.config.max_hw_contexts;
+	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend;
+	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend;
+	config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size;
+	config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size;
+	config[no_regs++] = adev->gfx.config.num_tile_pipes;
+	config[no_regs++] = adev->gfx.config.backend_enable_mask;
+	config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes;
+	config[no_regs++] = adev->gfx.config.mem_row_size_in_kb;
+	config[no_regs++] = adev->gfx.config.shader_engine_tile_size;
+	config[no_regs++] = adev->gfx.config.num_gpus;
+	config[no_regs++] = adev->gfx.config.multi_gpu_tile_size;
+	config[no_regs++] = adev->gfx.config.mc_arb_ramcfg;
+	config[no_regs++] = adev->gfx.config.gb_addr_config;
+	config[no_regs++] = adev->gfx.config.num_rbs;
+
+	while (size && (*pos < no_regs * 4)) {
+		uint32_t value;
+
+		value = config[*pos >> 2];
+		r = put_user(value, (uint32_t *)buf);
+		if (r) {
+			kfree(config);
+			return r;
+		}
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+	kfree(config);
+	return result;
+}
+
+
 static const struct file_operations amdgpu_debugfs_regs_fops = {
 	.owner = THIS_MODULE,
 	.read = amdgpu_debugfs_regs_read,
@@ -2319,11 +2498,18 @@ static const struct file_operations amdgpu_debugfs_regs_smc_fops = {
 	.llseek = default_llseek
 };
 
+static const struct file_operations amdgpu_debugfs_gca_config_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_gca_config_read,
+	.llseek = default_llseek
+};
+
 static const struct file_operations *debugfs_regs[] = {
 	&amdgpu_debugfs_regs_fops,
 	&amdgpu_debugfs_regs_didt_fops,
 	&amdgpu_debugfs_regs_pcie_fops,
 	&amdgpu_debugfs_regs_smc_fops,
+	&amdgpu_debugfs_gca_config_fops,
 };
 
 static const char *debugfs_regs_names[] = {
@@ -2331,6 +2517,7 @@ static const char *debugfs_regs_names[] = {
 	"amdgpu_regs_didt",
 	"amdgpu_regs_pcie",
 	"amdgpu_regs_smc",
+	"amdgpu_gca_config",
 };
 
 static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index a6eecf6f9065..7dbe8d02c5a6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -220,19 +220,17 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
 
 	r = amdgpu_bo_pin_restricted(new_rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, 0, &base);
 	if (unlikely(r != 0)) {
-		amdgpu_bo_unreserve(new_rbo);
 		r = -EINVAL;
 		DRM_ERROR("failed to pin new rbo buffer before flip\n");
-		goto cleanup;
+		goto unreserve;
 	}
 
 	r = reservation_object_get_fences_rcu(new_rbo->tbo.resv, &work->excl,
 					      &work->shared_count,
 					      &work->shared);
 	if (unlikely(r != 0)) {
-		amdgpu_bo_unreserve(new_rbo);
 		DRM_ERROR("failed to get fences for buffer\n");
-		goto cleanup;
+		goto unpin;
 	}
 
 	amdgpu_bo_get_tiling_flags(new_rbo, &tiling_flags);
@@ -275,9 +273,11 @@ pflip_cleanup:
 		DRM_ERROR("failed to reserve new rbo in error path\n");
 		goto cleanup;
 	}
+unpin:
 	if (unlikely(amdgpu_bo_unpin(new_rbo) != 0)) {
 		DRM_ERROR("failed to unpin new rbo in error path\n");
 	}
+unreserve:
 	amdgpu_bo_unreserve(new_rbo);
 
 cleanup:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index f888c015f76c..015f1f4aae53 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -82,8 +82,12 @@ int amdgpu_exp_hw_support = 0;
 int amdgpu_sched_jobs = 32;
 int amdgpu_sched_hw_submission = 2;
 int amdgpu_powerplay = -1;
+int amdgpu_powercontainment = 1;
 unsigned amdgpu_pcie_gen_cap = 0;
 unsigned amdgpu_pcie_lane_cap = 0;
+unsigned amdgpu_cg_mask = 0xffffffff;
+unsigned amdgpu_pg_mask = 0xffffffff;
+char *amdgpu_disable_cu = NULL;
 
 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -160,6 +164,9 @@ module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
 #ifdef CONFIG_DRM_AMD_POWERPLAY
 MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))");
 module_param_named(powerplay, amdgpu_powerplay, int, 0444);
+
+MODULE_PARM_DESC(powercontainment, "Power Containment (1 = enable (default), 0 = disable)");
+module_param_named(powercontainment, amdgpu_powercontainment, int, 0444);
 #endif
 
 MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))");
@@ -168,6 +175,15 @@ module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
 MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
 module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
 
+MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)");
+module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
+
+MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
+module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
+
+MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");
+module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	/* Kaveri */
@@ -413,7 +429,10 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
 	pci_ignore_hotplug(pdev);
-	pci_set_power_state(pdev, PCI_D3cold);
+	if (amdgpu_is_atpx_hybrid())
+		pci_set_power_state(pdev, PCI_D3cold);
+	else if (!amdgpu_has_atpx_dgpu_power_cntl())
+		pci_set_power_state(pdev, PCI_D3hot);
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
 
 	return 0;
@@ -430,7 +449,9 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
 
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 
-	pci_set_power_state(pdev, PCI_D0);
+	if (amdgpu_is_atpx_hybrid() ||
+	    !amdgpu_has_atpx_dgpu_power_cntl())
+		pci_set_power_state(pdev, PCI_D0);
 	pci_restore_state(pdev);
 	ret = pci_enable_device(pdev);
 	if (ret)
@@ -515,7 +536,7 @@ static struct drm_driver kms_driver = {
 	.driver_features =
 	    DRIVER_USE_AGP |
 	    DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
-	    DRIVER_PRIME | DRIVER_RENDER,
+	    DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
 	.dev_priv_size = 0,
 	.load = amdgpu_driver_load_kms,
 	.open = amdgpu_driver_open_kms,
@@ -590,7 +611,6 @@ static int __init amdgpu_init(void)
 	DRM_INFO("amdgpu kernel modesetting enabled.\n");
 	driver = &kms_driver;
 	pdriver = &amdgpu_kms_pci_driver;
-	driver->driver_features |= DRIVER_MODESET;
 	driver->num_ioctls = amdgpu_max_kms_ioctl;
 	amdgpu_register_atpx_handler();
 	/* let modprobe override vga console setting */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 8fab6486064f..88fbed2389c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -503,7 +503,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 	if (r)
 		goto error_print;
 
-	amdgpu_vm_get_pt_bos(bo_va->vm, &duplicates);
+	amdgpu_vm_get_pt_bos(adev, bo_va->vm, &duplicates);
 	list_for_each_entry(entry, &list, head) {
 		domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type);
 		/* if anything is swapped out don't swap it in here,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 9f95da4f0536..a074edd95c70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -70,3 +70,47 @@ void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg)
 		}
 	}
 }
+
+/**
+ * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
+ *
+ * @mask: array in which the per-shader array disable masks will be stored
+ * @max_se: number of SEs
+ * @max_sh: number of SHs
+ *
+ * The bitmask of CUs to be disabled in the shader array determined by se and
+ * sh is stored in mask[se * max_sh + sh].
+ */
+void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
+{
+	unsigned se, sh, cu;
+	const char *p;
+
+	memset(mask, 0, sizeof(*mask) * max_se * max_sh);
+
+	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
+		return;
+
+	p = amdgpu_disable_cu;
+	for (;;) {
+		char *next;
+		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
+		if (ret < 3) {
+			DRM_ERROR("amdgpu: could not parse disable_cu\n");
+			return;
+		}
+
+		if (se < max_se && sh < max_sh && cu < 16) {
+			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
+			mask[se * max_sh + sh] |= 1u << cu;
+		} else {
+			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
+				  se, sh, cu);
+		}
+
+		next = strchr(p, ',');
+		if (!next)
+			break;
+		p = next + 1;
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index dc06cbda7be6..51321e154c09 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -27,4 +27,6 @@
 int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
 void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);
 
+void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 34e35423b78e..46c3097c5224 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -160,10 +160,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
 	if (vm) {
-		r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr,
-				    job->gds_base, job->gds_size,
-				    job->gws_base, job->gws_size,
-				    job->oa_base, job->oa_size);
+		r = amdgpu_vm_flush(ring, job);
 		if (r) {
 			amdgpu_ring_undo(ring);
 			return r;
@@ -203,11 +200,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	}
 
 	/* wrap the last IB with fence */
-	if (job && job->uf_bo) {
-		uint64_t addr = amdgpu_bo_gpu_offset(job->uf_bo);
-
-		addr += job->uf_offset;
-		amdgpu_ring_emit_fence(ring, addr, job->uf_sequence,
+	if (job && job->uf_addr) {
+		amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
 				       AMDGPU_FENCE_FLAG_64BIT);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 835a3fa8d8df..278708f5a744 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -383,6 +383,18 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
 	return r;
 }
 
+void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
+{
+	int i, j;
+	for (i = 0; i < AMDGPU_MAX_IRQ_SRC_ID; i++) {
+		struct amdgpu_irq_src *src = adev->irq.sources[i];
+		if (!src)
+			continue;
+		for (j = 0; j < src->num_types; j++)
+			amdgpu_irq_update(adev, src, j);
+	}
+}
+
 /**
  * amdgpu_irq_get - enable interrupt
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
index e124b59f39c1..7ef09352e534 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@@ -94,6 +94,7 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
 		   unsigned type);
 bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
 			unsigned type);
+void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev);
 
 int amdgpu_irq_add_domain(struct amdgpu_device *adev);
 void amdgpu_irq_remove_domain(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index f0dafa514fe4..aaee0c8f6731 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -28,21 +28,15 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
-static void amdgpu_job_free_handler(struct work_struct *ws)
+static void amdgpu_job_timedout(struct amd_sched_job *s_job)
 {
-	struct amdgpu_job *job = container_of(ws, struct amdgpu_job, base.work_free_job);
-	amd_sched_job_put(&job->base);
-}
+	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
 
-void amdgpu_job_timeout_func(struct work_struct *work)
-{
-	struct amdgpu_job *job = container_of(work, struct amdgpu_job, base.work_tdr.work);
 	DRM_ERROR("ring %s timeout, last signaled seq=%u, last emitted seq=%u\n",
-				job->base.sched->name,
-				(uint32_t)atomic_read(&job->ring->fence_drv.last_seq),
-				job->ring->fence_drv.sync_seq);
-
-	amd_sched_job_put(&job->base);
+		  job->base.sched->name,
+		  atomic_read(&job->ring->fence_drv.last_seq),
+		  job->ring->fence_drv.sync_seq);
+	amdgpu_gpu_reset(job->adev);
 }
 
 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
@@ -63,7 +57,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 	(*job)->vm = vm;
 	(*job)->ibs = (void *)&(*job)[1];
 	(*job)->num_ibs = num_ibs;
-	INIT_WORK(&(*job)->base.work_free_job, amdgpu_job_free_handler);
 
 	amdgpu_sync_create(&(*job)->sync);
 
@@ -86,27 +79,33 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 	return r;
 }
 
-void amdgpu_job_free(struct amdgpu_job *job)
+void amdgpu_job_free_resources(struct amdgpu_job *job)
 {
-	unsigned i;
 	struct fence *f;
+	unsigned i;
+
 	/* use sched fence if available */
-	f = (job->base.s_fence)? &job->base.s_fence->base : job->fence;
+	f = job->base.s_fence ? &job->base.s_fence->finished : job->fence;
 
 	for (i = 0; i < job->num_ibs; ++i)
-		amdgpu_sa_bo_free(job->adev, &job->ibs[i].sa_bo, f);
-	fence_put(job->fence);
+		amdgpu_ib_free(job->adev, &job->ibs[i], f);
+}
 
-	amdgpu_bo_unref(&job->uf_bo);
-	amdgpu_sync_free(&job->sync);
+void amdgpu_job_free_cb(struct amd_sched_job *s_job)
+{
+	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
 
-	if (!job->base.use_sched)
-		kfree(job);
+	fence_put(job->fence);
+	amdgpu_sync_free(&job->sync);
+	kfree(job);
 }
 
-void amdgpu_job_free_func(struct kref *refcount)
+void amdgpu_job_free(struct amdgpu_job *job)
 {
-	struct amdgpu_job *job = container_of(refcount, struct amdgpu_job, base.refcount);
+	amdgpu_job_free_resources(job);
+
+	fence_put(job->fence);
+	amdgpu_sync_free(&job->sync);
 	kfree(job);
 }
 
@@ -114,22 +113,20 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct amd_sched_entity *entity, void *owner,
 		      struct fence **f)
 {
-	struct fence *fence;
 	int r;
 	job->ring = ring;
 
 	if (!f)
 		return -EINVAL;
 
-	r = amd_sched_job_init(&job->base, &ring->sched,
-			       entity, amdgpu_job_timeout_func,
-			       amdgpu_job_free_func, owner, &fence);
+	r = amd_sched_job_init(&job->base, &ring->sched, entity, owner);
 	if (r)
 		return r;
 
 	job->owner = owner;
 	job->ctx = entity->fence_context;
-	*f = fence_get(fence);
+	*f = fence_get(&job->base.s_fence->finished);
+	amdgpu_job_free_resources(job);
 	amd_sched_entity_push_job(&job->base);
 
 	return 0;
@@ -147,8 +144,8 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 		int r;
 
 		r = amdgpu_vm_grab_id(vm, ring, &job->sync,
-				      &job->base.s_fence->base,
-				      &job->vm_id, &job->vm_pd_addr);
+				      &job->base.s_fence->finished,
+				      job);
 		if (r)
 			DRM_ERROR("Error getting VM ID (%d)\n", r);
 
@@ -170,11 +167,7 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 	}
 	job = to_amdgpu_job(sched_job);
 
-	r = amdgpu_sync_wait(&job->sync);
-	if (r) {
-		DRM_ERROR("failed to sync wait (%d)\n", r);
-		return NULL;
-	}
+	BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
 
 	trace_amdgpu_sched_run_job(job);
 	r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs,
@@ -185,14 +178,15 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 	}
 
 err:
+	/* if gpu reset, hw fence will be replaced here */
+	fence_put(job->fence);
 	job->fence = fence;
-	amdgpu_job_free(job);
 	return fence;
 }
 
 const struct amd_sched_backend_ops amdgpu_sched_ops = {
 	.dependency = amdgpu_job_dependency,
 	.run_job = amdgpu_job_run,
-	.begin_job = amd_sched_job_begin,
-	.finish_job = amd_sched_job_finish,
+	.timedout_job = amdgpu_job_timedout,
+	.free_job = amdgpu_job_free_cb
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 38a28d137f1d..a8efbb54423f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -148,6 +148,65 @@ out:
 	return r;
 }
 
+static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
+				struct drm_amdgpu_query_fw *query_fw,
+				struct amdgpu_device *adev)
+{
+	switch (query_fw->fw_type) {
+	case AMDGPU_INFO_FW_VCE:
+		fw_info->ver = adev->vce.fw_version;
+		fw_info->feature = adev->vce.fb_version;
+		break;
+	case AMDGPU_INFO_FW_UVD:
+		fw_info->ver = adev->uvd.fw_version;
+		fw_info->feature = 0;
+		break;
+	case AMDGPU_INFO_FW_GMC:
+		fw_info->ver = adev->mc.fw_version;
+		fw_info->feature = 0;
+		break;
+	case AMDGPU_INFO_FW_GFX_ME:
+		fw_info->ver = adev->gfx.me_fw_version;
+		fw_info->feature = adev->gfx.me_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_PFP:
+		fw_info->ver = adev->gfx.pfp_fw_version;
+		fw_info->feature = adev->gfx.pfp_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_CE:
+		fw_info->ver = adev->gfx.ce_fw_version;
+		fw_info->feature = adev->gfx.ce_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_RLC:
+		fw_info->ver = adev->gfx.rlc_fw_version;
+		fw_info->feature = adev->gfx.rlc_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_MEC:
+		if (query_fw->index == 0) {
+			fw_info->ver = adev->gfx.mec_fw_version;
+			fw_info->feature = adev->gfx.mec_feature_version;
+		} else if (query_fw->index == 1) {
+			fw_info->ver = adev->gfx.mec2_fw_version;
+			fw_info->feature = adev->gfx.mec2_feature_version;
+		} else
+			return -EINVAL;
+		break;
+	case AMDGPU_INFO_FW_SMC:
+		fw_info->ver = adev->pm.fw_version;
+		fw_info->feature = 0;
+		break;
+	case AMDGPU_INFO_FW_SDMA:
+		if (query_fw->index >= adev->sdma.num_instances)
+			return -EINVAL;
+		fw_info->ver = adev->sdma.instance[query_fw->index].fw_version;
+		fw_info->feature = adev->sdma.instance[query_fw->index].feature_version;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
 /*
  * Userspace get information ioctl
  */
@@ -294,67 +353,20 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0;
 	}
 	case AMDGPU_INFO_TIMESTAMP:
-		ui64 = amdgpu_asic_get_gpu_clock_counter(adev);
+		ui64 = amdgpu_gfx_get_gpu_clock_counter(adev);
 		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
 	case AMDGPU_INFO_FW_VERSION: {
 		struct drm_amdgpu_info_firmware fw_info;
+		int ret;
 
 		/* We only support one instance of each IP block right now. */
 		if (info->query_fw.ip_instance != 0)
 			return -EINVAL;
 
-		switch (info->query_fw.fw_type) {
-		case AMDGPU_INFO_FW_VCE:
-			fw_info.ver = adev->vce.fw_version;
-			fw_info.feature = adev->vce.fb_version;
-			break;
-		case AMDGPU_INFO_FW_UVD:
-			fw_info.ver = adev->uvd.fw_version;
-			fw_info.feature = 0;
-			break;
-		case AMDGPU_INFO_FW_GMC:
-			fw_info.ver = adev->mc.fw_version;
-			fw_info.feature = 0;
-			break;
-		case AMDGPU_INFO_FW_GFX_ME:
-			fw_info.ver = adev->gfx.me_fw_version;
-			fw_info.feature = adev->gfx.me_feature_version;
-			break;
-		case AMDGPU_INFO_FW_GFX_PFP:
-			fw_info.ver = adev->gfx.pfp_fw_version;
-			fw_info.feature = adev->gfx.pfp_feature_version;
-			break;
-		case AMDGPU_INFO_FW_GFX_CE:
-			fw_info.ver = adev->gfx.ce_fw_version;
-			fw_info.feature = adev->gfx.ce_feature_version;
-			break;
-		case AMDGPU_INFO_FW_GFX_RLC:
-			fw_info.ver = adev->gfx.rlc_fw_version;
-			fw_info.feature = adev->gfx.rlc_feature_version;
-			break;
-		case AMDGPU_INFO_FW_GFX_MEC:
-			if (info->query_fw.index == 0) {
-				fw_info.ver = adev->gfx.mec_fw_version;
-				fw_info.feature = adev->gfx.mec_feature_version;
-			} else if (info->query_fw.index == 1) {
-				fw_info.ver = adev->gfx.mec2_fw_version;
-				fw_info.feature = adev->gfx.mec2_feature_version;
-			} else
-				return -EINVAL;
-			break;
-		case AMDGPU_INFO_FW_SMC:
-			fw_info.ver = adev->pm.fw_version;
-			fw_info.feature = 0;
-			break;
-		case AMDGPU_INFO_FW_SDMA:
-			if (info->query_fw.index >= adev->sdma.num_instances)
-				return -EINVAL;
-			fw_info.ver = adev->sdma.instance[info->query_fw.index].fw_version;
-			fw_info.feature = adev->sdma.instance[info->query_fw.index].feature_version;
-			break;
-		default:
-			return -EINVAL;
-		}
+		ret = amdgpu_firmware_info(&fw_info, &info->query_fw, adev);
+		if (ret)
+			return ret;
+
 		return copy_to_user(out, &fw_info,
 				    min((size_t)size, sizeof(fw_info))) ? -EFAULT : 0;
 	}
@@ -453,7 +465,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 			dev_info.max_memory_clock = adev->pm.default_mclk * 10;
 		}
 		dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
-		dev_info.num_rb_pipes = adev->gfx.config.num_rbs;
+		dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se *
+			adev->gfx.config.max_shader_engines;
 		dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
 		dev_info._pad = 0;
 		dev_info.ids_flags = 0;
@@ -761,3 +774,130 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 };
 const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct drm_amdgpu_info_firmware fw_info;
+	struct drm_amdgpu_query_fw query_fw;
+	int ret, i;
+
+	/* VCE */
+	query_fw.fw_type = AMDGPU_INFO_FW_VCE;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "VCE feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* UVD */
+	query_fw.fw_type = AMDGPU_INFO_FW_UVD;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "UVD feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* GMC */
+	query_fw.fw_type = AMDGPU_INFO_FW_GMC;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "MC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* ME */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_ME;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "ME feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* PFP */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_PFP;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "PFP feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* CE */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_CE;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "CE feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* RLC */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* MEC */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC;
+	query_fw.index = 0;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "MEC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* MEC2 */
+	if (adev->asic_type == CHIP_KAVERI ||
+	    (adev->asic_type > CHIP_TOPAZ && adev->asic_type != CHIP_STONEY)) {
+		query_fw.index = 1;
+		ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+		if (ret)
+			return ret;
+		seq_printf(m, "MEC2 feature version: %u, firmware version: 0x%08x\n",
+			   fw_info.feature, fw_info.ver);
+	}
+
+	/* SMC */
+	query_fw.fw_type = AMDGPU_INFO_FW_SMC;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "SMC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* SDMA */
+	query_fw.fw_type = AMDGPU_INFO_FW_SDMA;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		query_fw.index = i;
+		ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+		if (ret)
+			return ret;
+		seq_printf(m, "SDMA%d feature version: %u, firmware version: 0x%08x\n",
+			   i, fw_info.feature, fw_info.ver);
+	}
+
+	return 0;
+}
+
+static const struct drm_info_list amdgpu_firmware_info_list[] = {
+	{"amdgpu_firmware_info", amdgpu_debugfs_firmware_info, 0, NULL},
+};
+#endif
+
+int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return amdgpu_debugfs_add_files(adev, amdgpu_firmware_info_list,
+					ARRAY_SIZE(amdgpu_firmware_info_list));
+#else
+	return 0;
+#endif
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 7ecea83ce453..6f0873c75a25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -589,6 +589,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 			   struct ttm_mem_reg *new_mem)
 {
 	struct amdgpu_bo *rbo;
+	struct ttm_mem_reg *old_mem = &bo->mem;
 
 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
 		return;
@@ -602,6 +603,8 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 
 	/* move_notify is called before move happens */
 	amdgpu_update_memory_usage(rbo->adev, &bo->mem, new_mem);
+
+	trace_amdgpu_ttm_bo_move(rbo, new_mem->mem_type, old_mem->mem_type);
 }
 
 int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 589b36e8c5cf..ff63b88b0ffa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -270,30 +270,28 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
 	struct drm_device *ddev = dev_get_drvdata(dev);
 	struct amdgpu_device *adev = ddev->dev_private;
 	enum amd_pm_state_type state = 0;
-	long idx;
+	unsigned long idx;
 	int ret;
 
 	if (strlen(buf) == 1)
 		adev->pp_force_state_enabled = false;
-	else {
-		ret = kstrtol(buf, 0, &idx);
+	else if (adev->pp_enabled) {
+		struct pp_states_info data;
 
-		if (ret) {
+		ret = kstrtoul(buf, 0, &idx);
+		if (ret || idx >= ARRAY_SIZE(data.states)) {
 			count = -EINVAL;
 			goto fail;
 		}
 
-		if (adev->pp_enabled) {
-			struct pp_states_info data;
-			amdgpu_dpm_get_pp_num_states(adev, &data);
-			state = data.states[idx];
-			/* only set user selected power states */
-			if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
-				state != POWER_STATE_TYPE_DEFAULT) {
-				amdgpu_dpm_dispatch_task(adev,
-						AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL);
-				adev->pp_force_state_enabled = true;
-			}
+		amdgpu_dpm_get_pp_num_states(adev, &data);
+		state = data.states[idx];
+		/* only set user selected power states */
+		if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
+		    state != POWER_STATE_TYPE_DEFAULT) {
+			amdgpu_dpm_dispatch_task(adev,
+					AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL);
+			adev->pp_force_state_enabled = true;
 		}
 	}
 fail:
@@ -349,6 +347,8 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
 
 	if (adev->pp_enabled)
 		size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
+	else if (adev->pm.funcs->print_clock_levels)
+		size = adev->pm.funcs->print_clock_levels(adev, PP_SCLK, buf);
 
 	return size;
 }
@@ -365,7 +365,9 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
 	uint32_t i, mask = 0;
 	char sub_str[2];
 
-	for (i = 0; i < strlen(buf) - 1; i++) {
+	for (i = 0; i < strlen(buf); i++) {
+		if (*(buf + i) == '\n')
+			continue;
 		sub_str[0] = *(buf + i);
 		sub_str[1] = '\0';
 		ret = kstrtol(sub_str, 0, &level);
@@ -379,6 +381,8 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
 
 	if (adev->pp_enabled)
 		amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
+	else if (adev->pm.funcs->force_clock_level)
+		adev->pm.funcs->force_clock_level(adev, PP_SCLK, mask);
 fail:
 	return count;
 }
@@ -393,6 +397,8 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
 
 	if (adev->pp_enabled)
 		size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
+	else if (adev->pm.funcs->print_clock_levels)
+		size = adev->pm.funcs->print_clock_levels(adev, PP_MCLK, buf);
 
 	return size;
 }
@@ -409,7 +415,9 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
 	uint32_t i, mask = 0;
 	char sub_str[2];
 
-	for (i = 0; i < strlen(buf) - 1; i++) {
+	for (i = 0; i < strlen(buf); i++) {
+		if (*(buf + i) == '\n')
+			continue;
 		sub_str[0] = *(buf + i);
 		sub_str[1] = '\0';
 		ret = kstrtol(sub_str, 0, &level);
@@ -423,6 +431,8 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
 
 	if (adev->pp_enabled)
 		amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
+	else if (adev->pm.funcs->force_clock_level)
+		adev->pm.funcs->force_clock_level(adev, PP_MCLK, mask);
 fail:
 	return count;
 }
@@ -437,6 +447,8 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
 
 	if (adev->pp_enabled)
 		size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
+	else if (adev->pm.funcs->print_clock_levels)
+		size = adev->pm.funcs->print_clock_levels(adev, PP_PCIE, buf);
 
 	return size;
 }
@@ -453,7 +465,9 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
 	uint32_t i, mask = 0;
 	char sub_str[2];
 
-	for (i = 0; i < strlen(buf) - 1; i++) {
+	for (i = 0; i < strlen(buf); i++) {
+		if (*(buf + i) == '\n')
+			continue;
 		sub_str[0] = *(buf + i);
 		sub_str[1] = '\0';
 		ret = kstrtol(sub_str, 0, &level);
@@ -467,6 +481,100 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
 
 	if (adev->pp_enabled)
 		amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
+	else if (adev->pm.funcs->force_clock_level)
+		adev->pm.funcs->force_clock_level(adev, PP_PCIE, mask);
+fail:
+	return count;
+}
+
+static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	uint32_t value = 0;
+
+	if (adev->pp_enabled)
+		value = amdgpu_dpm_get_sclk_od(adev);
+	else if (adev->pm.funcs->get_sclk_od)
+		value = adev->pm.funcs->get_sclk_od(adev);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", value);
+}
+
+static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	int ret;
+	long int value;
+
+	ret = kstrtol(buf, 0, &value);
+
+	if (ret) {
+		count = -EINVAL;
+		goto fail;
+	}
+
+	if (adev->pp_enabled) {
+		amdgpu_dpm_set_sclk_od(adev, (uint32_t)value);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_READJUST_POWER_STATE, NULL, NULL);
+	} else if (adev->pm.funcs->set_sclk_od) {
+		adev->pm.funcs->set_sclk_od(adev, (uint32_t)value);
+		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
+		amdgpu_pm_compute_clocks(adev);
+	}
+
+fail:
+	return count;
+}
+
+static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	uint32_t value = 0;
+
+	if (adev->pp_enabled)
+		value = amdgpu_dpm_get_mclk_od(adev);
+	else if (adev->pm.funcs->get_mclk_od)
+		value = adev->pm.funcs->get_mclk_od(adev);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", value);
+}
+
+static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	int ret;
+	long int value;
+
+	ret = kstrtol(buf, 0, &value);
+
+	if (ret) {
+		count = -EINVAL;
+		goto fail;
+	}
+
+	if (adev->pp_enabled) {
+		amdgpu_dpm_set_mclk_od(adev, (uint32_t)value);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_READJUST_POWER_STATE, NULL, NULL);
+	} else if (adev->pm.funcs->set_mclk_od) {
+		adev->pm.funcs->set_mclk_od(adev, (uint32_t)value);
+		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
+		amdgpu_pm_compute_clocks(adev);
+	}
+
 fail:
 	return count;
 }
@@ -492,6 +600,12 @@ static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR,
 static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR,
 		amdgpu_get_pp_dpm_pcie,
 		amdgpu_set_pp_dpm_pcie);
+static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_sclk_od,
+		amdgpu_set_pp_sclk_od);
+static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_mclk_od,
+		amdgpu_set_pp_mclk_od);
 
 static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 				      struct device_attribute *attr,
@@ -1110,22 +1224,34 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 			DRM_ERROR("failed to create device file pp_table\n");
 			return ret;
 		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_dpm_sclk\n");
-			return ret;
-		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_dpm_mclk\n");
-			return ret;
-		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_dpm_pcie\n");
-			return ret;
-		}
 	}
+
+	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_dpm_sclk\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_dpm_mclk\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_dpm_pcie\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_sclk_od);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_sclk_od\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_mclk_od);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_mclk_od\n");
+		return ret;
+	}
+
 	ret = amdgpu_debugfs_pm_init(adev);
 	if (ret) {
 		DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -1148,10 +1274,12 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
 		device_remove_file(adev->dev, &dev_attr_pp_cur_state);
 		device_remove_file(adev->dev, &dev_attr_pp_force_state);
 		device_remove_file(adev->dev, &dev_attr_pp_table);
-		device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
-		device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
-		device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
 	}
+	device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
+	device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
+	device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
+	device_remove_file(adev->dev, &dev_attr_pp_sclk_od);
+	device_remove_file(adev->dev, &dev_attr_pp_mclk_od);
 }
 
 void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index 6bd961fb43dc..c5738a22b690 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -52,6 +52,7 @@ static int amdgpu_powerplay_init(struct amdgpu_device *adev)
 		pp_init->chip_family = adev->family;
 		pp_init->chip_id = adev->asic_type;
 		pp_init->device = amdgpu_cgs_create_device(adev);
+		pp_init->powercontainment_enabled = amdgpu_powercontainment;
 
 		ret = amd_powerplay_init(pp_init, amd_pp);
 		kfree(pp_init);
@@ -183,13 +184,6 @@ static int amdgpu_pp_sw_fini(void *handle)
 	if (ret)
 		return ret;
 
-#ifdef CONFIG_DRM_AMD_POWERPLAY
-	if (adev->pp_enabled) {
-		amdgpu_pm_sysfs_fini(adev);
-		amd_powerplay_fini(adev->powerplay.pp_handle);
-	}
-#endif
-
 	return ret;
 }
 
@@ -223,6 +217,22 @@ static int amdgpu_pp_hw_fini(void *handle)
 	return ret;
 }
 
+static void amdgpu_pp_late_fini(void *handle)
+{
+#ifdef CONFIG_DRM_AMD_POWERPLAY
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (adev->pp_enabled) {
+		amdgpu_pm_sysfs_fini(adev);
+		amd_powerplay_fini(adev->powerplay.pp_handle);
+	}
+
+	if (adev->powerplay.ip_funcs->late_fini)
+		adev->powerplay.ip_funcs->late_fini(
+			  adev->powerplay.pp_handle);
+#endif
+}
+
 static int amdgpu_pp_suspend(void *handle)
 {
 	int ret = 0;
@@ -311,6 +321,7 @@ const struct amd_ip_funcs amdgpu_pp_ip_funcs = {
 	.sw_fini = amdgpu_pp_sw_fini,
 	.hw_init = amdgpu_pp_hw_init,
 	.hw_fini = amdgpu_pp_hw_fini,
+	.late_fini = amdgpu_pp_late_fini,
 	.suspend = amdgpu_pp_suspend,
 	.resume = amdgpu_pp_resume,
 	.is_idle = amdgpu_pp_is_idle,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 3b02272db678..3b885e3e9b56 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -28,6 +28,7 @@
  */
 #include <linux/seq_file.h>
 #include <linux/slab.h>
+#include <linux/debugfs.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@@ -48,6 +49,7 @@
  */
 static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
 				    struct amdgpu_ring *ring);
+static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring);
 
 /**
  * amdgpu_ring_alloc - allocate space on the ring buffer
@@ -140,78 +142,6 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
 }
 
 /**
- * amdgpu_ring_backup - Back up the content of a ring
- *
- * @ring: the ring we want to back up
- *
- * Saves all unprocessed commits from a ring, returns the number of dwords saved.
- */
-unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
-			    uint32_t **data)
-{
-	unsigned size, ptr, i;
-
-	*data = NULL;
-
-	if (ring->ring_obj == NULL)
-		return 0;
-
-	/* it doesn't make sense to save anything if all fences are signaled */
-	if (!amdgpu_fence_count_emitted(ring))
-		return 0;
-
-	ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);
-
-	size = ring->wptr + (ring->ring_size / 4);
-	size -= ptr;
-	size &= ring->ptr_mask;
-	if (size == 0)
-		return 0;
-
-	/* and then save the content of the ring */
-	*data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
-	if (!*data)
-		return 0;
-	for (i = 0; i < size; ++i) {
-		(*data)[i] = ring->ring[ptr++];
-		ptr &= ring->ptr_mask;
-	}
-
-	return size;
-}
-
-/**
- * amdgpu_ring_restore - append saved commands to the ring again
- *
- * @ring: ring to append commands to
- * @size: number of dwords we want to write
- * @data: saved commands
- *
- * Allocates space on the ring and restore the previously saved commands.
- */
-int amdgpu_ring_restore(struct amdgpu_ring *ring,
-			unsigned size, uint32_t *data)
-{
-	int i, r;
-
-	if (!size || !data)
-		return 0;
-
-	/* restore the saved ring content */
-	r = amdgpu_ring_alloc(ring, size);
-	if (r)
-		return r;
-
-	for (i = 0; i < size; ++i) {
-		amdgpu_ring_write(ring, data[i]);
-	}
-
-	amdgpu_ring_commit(ring);
-	kfree(data);
-	return 0;
-}
-
-/**
  * amdgpu_ring_init - init driver ring struct.
  *
  * @adev: amdgpu_device pointer
@@ -260,14 +190,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		return r;
 	}
 
-	r = amdgpu_wb_get(adev, &ring->next_rptr_offs);
-	if (r) {
-		dev_err(adev->dev, "(%d) ring next_rptr wb alloc failed\n", r);
-		return r;
-	}
-	ring->next_rptr_gpu_addr = adev->wb.gpu_addr + ring->next_rptr_offs * 4;
-	ring->next_rptr_cpu_addr = &adev->wb.wb[ring->next_rptr_offs];
-
 	r = amdgpu_wb_get(adev, &ring->cond_exe_offs);
 	if (r) {
 		dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
@@ -310,6 +232,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		}
 		r = amdgpu_bo_kmap(ring->ring_obj,
 				       (void **)&ring->ring);
+
+		memset((void *)ring->ring, 0, ring->ring_size);
+
 		amdgpu_bo_unreserve(ring->ring_obj);
 		if (r) {
 			dev_err(adev->dev, "(%d) ring map failed\n", r);
@@ -343,10 +268,10 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 	ring->ring = NULL;
 	ring->ring_obj = NULL;
 
+	amdgpu_wb_free(ring->adev, ring->cond_exe_offs);
 	amdgpu_wb_free(ring->adev, ring->fence_offs);
 	amdgpu_wb_free(ring->adev, ring->rptr_offs);
 	amdgpu_wb_free(ring->adev, ring->wptr_offs);
-	amdgpu_wb_free(ring->adev, ring->next_rptr_offs);
 
 	if (ring_obj) {
 		r = amdgpu_bo_reserve(ring_obj, false);
@@ -357,6 +282,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 		}
 		amdgpu_bo_unref(&ring_obj);
 	}
+	amdgpu_debugfs_ring_fini(ring);
 }
 
 /*
@@ -364,57 +290,62 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
  */
 #if defined(CONFIG_DEBUG_FS)
 
-static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
+/* The file starts with a 12 byte header made of three 32-bit words:
+ * - rptr
+ * - wptr
+ * - driver's copy of wptr
+ *
+ * followed by the ring contents (ring_size bytes, read as 32-bit words)
+ */
+static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct amdgpu_device *adev = dev->dev_private;
-	int roffset = (unsigned long)node->info_ent->data;
-	struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset);
-	uint32_t rptr, wptr, rptr_next;
-	unsigned i;
-
-	wptr = amdgpu_ring_get_wptr(ring);
-	seq_printf(m, "wptr: 0x%08x [%5d]\n", wptr, wptr);
-
-	rptr = amdgpu_ring_get_rptr(ring);
-	rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
-
-	seq_printf(m, "rptr: 0x%08x [%5d]\n", rptr, rptr);
-
-	seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
-		   ring->wptr, ring->wptr);
-
-	if (!ring->ready)
-		return 0;
-
-	/* print 8 dw before current rptr as often it's the last executed
-	 * packet that is the root issue
-	 */
-	i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
-	while (i != rptr) {
-		seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
-		if (i == rptr)
-			seq_puts(m, " *");
-		if (i == rptr_next)
-			seq_puts(m, " #");
-		seq_puts(m, "\n");
-		i = (i + 1) & ring->ptr_mask;
+	struct amdgpu_ring *ring = (struct amdgpu_ring*)f->f_inode->i_private;
+	int r, i;
+	uint32_t value, result, early[3];
+
+	if (*pos & 3 || size & 3)
+		return -EINVAL;
+
+	result = 0;
+
+	if (*pos < 12) {
+		early[0] = amdgpu_ring_get_rptr(ring);
+		early[1] = amdgpu_ring_get_wptr(ring);
+		early[2] = ring->wptr;
+		for (i = *pos / 4; i < 3 && size; i++) {
+			r = put_user(early[i], (uint32_t *)buf);
+			if (r)
+				return r;
+			buf += 4;
+			result += 4;
+			size -= 4;
+			*pos += 4;
+		}
 	}
-	while (i != wptr) {
-		seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
-		if (i == rptr)
-			seq_puts(m, " *");
-		if (i == rptr_next)
-			seq_puts(m, " #");
-		seq_puts(m, "\n");
-		i = (i + 1) & ring->ptr_mask;
+
+	while (size) {
+		if (*pos >= (ring->ring_size + 12))
+			return result;
+			
+		value = ring->ring[(*pos - 12)/4];
+		r = put_user(value, (uint32_t*)buf);
+		if (r)
+			return r;
+		buf += 4;
+		result += 4;
+		size -= 4;
+		*pos += 4;
 	}
-	return 0;
+
+	return result;
 }
 
-static struct drm_info_list amdgpu_debugfs_ring_info_list[AMDGPU_MAX_RINGS];
-static char amdgpu_debugfs_ring_names[AMDGPU_MAX_RINGS][32];
+static const struct file_operations amdgpu_debugfs_ring_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_ring_read,
+	.llseek = default_llseek,
+};
 
 #endif
 
@@ -422,28 +353,27 @@ static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
 				    struct amdgpu_ring *ring)
 {
 #if defined(CONFIG_DEBUG_FS)
-	unsigned offset = (uint8_t*)ring - (uint8_t*)adev;
-	unsigned i;
-	struct drm_info_list *info;
-	char *name;
-
-	for (i = 0; i < ARRAY_SIZE(amdgpu_debugfs_ring_info_list); ++i) {
-		info = &amdgpu_debugfs_ring_info_list[i];
-		if (!info->data)
-			break;
-	}
+	struct drm_minor *minor = adev->ddev->primary;
+	struct dentry *ent, *root = minor->debugfs_root;
+	char name[32];
 
-	if (i == ARRAY_SIZE(amdgpu_debugfs_ring_info_list))
-		return -ENOSPC;
-
-	name = &amdgpu_debugfs_ring_names[i][0];
 	sprintf(name, "amdgpu_ring_%s", ring->name);
-	info->name = name;
-	info->show = amdgpu_debugfs_ring_info;
-	info->driver_features = 0;
-	info->data = (void*)(uintptr_t)offset;
 
-	return amdgpu_debugfs_add_files(adev, info, 1);
+	ent = debugfs_create_file(name,
+				  S_IFREG | S_IRUGO, root,
+				  ring, &amdgpu_debugfs_ring_fops);
+	if (IS_ERR(ent))
+		return PTR_ERR(ent);
+
+	i_size_write(ent->d_inode, ring->ring_size + 12);
+	ring->ent = ent;
 #endif
 	return 0;
 }
+
+static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring)
+{
+#if defined(CONFIG_DEBUG_FS)
+	debugfs_remove(ring->ent);	/* dentry stored at ring init */
+#endif /* CONFIG_DEBUG_FS */
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index b16366c2b4a0..d8af37a845f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -115,6 +115,7 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev,
 		return r;
 	}
 	r = amdgpu_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr);
+	memset(sa_manager->cpu_ptr, 0, sa_manager->size);
 	amdgpu_bo_unreserve(sa_manager->bo);
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 34a92808bbd4..5c8d3022fb87 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -223,13 +223,16 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_sync_is_idle - test if all fences are signaled
+ * amdgpu_sync_peek_fence - get the next fence not signaled yet
  *
  * @sync: the sync object
+ * @ring: optional ring to use for test
  *
- * Returns true if all fences in the sync object are signaled.
+ * Returns the next fence not signaled yet without removing it from the sync
+ * object.
  */
-bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)
+struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+				     struct amdgpu_ring *ring)
 {
 	struct amdgpu_sync_entry *e;
 	struct hlist_node *tmp;
@@ -237,6 +240,19 @@ bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)
 
 	hash_for_each_safe(sync->fences, i, tmp, e, node) {
 		struct fence *f = e->fence;
+		struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+
+		if (ring && s_fence) {
+			/* For fences from the same ring it is sufficient
+			 * when they are scheduled.
+			 */
+			if (s_fence->sched == &ring->sched) {
+				if (fence_is_signaled(&s_fence->scheduled))
+					continue;
+
+				return &s_fence->scheduled;
+			}
+		}
 
 		if (fence_is_signaled(f)) {
 			hash_del(&e->node);
@@ -245,58 +261,19 @@ bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)
 			continue;
 		}
 
-		return false;
+		return f;
 	}
 
-	return true;
+	return NULL;
 }
 
 /**
- * amdgpu_sync_cycle_fences - move fences from one sync object into another
+ * amdgpu_sync_get_fence - get the next fence from the sync object
  *
- * @dst: the destination sync object
- * @src: the source sync object
- * @fence: fence to add to source
+ * @sync: sync object to use
  *
- * Remove all fences from source and put them into destination and add
- * fence as new one into source.
+ * Gets and removes the next fence from the sync object that is not signaled yet.
  */
-int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
-			     struct fence *fence)
-{
-	struct amdgpu_sync_entry *e, *newone;
-	struct hlist_node *tmp;
-	int i;
-
-	/* Allocate the new entry before moving the old ones */
-	newone = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
-	if (!newone)
-		return -ENOMEM;
-
-	hash_for_each_safe(src->fences, i, tmp, e, node) {
-		struct fence *f = e->fence;
-
-		hash_del(&e->node);
-		if (fence_is_signaled(f)) {
-			fence_put(f);
-			kmem_cache_free(amdgpu_sync_slab, e);
-			continue;
-		}
-
-		if (amdgpu_sync_add_later(dst, f)) {
-			kmem_cache_free(amdgpu_sync_slab, e);
-			continue;
-		}
-
-		hash_add(dst->fences, &e->node, f->context);
-	}
-
-	hash_add(src->fences, &newone->node, fence->context);
-	newone->fence = fence_get(fence);
-
-	return 0;
-}
-
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 {
 	struct amdgpu_sync_entry *e;
@@ -319,25 +296,6 @@ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 	return NULL;
 }
 
-int amdgpu_sync_wait(struct amdgpu_sync *sync)
-{
-	struct amdgpu_sync_entry *e;
-	struct hlist_node *tmp;
-	int i, r;
-
-	hash_for_each_safe(sync->fences, i, tmp, e, node) {
-		r = fence_wait(e->fence, false);
-		if (r)
-			return r;
-
-		hash_del(&e->node);
-		fence_put(e->fence);
-		kmem_cache_free(amdgpu_sync_slab, e);
-	}
-
-	return 0;
-}
-
 /**
  * amdgpu_sync_free - free the sync object
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 26a5f4acf584..499803f3ce3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -11,19 +11,68 @@
 #define TRACE_SYSTEM amdgpu
 #define TRACE_INCLUDE_FILE amdgpu_trace
 
+TRACE_EVENT(amdgpu_mm_rreg,
+	    TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
+	    TP_ARGS(did, reg, value),
+	    TP_STRUCT__entry(
+				__field(unsigned, did)
+				__field(uint32_t, reg)
+				__field(uint32_t, value)
+			    ),
+	    TP_fast_assign(
+			   __entry->did = did;
+			   __entry->reg = reg;
+			   __entry->value = value;
+			   ),
+	    TP_printk("0x%04lx, 0x%04lx, 0x%08lx",
+		      (unsigned long)__entry->did,
+		      (unsigned long)__entry->reg,
+		      (unsigned long)__entry->value)
+);
+
+TRACE_EVENT(amdgpu_mm_wreg,
+	    TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
+	    TP_ARGS(did, reg, value),
+	    TP_STRUCT__entry(
+				__field(unsigned, did)
+				__field(uint32_t, reg)
+				__field(uint32_t, value)
+			    ),
+	    TP_fast_assign(
+			   __entry->did = did;
+			   __entry->reg = reg;
+			   __entry->value = value;
+			   ),
+	    TP_printk("0x%04lx, 0x%04lx, 0x%08lx",
+		      (unsigned long)__entry->did,
+		      (unsigned long)__entry->reg,
+		      (unsigned long)__entry->value)
+);
+
 TRACE_EVENT(amdgpu_bo_create,
 	    TP_PROTO(struct amdgpu_bo *bo),
 	    TP_ARGS(bo),
 	    TP_STRUCT__entry(
 			     __field(struct amdgpu_bo *, bo)
 			     __field(u32, pages)
+			     __field(u32, type)
+			     __field(u32, prefer)
+			     __field(u32, allow)
+			     __field(u32, visible)
 			     ),
 
 	    TP_fast_assign(
 			   __entry->bo = bo;
 			   __entry->pages = bo->tbo.num_pages;
+			   __entry->type = bo->tbo.mem.mem_type;
+			   __entry->prefer = bo->prefered_domains;
+			   __entry->allow = bo->allowed_domains;
+			   __entry->visible = bo->flags;
 			   ),
-	    TP_printk("bo=%p, pages=%u", __entry->bo, __entry->pages)
+
+	    TP_printk("bo=%p,pages=%u,type=%d,prefered=%d,allowed=%d,visible=%d",
+		       __entry->bo, __entry->pages, __entry->type,
+		       __entry->prefer, __entry->allow, __entry->visible)
 );
 
 TRACE_EVENT(amdgpu_cs,
@@ -64,7 +113,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
 			   __entry->adev = job->adev;
 			   __entry->sched_job = &job->base;
 			   __entry->ib = job->ibs;
-			   __entry->fence = &job->base.s_fence->base;
+			   __entry->fence = &job->base.s_fence->finished;
 			   __entry->ring_name = job->ring->name;
 			   __entry->num_ibs = job->num_ibs;
 			   ),
@@ -89,7 +138,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
 			   __entry->adev = job->adev;
 			   __entry->sched_job = &job->base;
 			   __entry->ib = job->ibs;
-			   __entry->fence = &job->base.s_fence->base;
+			   __entry->fence = &job->base.s_fence->finished;
 			   __entry->ring_name = job->ring->name;
 			   __entry->num_ibs = job->num_ibs;
 			   ),
@@ -244,13 +293,55 @@ TRACE_EVENT(amdgpu_bo_list_set,
 	    TP_STRUCT__entry(
 			     __field(struct amdgpu_bo_list *, list)
 			     __field(struct amdgpu_bo *, bo)
+			     __field(u64, bo_size)
 			     ),
 
 	    TP_fast_assign(
 			   __entry->list = list;
 			   __entry->bo = bo;
+			   __entry->bo_size = amdgpu_bo_size(bo);
 			   ),
-	    TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
+	    TP_printk("list=%p, bo=%p, bo_size = %Ld",
+		      __entry->list,
+		      __entry->bo,
+		      __entry->bo_size)
+);
+
+/* Aggregate BO statistics: @total_bo = number of BOs, @total_size = size. */
+TRACE_EVENT(amdgpu_cs_bo_status,
+	    TP_PROTO(uint64_t total_bo, uint64_t total_size),
+	    TP_ARGS(total_bo, total_size),
+	    TP_STRUCT__entry(
+			__field(u64, total_bo)
+			__field(u64, total_size)
+			),
+	    TP_fast_assign(
+			__entry->total_bo = total_bo;
+			__entry->total_size = total_size;
+			),
+	    TP_printk("total bo size = %Ld, total bo count = %Ld",
+			__entry->total_size, __entry->total_bo)
+);
+
+TRACE_EVENT(amdgpu_ttm_bo_move,
+	    TP_PROTO(struct amdgpu_bo* bo, uint32_t new_placement, uint32_t old_placement),
+	    TP_ARGS(bo, new_placement, old_placement),
+	    TP_STRUCT__entry(
+			__field(struct amdgpu_bo *, bo)
+			__field(u64, bo_size)
+			__field(u32, new_placement)
+			__field(u32, old_placement)
+			),
+
+	    TP_fast_assign(
+			__entry->bo      = bo;
+			__entry->bo_size = amdgpu_bo_size(bo);
+			__entry->new_placement = new_placement;
+			__entry->old_placement = old_placement;
+			),
+	    TP_printk("bo=%p from:%d to %d with size = %Ld",
+			__entry->bo, __entry->old_placement,
+			__entry->new_placement, __entry->bo_size)
 );
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 3b9053af4762..b7742e62972a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -286,9 +286,10 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	r = amdgpu_copy_buffer(ring, old_start, new_start,
 			       new_mem->num_pages * PAGE_SIZE, /* bytes */
 			       bo->resv, &fence);
-	/* FIXME: handle copy error */
-	r = ttm_bo_move_accel_cleanup(bo, fence,
-				      evict, no_wait_gpu, new_mem);
+	if (r)
+		return r;
+
+	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
 	fence_put(fence);
 	return r;
 }
@@ -396,6 +397,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
 		return -EINVAL;
 
 	adev = amdgpu_get_adev(bo->bdev);
+
+	/* remember the eviction */
+	if (evict)
+		atomic64_inc(&adev->num_evictions);
+
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		amdgpu_move_null(bo, new_mem);
 		return 0;
@@ -429,7 +435,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
 
 	if (r) {
 memcpy:
-		r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
+		r = ttm_bo_move_memcpy(bo, evict, interruptible,
+				       no_wait_gpu, new_mem);
 		if (r) {
 			return r;
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 01abfc21b4a2..e19520c4b4b6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -253,19 +253,20 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 {
 	int r;
 
-	if (adev->uvd.vcpu_bo == NULL)
-		return 0;
+	kfree(adev->uvd.saved_bo);
 
 	amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
 
-	r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
-	if (!r) {
-		amdgpu_bo_kunmap(adev->uvd.vcpu_bo);
-		amdgpu_bo_unpin(adev->uvd.vcpu_bo);
-		amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
-	}
+	if (adev->uvd.vcpu_bo) {
+		r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
+		if (!r) {
+			amdgpu_bo_kunmap(adev->uvd.vcpu_bo);
+			amdgpu_bo_unpin(adev->uvd.vcpu_bo);
+			amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
+		}
 
-	amdgpu_bo_unref(&adev->uvd.vcpu_bo);
+		amdgpu_bo_unref(&adev->uvd.vcpu_bo);
+	}
 
 	amdgpu_ring_fini(&adev->uvd.ring);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9f36ed30ba11..2f8496d48c94 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -25,6 +25,7 @@
  *          Alex Deucher
  *          Jerome Glisse
  */
+#include <linux/fence-array.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@@ -114,16 +115,26 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 /**
  * amdgpu_vm_get_bos - add the vm BOs to a duplicates list
  *
+ * @adev: amdgpu device pointer
  * @vm: vm providing the BOs
  * @duplicates: head of duplicates list
  *
  * Add the page directory to the BO duplicates list
  * for command submission.
  */
-void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates)
+void amdgpu_vm_get_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			  struct list_head *duplicates)
 {
+	uint64_t num_evictions;
 	unsigned i;
 
+	/* We only need to validate the page tables
+	 * if they aren't already valid.
+	 */
+	num_evictions = atomic64_read(&adev->num_evictions);
+	if (num_evictions == vm->last_eviction_counter)
+		return;
+
 	/* add the vm page table to the list */
 	for (i = 0; i <= vm->max_pde_used; ++i) {
 		struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry;
@@ -162,6 +173,13 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
 	spin_unlock(&glob->lru_lock);
 }
 
+static bool amdgpu_vm_is_gpu_reset(struct amdgpu_device *adev,
+				   struct amdgpu_vm_id *id)
+{
+	return id->current_gpu_reset_count !=
+	       atomic_read(&adev->gpu_reset_counter);
+}
+
 /**
  * amdgpu_vm_grab_id - allocate the next free VMID
  *
@@ -174,20 +192,69 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
  */
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_sync *sync, struct fence *fence,
-		      unsigned *vm_id, uint64_t *vm_pd_addr)
+		      struct amdgpu_job *job)
 {
-	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 	struct amdgpu_device *adev = ring->adev;
 	struct fence *updates = sync->last_vm_update;
-	struct amdgpu_vm_id *id;
-	unsigned i = ring->idx;
-	int r;
+	struct amdgpu_vm_id *id, *idle;
+	struct fence **fences;
+	unsigned i;
+	int r = 0;
+
+	fences = kmalloc_array(sizeof(void *), adev->vm_manager.num_ids,
+			       GFP_KERNEL);
+	if (!fences)
+		return -ENOMEM;
 
 	mutex_lock(&adev->vm_manager.lock);
 
+	/* Check if we have an idle VMID */
+	i = 0;
+	list_for_each_entry(idle, &adev->vm_manager.ids_lru, list) {
+		fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
+		if (!fences[i])
+			break;
+		++i;
+	}
+
+	/* If we can't find a idle VMID to use, wait till one becomes available */
+	if (&idle->list == &adev->vm_manager.ids_lru) {
+		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
+		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
+		struct fence_array *array;
+		unsigned j;
+
+		for (j = 0; j < i; ++j)
+			fence_get(fences[j]);
+
+		array = fence_array_create(i, fences, fence_context,
+					   seqno, true);
+		if (!array) {
+			for (j = 0; j < i; ++j)
+				fence_put(fences[j]);
+			kfree(fences);
+			r = -ENOMEM;
+			goto error;
+		}
+
+
+		r = amdgpu_sync_fence(ring->adev, sync, &array->base);
+		fence_put(&array->base);
+		if (r)
+			goto error;
+
+		mutex_unlock(&adev->vm_manager.lock);
+		return 0;
+
+	}
+	kfree(fences);
+
+	job->vm_needs_flush = true;
 	/* Check if we can use a VMID already assigned to this VM */
+	i = ring->idx;
 	do {
 		struct fence *flushed;
+		bool same_ring = ring->idx == i;
 
 		id = vm->ids[i++];
 		if (i == AMDGPU_MAX_RINGS)
@@ -196,67 +263,49 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		/* Check all the prerequisites to using this VMID */
 		if (!id)
 			continue;
+		if (amdgpu_vm_is_gpu_reset(adev, id))
+			continue;
 
 		if (atomic64_read(&id->owner) != vm->client_id)
 			continue;
 
-		if (pd_addr != id->pd_gpu_addr)
+		if (job->vm_pd_addr != id->pd_gpu_addr)
 			continue;
 
-		if (id->last_user != ring &&
+		if (!same_ring &&
 		    (!id->last_flush || !fence_is_signaled(id->last_flush)))
 			continue;
 
 		flushed  = id->flushed_updates;
-		if (updates && (!flushed || fence_is_later(updates, flushed)))
+		if (updates &&
+		    (!flushed || fence_is_later(updates, flushed)))
 			continue;
 
-		/* Good we can use this VMID */
-		if (id->last_user == ring) {
-			r = amdgpu_sync_fence(ring->adev, sync,
-					      id->first);
-			if (r)
-				goto error;
-		}
-
-		/* And remember this submission as user of the VMID */
+		/* Good we can use this VMID. Remember this submission as
+		 * user of the VMID.
+		 */
 		r = amdgpu_sync_fence(ring->adev, &id->active, fence);
 		if (r)
 			goto error;
 
+		id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
 		list_move_tail(&id->list, &adev->vm_manager.ids_lru);
 		vm->ids[ring->idx] = id;
 
-		*vm_id = id - adev->vm_manager.ids;
-		*vm_pd_addr = AMDGPU_VM_NO_FLUSH;
-		trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
+		job->vm_id = id - adev->vm_manager.ids;
+		job->vm_needs_flush = false;
+		trace_amdgpu_vm_grab_id(vm, ring->idx, job->vm_id, job->vm_pd_addr);
 
 		mutex_unlock(&adev->vm_manager.lock);
 		return 0;
 
 	} while (i != ring->idx);
 
-	id = list_first_entry(&adev->vm_manager.ids_lru,
-			      struct amdgpu_vm_id,
-			      list);
+	/* Still no ID to use? Then use the idle one found earlier */
+	id = idle;
 
-	if (!amdgpu_sync_is_idle(&id->active)) {
-		struct list_head *head = &adev->vm_manager.ids_lru;
-		struct amdgpu_vm_id *tmp;
-
-		list_for_each_entry_safe(id, tmp, &adev->vm_manager.ids_lru,
-					 list) {
-			if (amdgpu_sync_is_idle(&id->active)) {
-				list_move(&id->list, head);
-				head = &id->list;
-			}
-		}
-		id = list_first_entry(&adev->vm_manager.ids_lru,
-				      struct amdgpu_vm_id,
-				      list);
-	}
-
-	r = amdgpu_sync_cycle_fences(sync, &id->active, fence);
+	/* Remember this submission as user of the VMID */
+	r = amdgpu_sync_fence(ring->adev, &id->active, fence);
 	if (r)
 		goto error;
 
@@ -269,22 +318,46 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	fence_put(id->flushed_updates);
 	id->flushed_updates = fence_get(updates);
 
-	id->pd_gpu_addr = pd_addr;
-
+	id->pd_gpu_addr = job->vm_pd_addr;
+	id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
 	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
-	id->last_user = ring;
 	atomic64_set(&id->owner, vm->client_id);
 	vm->ids[ring->idx] = id;
 
-	*vm_id = id - adev->vm_manager.ids;
-	*vm_pd_addr = pd_addr;
-	trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
+	job->vm_id = id - adev->vm_manager.ids;
+	trace_amdgpu_vm_grab_id(vm, ring->idx, job->vm_id, job->vm_pd_addr);
 
 error:
 	mutex_unlock(&adev->vm_manager.lock);
 	return r;
 }
 
+/* Returns true when @ring is a compute ring affected by the compute VM bug. */
+static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const struct amdgpu_ip_block_version *gfx;
+
+	/* only compute rings */
+	if (ring->type != AMDGPU_RING_TYPE_COMPUTE)
+		return false;
+
+	gfx = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+	if (!gfx)
+		return false;
+
+	/* gfx7 has no workaround */
+	if (gfx->major <= 7)
+		return true;
+
+	/* gfx8 is fixed in MEC firmware 673 */
+	if (gfx->major == 8)
+		return adev->gfx.mec_fw_version < 673;
+
+	/* any other GFX major version is reported as not having the bug */
+	return false;
+}
+
 /**
  * amdgpu_vm_flush - hardware flush the vm
  *
@@ -294,59 +367,52 @@ error:
  *
  * Emit a VM flush when it is necessary.
  */
-int amdgpu_vm_flush(struct amdgpu_ring *ring,
-		    unsigned vm_id, uint64_t pd_addr,
-		    uint32_t gds_base, uint32_t gds_size,
-		    uint32_t gws_base, uint32_t gws_size,
-		    uint32_t oa_base, uint32_t oa_size)
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
+	struct amdgpu_vm_id *id = &adev->vm_manager.ids[job->vm_id];
 	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
-		id->gds_base != gds_base ||
-		id->gds_size != gds_size ||
-		id->gws_base != gws_base ||
-		id->gws_size != gws_size ||
-		id->oa_base != oa_base ||
-		id->oa_size != oa_size);
+		id->gds_base != job->gds_base ||
+		id->gds_size != job->gds_size ||
+		id->gws_base != job->gws_base ||
+		id->gws_size != job->gws_size ||
+		id->oa_base != job->oa_base ||
+		id->oa_size != job->oa_size);
 	int r;
 
 	if (ring->funcs->emit_pipeline_sync && (
-	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed ||
-		    ring->type == AMDGPU_RING_TYPE_COMPUTE))
+	    job->vm_needs_flush || gds_switch_needed ||
+	    amdgpu_vm_ring_has_compute_vm_bug(ring)))
 		amdgpu_ring_emit_pipeline_sync(ring);
 
-	if (ring->funcs->emit_vm_flush &&
-	    pd_addr != AMDGPU_VM_NO_FLUSH) {
+	if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
+	    amdgpu_vm_is_gpu_reset(adev, id))) {
 		struct fence *fence;
 
-		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id);
-		amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr);
+		trace_amdgpu_vm_flush(job->vm_pd_addr, ring->idx, job->vm_id);
+		amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);
+
+		r = amdgpu_fence_emit(ring, &fence);
+		if (r)
+			return r;
 
 		mutex_lock(&adev->vm_manager.lock);
-		if ((id->pd_gpu_addr == pd_addr) && (id->last_user == ring)) {
-			r = amdgpu_fence_emit(ring, &fence);
-			if (r) {
-				mutex_unlock(&adev->vm_manager.lock);
-				return r;
-			}
-			fence_put(id->last_flush);
-			id->last_flush = fence;
-		}
+		fence_put(id->last_flush);
+		id->last_flush = fence;
 		mutex_unlock(&adev->vm_manager.lock);
 	}
 
 	if (gds_switch_needed) {
-		id->gds_base = gds_base;
-		id->gds_size = gds_size;
-		id->gws_base = gws_base;
-		id->gws_size = gws_size;
-		id->oa_base = oa_base;
-		id->oa_size = oa_size;
-		amdgpu_ring_emit_gds_switch(ring, vm_id,
-					    gds_base, gds_size,
-					    gws_base, gws_size,
-					    oa_base, oa_size);
+		id->gds_base = job->gds_base;
+		id->gds_size = job->gds_size;
+		id->gws_base = job->gws_base;
+		id->gws_size = job->gws_size;
+		id->oa_base = job->oa_base;
+		id->oa_size = job->oa_size;
+		amdgpu_ring_emit_gds_switch(ring, job->vm_id,
+					    job->gds_base, job->gds_size,
+					    job->gws_base, job->gws_size,
+					    job->oa_base, job->oa_size);
 	}
 
 	return 0;
@@ -723,7 +789,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
  * @vm: requested vm
  * @start: start of GPU address range
  * @end: end of GPU address range
- * @dst: destination address to map to
+ * @dst: destination address to map to; advanced as the range is walked
  * @flags: mapping flags
  *
  * Update the page tables in the range @start - @end.
@@ -737,49 +803,75 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 {
 	const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
 
-	uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0;
-	uint64_t addr;
+	uint64_t cur_pe_start, cur_pe_end, cur_dst;
+	uint64_t addr; /* next GPU address to be updated */
+	uint64_t pt_idx;
+	struct amdgpu_bo *pt;
+	unsigned nptes; /* next number of ptes to be updated */
+	uint64_t next_pe_start;
+
+	/* initialize the variables */
+	addr = start;
+	pt_idx = addr >> amdgpu_vm_block_size;
+	pt = vm->page_tables[pt_idx].entry.robj;
+
+	if ((addr & ~mask) == (end & ~mask))
+		nptes = end - addr;
+	else
+		nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
+
+	cur_pe_start = amdgpu_bo_gpu_offset(pt);
+	cur_pe_start += (addr & mask) * 8;
+	cur_pe_end = cur_pe_start + 8 * nptes;
+	cur_dst = dst;
+
+	/* for next ptb*/
+	addr += nptes;
+	dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 
 	/* walk over the address space and update the page tables */
-	for (addr = start; addr < end; ) {
-		uint64_t pt_idx = addr >> amdgpu_vm_block_size;
-		struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
-		unsigned nptes;
-		uint64_t pe_start;
+	while (addr < end) {
+		pt_idx = addr >> amdgpu_vm_block_size;
+		pt = vm->page_tables[pt_idx].entry.robj;
 
 		if ((addr & ~mask) == (end & ~mask))
 			nptes = end - addr;
 		else
 			nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
 
-		pe_start = amdgpu_bo_gpu_offset(pt);
-		pe_start += (addr & mask) * 8;
-
-		if (last_pe_end != pe_start) {
+		next_pe_start = amdgpu_bo_gpu_offset(pt);
+		next_pe_start += (addr & mask) * 8;
 
+		if (cur_pe_end == next_pe_start) {
+			/* The next ptb is consecutive to current ptb.
+			 * Don't call amdgpu_vm_frag_ptes now.
+			 * Will update two ptbs together in future.
+			*/
+			cur_pe_end += 8 * nptes;
+		} else {
 			amdgpu_vm_frag_ptes(adev, vm_update_params,
-					    last_pe_start, last_pe_end,
-					    last_dst, flags);
+					    cur_pe_start, cur_pe_end,
+					    cur_dst, flags);
 
-			last_pe_start = pe_start;
-			last_pe_end = pe_start + 8 * nptes;
-			last_dst = dst;
-		} else {
-			last_pe_end += 8 * nptes;
+			cur_pe_start = next_pe_start;
+			cur_pe_end = next_pe_start + 8 * nptes;
+			cur_dst = dst;
 		}
 
+		/* for next ptb*/
 		addr += nptes;
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	amdgpu_vm_frag_ptes(adev, vm_update_params, last_pe_start,
-			    last_pe_end, last_dst, flags);
+	amdgpu_vm_frag_ptes(adev, vm_update_params, cur_pe_start,
+			    cur_pe_end, cur_dst, flags);
 }
 
 /**
  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
  *
  * @adev: amdgpu_device pointer
+ * @exclusive: fence we need to sync to
  * @src: address where to copy page table entries from
  * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
@@ -793,6 +885,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
  * Returns 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+				       struct fence *exclusive,
 				       uint64_t src,
 				       dma_addr_t *pages_addr,
 				       struct amdgpu_vm *vm,
@@ -853,6 +946,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	vm_update_params.ib = &job->ibs[0];
 
+	r = amdgpu_sync_fence(adev, &job->sync, exclusive);
+	if (r)
+		goto error_free;
+
 	r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
 			     owner);
 	if (r)
@@ -889,6 +986,7 @@ error_free:
  * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
  *
  * @adev: amdgpu_device pointer
+ * @exclusive: fence we need to sync to
  * @gtt_flags: flags as they are used for GTT
  * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
@@ -902,6 +1000,7 @@ error_free:
  * Returns 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
+				      struct fence *exclusive,
 				      uint32_t gtt_flags,
 				      dma_addr_t *pages_addr,
 				      struct amdgpu_vm *vm,
@@ -932,7 +1031,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 	addr += mapping->offset;
 
 	if (!pages_addr || src)
-		return amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
+		return amdgpu_vm_bo_update_mapping(adev, exclusive,
+						   src, pages_addr, vm,
 						   start, mapping->it.last,
 						   flags, addr, fence);
 
@@ -940,7 +1040,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		uint64_t last;
 
 		last = min((uint64_t)mapping->it.last, start + max_size - 1);
-		r = amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, exclusive,
+						src, pages_addr, vm,
 						start, last, flags, addr,
 						fence);
 		if (r)
@@ -973,6 +1074,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	struct amdgpu_bo_va_mapping *mapping;
 	dma_addr_t *pages_addr = NULL;
 	uint32_t gtt_flags, flags;
+	struct fence *exclusive;
 	uint64_t addr;
 	int r;
 
@@ -994,8 +1096,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 		default:
 			break;
 		}
+
+		exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv);
 	} else {
 		addr = 0;
+		exclusive = NULL;
 	}
 
 	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
@@ -1007,7 +1112,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	spin_unlock(&vm->status_lock);
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
-		r = amdgpu_vm_bo_split_mapping(adev, gtt_flags, pages_addr, vm,
+		r = amdgpu_vm_bo_split_mapping(adev, exclusive,
+					       gtt_flags, pages_addr, vm,
 					       mapping, flags, addr,
 					       &bo_va->last_pt_update);
 		if (r)
@@ -1054,7 +1160,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);
 
-		r = amdgpu_vm_bo_split_mapping(adev, 0, NULL, vm, mapping,
+		r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping,
 					       0, 0, NULL);
 		kfree(mapping);
 		if (r)
@@ -1445,6 +1551,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	amdgpu_bo_unreserve(vm->page_directory);
 	if (r)
 		goto error_free_page_directory;
+	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
 
 	return 0;
 
@@ -1516,6 +1623,10 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 			      &adev->vm_manager.ids_lru);
 	}
 
+	adev->vm_manager.fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+		adev->vm_manager.seqno[i] = 0;
+
 	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
 	atomic64_set(&adev->vm_manager.client_counter, 0);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index ea407db1fbcf..5c33ed862695 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -50,7 +50,9 @@
 #include "gmc/gmc_7_1_sh_mask.h"
 
 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
+MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
+MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
 
 #define MC_CG_ARB_FREQ_F0           0x0a
 #define MC_CG_ARB_FREQ_F1           0x0b
@@ -736,19 +738,19 @@ static int ci_enable_didt(struct amdgpu_device *adev, bool enable)
 
 	if (pi->caps_sq_ramping || pi->caps_db_ramping ||
 	    pi->caps_td_ramping || pi->caps_tcp_ramping) {
-		gfx_v7_0_enter_rlc_safe_mode(adev);
+		adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
 		if (enable) {
 			ret = ci_program_pt_config_registers(adev, didt_config_ci);
 			if (ret) {
-				gfx_v7_0_exit_rlc_safe_mode(adev);
+				adev->gfx.rlc.funcs->exit_safe_mode(adev);
 				return ret;
 			}
 		}
 
 		ci_do_enable_didt(adev, enable);
 
-		gfx_v7_0_exit_rlc_safe_mode(adev);
+		adev->gfx.rlc.funcs->exit_safe_mode(adev);
 	}
 
 	return 0;
@@ -3636,6 +3638,10 @@ static int ci_setup_default_dpm_tables(struct amdgpu_device *adev)
 
 	ci_setup_default_pcie_tables(adev);
 
+	/* save a copy of the default DPM table */
+	memcpy(&(pi->golden_dpm_table), &(pi->dpm_table),
+			sizeof(struct ci_dpm_table));
+
 	return 0;
 }
 
@@ -5754,10 +5760,18 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev)
 
 	switch (adev->asic_type) {
 	case CHIP_BONAIRE:
-		chip_name = "bonaire";
+		if ((adev->pdev->revision == 0x80) ||
+		    (adev->pdev->revision == 0x81) ||
+		    (adev->pdev->device == 0x665f))
+			chip_name = "bonaire_k";
+		else
+			chip_name = "bonaire";
 		break;
 	case CHIP_HAWAII:
-		chip_name = "hawaii";
+		if (adev->pdev->revision == 0x80)
+			chip_name = "hawaii_k";
+		else
+			chip_name = "hawaii";
 		break;
 	case CHIP_KAVERI:
 	case CHIP_KABINI:
@@ -6221,6 +6235,9 @@ static int ci_dpm_sw_fini(void *handle)
 	ci_dpm_fini(adev);
 	mutex_unlock(&adev->pm.mutex);
 
+	release_firmware(adev->pm.fw);
+	adev->pm.fw = NULL;
+
 	return 0;
 }
 
@@ -6401,6 +6418,186 @@ static int ci_dpm_set_powergating_state(void *handle,
 	return 0;
 }
 
+static int ci_dpm_print_clock_levels(struct amdgpu_device *adev,
+		enum pp_clock_type type, char *buf)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_single_dpm_table *sclk_table = &pi->dpm_table.sclk_table;
+	struct ci_single_dpm_table *mclk_table = &pi->dpm_table.mclk_table;
+	struct ci_single_dpm_table *pcie_table = &pi->dpm_table.pcie_speed_table;
+
+	int i, now, size = 0;
+	uint32_t clock, pcie_speed;
+
+	switch (type) {
+	case PP_SCLK:
+		amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_API_GetSclkFrequency);
+		clock = RREG32(mmSMC_MSG_ARG_0);
+
+		for (i = 0; i < sclk_table->count; i++) {
+			if (clock > sclk_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < sclk_table->count; i++)
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
+					i, sclk_table->dpm_levels[i].value / 100,
+					(i == now) ? "*" : "");
+		break;
+	case PP_MCLK:
+		amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_API_GetMclkFrequency);
+		clock = RREG32(mmSMC_MSG_ARG_0);
+
+		for (i = 0; i < mclk_table->count; i++) {
+			if (clock > mclk_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < mclk_table->count; i++)
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
+					i, mclk_table->dpm_levels[i].value / 100,
+					(i == now) ? "*" : "");
+		break;
+	case PP_PCIE:
+		pcie_speed = ci_get_current_pcie_speed(adev);
+		for (i = 0; i < pcie_table->count; i++) {
+			if (pcie_speed != pcie_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < pcie_table->count; i++)
+			size += sprintf(buf + size, "%d: %s %s\n", i,
+					(pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x1" :
+					(pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" :
+					(pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "",
+					(i == now) ? "*" : "");
+		break;
+	default:
+		break;
+	}
+
+	return size;
+}
+
+static int ci_dpm_force_clock_level(struct amdgpu_device *adev,
+		enum pp_clock_type type, uint32_t mask)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+
+	if (adev->pm.dpm.forced_level
+			!= AMDGPU_DPM_FORCED_LEVEL_MANUAL)
+		return -EINVAL;
+
+	switch (type) {
+	case PP_SCLK:
+		if (!pi->sclk_dpm_key_disabled)
+			amdgpu_ci_send_msg_to_smc_with_parameter(adev,
+					PPSMC_MSG_SCLKDPM_SetEnabledMask,
+					pi->dpm_level_enable_mask.sclk_dpm_enable_mask & mask);
+		break;
+
+	case PP_MCLK:
+		if (!pi->mclk_dpm_key_disabled)
+			amdgpu_ci_send_msg_to_smc_with_parameter(adev,
+					PPSMC_MSG_MCLKDPM_SetEnabledMask,
+					pi->dpm_level_enable_mask.mclk_dpm_enable_mask & mask);
+		break;
+
+	case PP_PCIE:
+	{
+		uint32_t tmp = mask & pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
+		uint32_t level = 0;
+
+		while (tmp >>= 1)
+			level++;
+
+		if (!pi->pcie_dpm_key_disabled)
+			amdgpu_ci_send_msg_to_smc_with_parameter(adev,
+					PPSMC_MSG_PCIeDPM_ForceLevel,
+					level);
+		break;
+	}
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int ci_dpm_get_sclk_od(struct amdgpu_device *adev)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_single_dpm_table *sclk_table = &(pi->dpm_table.sclk_table);
+	struct ci_single_dpm_table *golden_sclk_table =
+			&(pi->golden_dpm_table.sclk_table);
+	int value;
+
+	value = (sclk_table->dpm_levels[sclk_table->count - 1].value -
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value) *
+			100 /
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return value;
+}
+
+static int ci_dpm_set_sclk_od(struct amdgpu_device *adev, uint32_t value)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps);
+	struct ci_single_dpm_table *golden_sclk_table =
+			&(pi->golden_dpm_table.sclk_table);
+
+	if (value > 20)
+		value = 20;
+
+	ps->performance_levels[ps->performance_level_count - 1].sclk =
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value *
+			value / 100 +
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return 0;
+}
+
+static int ci_dpm_get_mclk_od(struct amdgpu_device *adev)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_single_dpm_table *mclk_table = &(pi->dpm_table.mclk_table);
+	struct ci_single_dpm_table *golden_mclk_table =
+			&(pi->golden_dpm_table.mclk_table);
+	int value;
+
+	value = (mclk_table->dpm_levels[mclk_table->count - 1].value -
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value) *
+			100 /
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return value;
+}
+
+static int ci_dpm_set_mclk_od(struct amdgpu_device *adev, uint32_t value)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps);
+	struct ci_single_dpm_table *golden_mclk_table =
+			&(pi->golden_dpm_table.mclk_table);
+
+	if (value > 20)
+		value = 20;
+
+	ps->performance_levels[ps->performance_level_count - 1].mclk =
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value *
+			value / 100 +
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return 0;
+}
+
 const struct amd_ip_funcs ci_dpm_ip_funcs = {
 	.name = "ci_dpm",
 	.early_init = ci_dpm_early_init,
@@ -6435,6 +6632,12 @@ static const struct amdgpu_dpm_funcs ci_dpm_funcs = {
 	.get_fan_control_mode = &ci_dpm_get_fan_control_mode,
 	.set_fan_speed_percent = &ci_dpm_set_fan_speed_percent,
 	.get_fan_speed_percent = &ci_dpm_get_fan_speed_percent,
+	.print_clock_levels = ci_dpm_print_clock_levels,
+	.force_clock_level = ci_dpm_force_clock_level,
+	.get_sclk_od = ci_dpm_get_sclk_od,
+	.set_sclk_od = ci_dpm_set_sclk_od,
+	.get_mclk_od = ci_dpm_get_mclk_od,
+	.set_mclk_od = ci_dpm_set_mclk_od,
 };
 
 static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
index faccc30c93bf..91be2996ae7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
@@ -193,6 +193,7 @@ struct ci_pt_defaults {
 
 struct ci_power_info {
 	struct ci_dpm_table dpm_table;
+	struct ci_dpm_table golden_dpm_table;
 	u32 voltage_control;
 	u32 mvdd_control;
 	u32 vddci_control;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 07bc795a4ca9..a7de4d18ac94 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -962,6 +962,12 @@ static bool cik_read_bios_from_rom(struct amdgpu_device *adev,
 	return true;
 }
 
+static u32 cik_get_virtual_caps(struct amdgpu_device *adev)
+{
+	/* CIK does not support SR-IOV */
+	return 0;
+}
+
 static const struct amdgpu_allowed_register_entry cik_allowed_read_registers[] = {
 	{mmGRBM_STATUS, false},
 	{mmGB_ADDR_CONFIG, false},
@@ -1029,12 +1035,12 @@ static uint32_t cik_read_indexed_register(struct amdgpu_device *adev,
 
 	mutex_lock(&adev->grbm_idx_mutex);
 	if (se_num != 0xffffffff || sh_num != 0xffffffff)
-		gfx_v7_0_select_se_sh(adev, se_num, sh_num);
+		amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
 
 	val = RREG32(reg_offset);
 
 	if (se_num != 0xffffffff || sh_num != 0xffffffff)
-		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 	return val;
 }
@@ -1152,10 +1158,11 @@ static void kv_restore_regs_for_reset(struct amdgpu_device *adev,
 	WREG32(mmGMCON_RENG_EXECUTE, save->gmcon_reng_execute);
 }
 
-static void cik_gpu_pci_config_reset(struct amdgpu_device *adev)
+static int cik_gpu_pci_config_reset(struct amdgpu_device *adev)
 {
 	struct kv_reset_save_regs kv_save = { 0 };
 	u32 i;
+	int r = -EINVAL;
 
 	dev_info(adev->dev, "GPU pci config reset\n");
 
@@ -1171,14 +1178,20 @@ static void cik_gpu_pci_config_reset(struct amdgpu_device *adev)
 
 	/* wait for asic to come out of reset */
 	for (i = 0; i < adev->usec_timeout; i++) {
-		if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff)
+		if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff) {
+			/* enable BM */
+			pci_set_master(adev->pdev);
+			r = 0;
 			break;
+		}
 		udelay(1);
 	}
 
 	/* does asic init need to be run first??? */
 	if (adev->flags & AMD_IS_APU)
 		kv_restore_regs_for_reset(adev, &kv_save);
+
+	return r;
 }
 
 static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hung)
@@ -1204,13 +1217,14 @@ static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hu
  */
 static int cik_asic_reset(struct amdgpu_device *adev)
 {
+	int r;
 	cik_set_bios_scratch_engine_hung(adev, true);
 
-	cik_gpu_pci_config_reset(adev);
+	r = cik_gpu_pci_config_reset(adev);
 
 	cik_set_bios_scratch_engine_hung(adev, false);
 
-	return 0;
+	return r;
 }
 
 static int cik_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
@@ -2007,9 +2021,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
 	.get_xclk = &cik_get_xclk,
 	.set_uvd_clocks = &cik_set_uvd_clocks,
 	.set_vce_clocks = &cik_set_vce_clocks,
-	/* these should be moved to their own ip modules */
-	.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
-	.wait_for_mc_idle = &gmc_v7_0_mc_wait_for_idle,
+	.get_virtual_caps = &cik_get_virtual_caps,
 };
 
 static int cik_common_early_init(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 518dca43b133..46aca16a40aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -66,6 +66,16 @@ MODULE_FIRMWARE("radeon/mullins_sdma1.bin");
 
 u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev);
 
+
+static void cik_sdma_free_microcode(struct amdgpu_device *adev)
+{
+	int i;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+			release_firmware(adev->sdma.instance[i].fw);
+			adev->sdma.instance[i].fw = NULL;
+	}
+}
+
 /*
  * sDMA - System DMA
  * Starting with CIK, the GPU has new asynchronous
@@ -214,17 +224,6 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
 				  unsigned vm_id, bool ctx_switch)
 {
 	u32 extra_bits = vm_id & 0xf;
-	u32 next_rptr = ring->wptr + 5;
-
-	while ((next_rptr & 7) != 4)
-		next_rptr++;
-
-	next_rptr += 4;
-	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
-	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, 1); /* number of DWs to follow */
-	amdgpu_ring_write(ring, next_rptr);
 
 	/* IB packet must end on a 8 DW boundary */
 	cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8);
@@ -419,6 +418,8 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
 		/* Initialize the ring buffer's read and write pointers */
 		WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
 		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);
 
 		/* set the wb address whether it's enabled or not */
 		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
@@ -446,7 +447,12 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 
 		ring->ready = true;
+	}
+
+	cik_sdma_enable(adev, true);
 
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		ring = &adev->sdma.instance[i].ring;
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
 			ring->ready = false;
@@ -529,8 +535,8 @@ static int cik_sdma_start(struct amdgpu_device *adev)
 	if (r)
 		return r;
 
-	/* unhalt the MEs */
-	cik_sdma_enable(adev, true);
+	/* halt the engine before programming */
+	cik_sdma_enable(adev, false);
 
 	/* start the gfx rings and rlc compute queues */
 	r = cik_sdma_gfx_resume(adev);
@@ -998,6 +1004,7 @@ static int cik_sdma_sw_fini(void *handle)
 	for (i = 0; i < adev->sdma.num_instances; i++)
 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
 
+	cik_sdma_free_microcode(adev);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
index 933e425a8154..8ba07e79d4cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
@@ -2219,6 +2219,7 @@ static void cz_dpm_powergate_vce(struct amdgpu_device *adev, bool gate)
 			}
 		}
 	} else { /*pi->caps_vce_pg*/
+		pi->vce_power_gated = gate;
 		cz_update_vce_dpm(adev);
 		cz_enable_vce_dpm(adev, !gate);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index c90408bc0fde..d4bf133908b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -307,11 +307,10 @@ static void dce_v11_0_page_flip(struct amdgpu_device *adev,
 	struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
 	u32 tmp;
 
-	/* flip at hsync for async, default is vsync */
-	/* use UPDATE_IMMEDIATE_EN instead for async? */
+	/* flip immediately for async, default is vsync */
 	tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
 	tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
-			    GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 1 : 0);
+			    GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0);
 	WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
 	/* update the scanout addresses */
 	WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 300ff4aab0fd..4fdfab1e9200 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -526,36 +526,16 @@ static void dce_v8_0_stop_mc_access(struct amdgpu_device *adev,
 		crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
 					     CRTC_CONTROL, CRTC_MASTER_EN);
 		if (crtc_enabled) {
-#if 0
-			u32 frame_count;
-			int j;
-
+#if 1
 			save->crtc_enabled[i] = true;
 			tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
 			if (REG_GET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN) == 0) {
-				amdgpu_display_vblank_wait(adev, i);
-				WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
+				/* this is correct only for RGB; black is 0 */
+				WREG32(mmCRTC_BLANK_DATA_COLOR + crtc_offsets[i], 0);
 				tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 1);
 				WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
-				WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
-			}
-			/* wait for the next frame */
-			frame_count = amdgpu_display_vblank_get_counter(adev, i);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (amdgpu_display_vblank_get_counter(adev, i) != frame_count)
-					break;
-				udelay(1);
-			}
-			tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK) == 0) {
-				tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 1);
-				WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp);
-			}
-			tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK) == 0) {
-				tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK, 1);
-				WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp);
 			}
+			mdelay(20);
 #else
 			/* XXX this is a hack to avoid strange behavior with EFI on certain systems */
 			WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
@@ -575,55 +555,22 @@ static void dce_v8_0_stop_mc_access(struct amdgpu_device *adev,
 static void dce_v8_0_resume_mc_access(struct amdgpu_device *adev,
 				      struct amdgpu_mode_mc_save *save)
 {
-	u32 tmp, frame_count;
-	int i, j;
+	u32 tmp;
+	int i;
 
 	/* update crtc base addresses */
 	for (i = 0; i < adev->mode_info.num_crtc; i++) {
 		WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i],
 		       upper_32_bits(adev->mc.vram_start));
-		WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i],
-		       upper_32_bits(adev->mc.vram_start));
 		WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i],
 		       (u32)adev->mc.vram_start);
-		WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i],
-		       (u32)adev->mc.vram_start);
 
 		if (save->crtc_enabled[i]) {
-			tmp = RREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, MASTER_UPDATE_MODE, MASTER_UPDATE_MODE) != 3) {
-				tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_MODE, MASTER_UPDATE_MODE, 3);
-				WREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i], tmp);
-			}
-			tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK)) {
-				tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 0);
-				WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp);
-			}
-			tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK)) {
-				tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK, 0);
-				WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp);
-			}
-			for (j = 0; j < adev->usec_timeout; j++) {
-				tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-				if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_SURFACE_UPDATE_PENDING) == 0)
-					break;
-				udelay(1);
-			}
 			tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
 			tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 0);
-			WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
 			WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
-			WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
-			/* wait for the next frame */
-			frame_count = amdgpu_display_vblank_get_counter(adev, i);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (amdgpu_display_vblank_get_counter(adev, i) != frame_count)
-					break;
-				udelay(1);
-			}
 		}
+		mdelay(20);
 	}
 
 	WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start));
diff --git a/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c b/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c
index 245cabf06575..ed03b75175d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/fiji_dpm.c
@@ -72,6 +72,11 @@ static int fiji_dpm_sw_init(void *handle)
 
 static int fiji_dpm_sw_fini(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	release_firmware(adev->pm.fw);
+	adev->pm.fw = NULL;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 7f18a53ab53a..f6bd9465dbdc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -991,6 +991,22 @@ out:
 	return err;
 }
 
+static void gfx_v7_0_free_microcode(struct amdgpu_device *adev)
+{
+	release_firmware(adev->gfx.pfp_fw);
+	adev->gfx.pfp_fw = NULL;
+	release_firmware(adev->gfx.me_fw);
+	adev->gfx.me_fw = NULL;
+	release_firmware(adev->gfx.ce_fw);
+	adev->gfx.ce_fw = NULL;
+	release_firmware(adev->gfx.mec_fw);
+	adev->gfx.mec_fw = NULL;
+	release_firmware(adev->gfx.mec2_fw);
+	adev->gfx.mec2_fw = NULL;
+	release_firmware(adev->gfx.rlc_fw);
+	adev->gfx.rlc_fw = NULL;
+}
+
 /**
  * gfx_v7_0_tiling_mode_table_init - init the hw tiling table
  *
@@ -1567,9 +1583,15 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
  * registers are instanced per SE or SH.  0xffffffff means
  * broadcast to all SEs or SHs (CIK).
  */
-void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
+static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
+				  u32 se_num, u32 sh_num, u32 instance)
 {
-	u32 data = GRBM_GFX_INDEX__INSTANCE_BROADCAST_WRITES_MASK;
+	u32 data;
+
+	if (instance == 0xffffffff)
+		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+	else
+		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
 
 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
 		data |= GRBM_GFX_INDEX__SH_BROADCAST_WRITES_MASK |
@@ -1643,13 +1665,13 @@ static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v7_0_select_se_sh(adev, i, j);
+			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
 			data = gfx_v7_0_get_rb_active_bitmap(adev);
 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
 					       rb_bitmap_width_per_sh);
 		}
 	}
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	adev->gfx.config.backend_enable_mask = active_rbs;
@@ -1730,7 +1752,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
 	 * making sure that the following register writes will be broadcasted
 	 * to all the shaders
 	 */
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 
 	/* XXX SH_MEM regs */
 	/* where to put LDS, scratch, GPUVM in FSA64 space */
@@ -2034,17 +2056,6 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 				      unsigned vm_id, bool ctx_switch)
 {
 	u32 header, control = 0;
-	u32 next_rptr = ring->wptr + 5;
-
-	if (ctx_switch)
-		next_rptr += 2;
-
-	next_rptr += 4;
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
-	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, next_rptr);
 
 	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
 	if (ctx_switch) {
@@ -2073,22 +2084,9 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 					  struct amdgpu_ib *ib,
 					  unsigned vm_id, bool ctx_switch)
 {
-	u32 header, control = 0;
-	u32 next_rptr = ring->wptr + 5;
-
-	control |= INDIRECT_BUFFER_VALID;
-	next_rptr += 4;
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
-	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, next_rptr);
-
-	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
-
-	control |= ib->length_dw | (vm_id << 24);
+	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
 
-	amdgpu_ring_write(ring, header);
+	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
 					  (2 << 0) |
@@ -3205,7 +3203,8 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev)
 		}
 	}
 	adev->gfx.rlc.cs_data = ci_cs_data;
-	adev->gfx.rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
+	adev->gfx.rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
+	adev->gfx.rlc.cp_table_size += 64 * 1024; /* GDS */
 
 	src_ptr = adev->gfx.rlc.reg_list;
 	dws = adev->gfx.rlc.reg_list_size;
@@ -3363,7 +3362,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v7_0_select_se_sh(adev, i, j);
+			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
 			for (k = 0; k < adev->usec_timeout; k++) {
 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
 					break;
@@ -3371,7 +3370,7 @@ static void gfx_v7_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 			}
 		}
 	}
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -3418,7 +3417,7 @@ static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev)
 	return orig;
 }
 
-void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
+static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
 {
 	u32 tmp, i, mask;
 
@@ -3440,7 +3439,7 @@ void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev)
 	}
 }
 
-void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
+static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
 {
 	u32 tmp;
 
@@ -3455,7 +3454,7 @@ void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev)
  *
  * Halt the RLC ME (MicroEngine) (CIK).
  */
-void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
+static void gfx_v7_0_rlc_stop(struct amdgpu_device *adev)
 {
 	WREG32(mmRLC_CNTL, 0);
 
@@ -3531,7 +3530,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev)
 	WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
 
 	mutex_lock(&adev->grbm_idx_mutex);
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
 	WREG32(mmRLC_LB_PARAMS, 0x00600408);
 	WREG32(mmRLC_LB_CNTL, 0x80000004);
@@ -3571,7 +3570,7 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
 		tmp = gfx_v7_0_halt_rlc(adev);
 
 		mutex_lock(&adev->grbm_idx_mutex);
-		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
 		tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
@@ -3622,7 +3621,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
 		tmp = gfx_v7_0_halt_rlc(adev);
 
 		mutex_lock(&adev->grbm_idx_mutex);
-		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
 		data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
@@ -3673,7 +3672,7 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
 		tmp = gfx_v7_0_halt_rlc(adev);
 
 		mutex_lock(&adev->grbm_idx_mutex);
-		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
 		WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
 		data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
@@ -3851,6 +3850,20 @@ static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
 	}
 }
 
+/* Write @bitmap to the INACTIVE_CUS register field; zero means no change. */
+static void gfx_v7_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+						 u32 bitmap)
+{
+	u32 val;
+
+	if (bitmap == 0)
+		return;
+
+	val = (bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT) &
+	      GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, val);
+}
+
 static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
@@ -4107,7 +4120,7 @@ static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
  * Fetches a GPU clock counter snapshot (SI).
  * Returns the 64 bit clock counter snapshot.
  */
-uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+static uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev)
 {
 	uint64_t clock;
 
@@ -4167,12 +4180,24 @@ static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
 }
 
+static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = {
+	.select_se_sh = gfx_v7_0_select_se_sh,
+	.get_gpu_clock_counter = gfx_v7_0_get_gpu_clock_counter,
+};
+
+static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = {
+	.enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode,
+	.exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode,
+};
+
 static int gfx_v7_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX7_NUM_GFX_RINGS;
 	adev->gfx.num_compute_rings = GFX7_NUM_COMPUTE_RINGS;
+	adev->gfx.funcs = &gfx_v7_0_gfx_funcs;
+	adev->gfx.rlc.funcs = &gfx_v7_0_rlc_funcs;
 	gfx_v7_0_set_ring_funcs(adev);
 	gfx_v7_0_set_irq_funcs(adev);
 	gfx_v7_0_set_gds_init(adev);
@@ -4489,6 +4514,7 @@ static int gfx_v7_0_sw_fini(void *handle)
 	gfx_v7_0_cp_compute_fini(adev);
 	gfx_v7_0_rlc_fini(adev);
 	gfx_v7_0_mec_fini(adev);
+	gfx_v7_0_free_microcode(adev);
 
 	return 0;
 }
@@ -4816,7 +4842,7 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev,
 	case 2:
 		for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 			ring = &adev->gfx.compute_ring[i];
-			if ((ring->me == me_id) & (ring->pipe == pipe_id))
+			if ((ring->me == me_id) && (ring->pipe == pipe_id))
 				amdgpu_fence_process(ring);
 		}
 		break;
@@ -5015,16 +5041,22 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 	int i, j, k, counter, active_cu_number = 0;
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+	unsigned disable_masks[4 * 2];
 
 	memset(cu_info, 0, sizeof(*cu_info));
 
+	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			mask = 1;
 			ao_bitmap = 0;
 			counter = 0;
-			gfx_v7_0_select_se_sh(adev, i, j);
+			gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+			if (i < 4 && j < 2)
+				gfx_v7_0_set_user_cu_inactive_bitmap(
+					adev, disable_masks[i * 2 + j]);
 			bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
 			cu_info->bitmap[i][j] = bitmap;
 
@@ -5040,7 +5072,7 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev)
 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
 		}
 	}
-	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
index e747aa935c88..94e3ea147c26 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.h
@@ -26,11 +26,4 @@
 
 extern const struct amd_ip_funcs gfx_v7_0_ip_funcs;
 
-/* XXX these shouldn't be exported */
-void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev);
-void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev);
-void gfx_v7_0_rlc_stop(struct amdgpu_device *adev);
-uint64_t gfx_v7_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num);
-
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index f19bab68fd83..c30b6ac25d89 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -297,7 +297,8 @@ static const u32 polaris11_golden_common_all[] =
 static const u32 golden_settings_polaris10_a11[] =
 {
 	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
-	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
+	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
+	mmCB_HW_CONTROL_2, 0, 0x0f000000,
 	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
 	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
 	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
@@ -836,6 +837,26 @@ err1:
 	return r;
 }
 
+/* Release every GFX firmware image and the RLC register list format buffer. */
+static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
+{
+	release_firmware(adev->gfx.pfp_fw);
+	adev->gfx.pfp_fw = NULL;
+	release_firmware(adev->gfx.me_fw);
+	adev->gfx.me_fw = NULL;
+	release_firmware(adev->gfx.ce_fw);
+	adev->gfx.ce_fw = NULL;
+	release_firmware(adev->gfx.rlc_fw);
+	adev->gfx.rlc_fw = NULL;
+	release_firmware(adev->gfx.mec_fw);
+	adev->gfx.mec_fw = NULL;
+	if (adev->asic_type != CHIP_STONEY && adev->asic_type != CHIP_TOPAZ)
+		release_firmware(adev->gfx.mec2_fw);
+	adev->gfx.mec2_fw = NULL;
+
+	kfree(adev->gfx.rlc.register_list_format);
+}
+
 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
 {
 	const char *chip_name;
@@ -1129,6 +1150,71 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
 	buffer[count++] = cpu_to_le32(0);
 }
 
+/**
+ * cz_init_cp_jump_table - fill the RLC cp table buffer
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Copy the jump table of every CP microcode image into the buffer at
+ * adev->gfx.rlc.cp_table_ptr.  The tables are packed back to back in the
+ * order CE, PFP, ME, MEC and, on Carrizo only, MEC2.  Offsets and sizes
+ * (jt_offset, jt_size) come from each image's header and count dwords.
+ */
+static void cz_init_cp_jump_table(struct amdgpu_device *adev)
+{
+	volatile u32 *dst_ptr = adev->gfx.rlc.cp_table_ptr;
+	int me, max_me = 4;
+	u32 bo_offset = 0;
+
+	if (adev->asic_type == CHIP_CARRIZO)
+		max_me = 5;
+
+	/* write the cp table buffer */
+	for (me = 0; me < max_me; me++) {
+		const struct gfx_firmware_header_v1_0 *hdr;
+		const struct firmware *fw;
+		const __le32 *fw_data;
+		u32 table_offset, table_size, i;
+
+		switch (me) {
+		case 0:
+			fw = adev->gfx.ce_fw;
+			break;
+		case 1:
+			fw = adev->gfx.pfp_fw;
+			break;
+		case 2:
+			fw = adev->gfx.me_fw;
+			break;
+		case 3:
+			fw = adev->gfx.mec_fw;
+			break;
+		case 4:
+			fw = adev->gfx.mec2_fw;
+			break;
+		default:
+			/* not reachable: max_me never exceeds 5 */
+			return;
+		}
+
+		/* the jump table location is described by the ucode header */
+		hdr = (const struct gfx_firmware_header_v1_0 *)fw->data;
+		fw_data = (const __le32 *)
+			(fw->data +
+			 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
+		table_offset = le32_to_cpu(hdr->jt_offset);
+		table_size = le32_to_cpu(hdr->jt_size);
+
+		/* append this image's table after the previous ones */
+		for (i = 0; i < table_size; i++) {
+			dst_ptr[bo_offset + i] =
+				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
+		}
+
+		bo_offset += table_size;
+	}
+}
+
 static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
 {
 	int r;
@@ -1144,6 +1230,18 @@ static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
 		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
 		adev->gfx.rlc.clear_state_obj = NULL;
 	}
+
+	/* jump table block */
+	if (adev->gfx.rlc.cp_table_obj) {
+		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
+		if (unlikely(r != 0))
+			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
+		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
+		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+
+		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
+		adev->gfx.rlc.cp_table_obj = NULL;
+	}
 }
 
 static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
@@ -1200,6 +1298,46 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
 		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
 	}
 
+	if (adev->asic_type == CHIP_CARRIZO || adev->asic_type == CHIP_STONEY) {
+		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
+		if (adev->gfx.rlc.cp_table_obj == NULL) {
+			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
+					     AMDGPU_GEM_DOMAIN_VRAM,
+					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+					     NULL, NULL,
+					     &adev->gfx.rlc.cp_table_obj);
+			if (r) {
+				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
+				return r;
+			}
+		}
+
+		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
+		if (unlikely(r != 0)) {
+			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
+			return r;
+		}
+		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
+				  &adev->gfx.rlc.cp_table_gpu_addr);
+		if (r) {
+			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+			dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
+			return r;
+		}
+		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
+		if (r) {
+			/* don't leak the reservation taken above */
+			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
+			return r;
+		}
+
+		cz_init_cp_jump_table(adev);
+
+		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
+		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
+	}
+
 	return 0;
 }
 
@@ -1983,7 +2121,7 @@ static int gfx_v8_0_sw_fini(void *handle)
 
 	gfx_v8_0_rlc_fini(adev);
 
-	kfree(adev->gfx.rlc.register_list_format);
+	gfx_v8_0_free_microcode(adev);
 
 	return 0;
 }
@@ -3308,9 +3446,15 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
 	}
 }
 
-void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
+static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
+				  u32 se_num, u32 sh_num, u32 instance)
 {
-	u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+	u32 data;
+
+	if (instance == 0xffffffff)
+		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
+	else
+		data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
 
 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
 		data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
@@ -3360,13 +3504,13 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v8_0_select_se_sh(adev, i, j);
+			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
 			data = gfx_v8_0_get_rb_active_bitmap(adev);
 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
 					       rb_bitmap_width_per_sh);
 		}
 	}
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	adev->gfx.config.backend_enable_mask = active_rbs;
@@ -3470,7 +3614,7 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
 	 * making sure that the following register writes will be broadcasted
 	 * to all the shaders
 	 */
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 
 	WREG32(mmPA_SC_FIFO_SIZE,
 		   (adev->gfx.config.sc_prim_fifo_size_frontend <<
@@ -3493,7 +3637,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v8_0_select_se_sh(adev, i, j);
+			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
 			for (k = 0; k < adev->usec_timeout; k++) {
 				if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
 					break;
@@ -3501,7 +3645,7 @@ static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 			}
 		}
 	}
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -3662,13 +3806,13 @@ static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
 	WREG32(mmRLC_SRM_CNTL, data);
 }
 
-static void polaris11_init_power_gating(struct amdgpu_device *adev)
+static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
 {
 	uint32_t data;
 
 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
-			AMD_PG_SUPPORT_GFX_SMG |
-			AMD_PG_SUPPORT_GFX_DMG)) {
+			      AMD_PG_SUPPORT_GFX_SMG |
+			      AMD_PG_SUPPORT_GFX_DMG)) {
 		data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
 		data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
 		data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
@@ -3693,6 +3837,53 @@ static void polaris11_init_power_gating(struct amdgpu_device *adev)
 	}
 }
 
+/*
+ * Set or clear SMU_CLK_SLOWDOWN_ON_PU_ENABLE in RLC_PG_CNTL.  The register
+ * is only written back when the bit actually changes.
+ */
+static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
+						bool enable)
+{
+	const u32 mask = RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
+	u32 cur, want;
+
+	cur = RREG32(mmRLC_PG_CNTL);
+	want = enable ? (cur | mask) : (cur & ~mask);
+	if (want != cur)
+		WREG32(mmRLC_PG_CNTL, want);
+}
+
+/*
+ * Set or clear SMU_CLK_SLOWDOWN_ON_PD_ENABLE in RLC_PG_CNTL.  The register
+ * is only written back when the bit actually changes.
+ */
+static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
+						  bool enable)
+{
+	const u32 mask = RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
+	u32 cur, want;
+
+	cur = RREG32(mmRLC_PG_CNTL);
+	want = enable ? (cur | mask) : (cur & ~mask);
+	if (want != cur)
+		WREG32(mmRLC_PG_CNTL, want);
+}
+
+/*
+ * Clear (enable) or set (disable) CP_PG_DISABLE in RLC_PG_CNTL.  The
+ * register is only written back when the bit actually changes.
+ */
+static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
+{
+	const u32 mask = RLC_PG_CNTL__CP_PG_DISABLE_MASK;
+	u32 cur, want;
+
+	cur = RREG32(mmRLC_PG_CNTL);
+	want = enable ? (cur & ~mask) : (cur | mask);
+	if (want != cur)
+		WREG32(mmRLC_PG_CNTL, want);
+}
+
 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
 {
 	if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
@@ -3705,8 +3896,25 @@ static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
 		gfx_v8_0_init_save_restore_list(adev);
 		gfx_v8_0_enable_save_restore_machine(adev);
 
-		if (adev->asic_type == CHIP_POLARIS11)
-			polaris11_init_power_gating(adev);
+		if ((adev->asic_type == CHIP_CARRIZO) ||
+		    (adev->asic_type == CHIP_STONEY)) {
+			WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
+			gfx_v8_0_init_power_gating(adev);
+			WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
+			if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
+				cz_enable_sck_slow_down_on_power_up(adev, true);
+				cz_enable_sck_slow_down_on_power_down(adev, true);
+			} else {
+				cz_enable_sck_slow_down_on_power_up(adev, false);
+				cz_enable_sck_slow_down_on_power_down(adev, false);
+			}
+			if (adev->pg_flags & AMD_PG_SUPPORT_CP)
+				cz_enable_cp_power_gating(adev, true);
+			else
+				cz_enable_cp_power_gating(adev, false);
+		} else if (adev->asic_type == CHIP_POLARIS11) {
+			gfx_v8_0_init_power_gating(adev);
+		}
 	}
 }
 
@@ -3974,11 +4182,15 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
 		amdgpu_ring_write(ring, 0x3a00161a);
 		amdgpu_ring_write(ring, 0x0000002e);
 		break;
-	case CHIP_TOPAZ:
 	case CHIP_CARRIZO:
 		amdgpu_ring_write(ring, 0x00000002);
 		amdgpu_ring_write(ring, 0x00000000);
 		break;
+	case CHIP_TOPAZ:
+		amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
+				0x00000000 : 0x00000002);
+		amdgpu_ring_write(ring, 0x00000000);
+		break;
 	case CHIP_STONEY:
 		amdgpu_ring_write(ring, 0x00000000);
 		amdgpu_ring_write(ring, 0x00000000);
@@ -4941,7 +5153,7 @@ static int gfx_v8_0_soft_reset(void *handle)
  * Fetches a GPU clock counter snapshot.
  * Returns the 64 bit clock counter snapshot.
  */
-uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
+static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
 {
 	uint64_t clock;
 
@@ -5001,12 +5213,18 @@ static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
 }
 
+static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
+	.select_se_sh = gfx_v8_0_select_se_sh,
+	.get_gpu_clock_counter = gfx_v8_0_get_gpu_clock_counter,
+};
+
 static int gfx_v8_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
 	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
+	adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
 	gfx_v8_0_set_ring_funcs(adev);
 	gfx_v8_0_set_irq_funcs(adev);
 	gfx_v8_0_set_gds_init(adev);
@@ -5039,51 +5257,43 @@ static int gfx_v8_0_late_init(void *handle)
 	return 0;
 }
 
-static void polaris11_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
-		bool enable)
+static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
+						       bool enable)
 {
 	uint32_t data, temp;
 
-	/* Send msg to SMU via Powerplay */
-	amdgpu_set_powergating_state(adev,
-			AMD_IP_BLOCK_TYPE_SMC,
-			enable ? AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
+	if (adev->asic_type == CHIP_POLARIS11)
+		/* Send msg to SMU via Powerplay */
+		amdgpu_set_powergating_state(adev,
+					     AMD_IP_BLOCK_TYPE_SMC,
+					     enable ?
+					     AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
 
-	if (enable) {
-		/* Enable static MGPG */
-		temp = data = RREG32(mmRLC_PG_CNTL);
+	temp = data = RREG32(mmRLC_PG_CNTL);
+	/* Set or clear the static per-CU power gating enable bit */
+	if (enable)
 		data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
-
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
-	} else {
-		temp = data = RREG32(mmRLC_PG_CNTL);
+	else
 		data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
 
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
-	}
+	if (temp != data)
+		WREG32(mmRLC_PG_CNTL, data);
 }
 
-static void polaris11_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
-		bool enable)
+static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
+							bool enable)
 {
 	uint32_t data, temp;
 
-	if (enable) {
-		/* Enable dynamic MGPG */
-		temp = data = RREG32(mmRLC_PG_CNTL);
+	temp = data = RREG32(mmRLC_PG_CNTL);
+	/* Set or clear the dynamic per-CU power gating enable bit */
+	if (enable)
 		data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
-
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
-	} else {
-		temp = data = RREG32(mmRLC_PG_CNTL);
+	else
 		data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
 
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
-	}
+	if (temp != data)
+		WREG32(mmRLC_PG_CNTL, data);
 }
 
 static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
@@ -5091,19 +5301,63 @@ static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *ade
 {
 	uint32_t data, temp;
 
-	if (enable) {
-		/* Enable quick PG */
-		temp = data = RREG32(mmRLC_PG_CNTL);
-		data |= 0x100000;
+	temp = data = RREG32(mmRLC_PG_CNTL);
+	/* Enable quick PG */
+	if (enable)
+		data |= RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
+	else
+		data &= ~RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
 
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
-	} else {
-		temp = data = RREG32(mmRLC_PG_CNTL);
-		data &= ~0x100000;
+	if (temp != data)
+		WREG32(mmRLC_PG_CNTL, data);
+}
 
-		if (temp != data)
-			WREG32(mmRLC_PG_CNTL, data);
+/*
+ * Set or clear GFX_POWER_GATING_ENABLE in RLC_PG_CNTL.  The register is
+ * only written back when the bit actually changes.
+ */
+static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
+					  bool enable)
+{
+	const u32 mask = RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
+	u32 cur, want;
+
+	cur = RREG32(mmRLC_PG_CNTL);
+	want = enable ? (cur | mask) : (cur & ~mask);
+	if (want != cur)
+		WREG32(mmRLC_PG_CNTL, want);
+}
+
+/*
+ * Set or clear GFX_PIPELINE_PG_ENABLE in RLC_PG_CNTL.  The register is
+ * only written back when the bit actually changes.
+ */
+static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
+						bool enable)
+{
+	const u32 mask = RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
+	u32 cur, want;
+
+	cur = RREG32(mmRLC_PG_CNTL);
+	want = enable ? (cur | mask) : (cur & ~mask);
+	if (want != cur)
+		WREG32(mmRLC_PG_CNTL, want);
+
+	/* Read any GFX register to wake up GFX. */
+	if (!enable)
+		cur = RREG32(mmDB_RENDER_CONTROL);
+}
+
+static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
+					  bool enable)
+{
+	if (!enable || !(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
+		cz_enable_gfx_cg_power_gating(adev, false);
+		cz_enable_gfx_pipeline_power_gating(adev, false);
+	} else {
+		cz_enable_gfx_cg_power_gating(adev, true);
+		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
+			cz_enable_gfx_pipeline_power_gating(adev, true);
 	}
 }
 
@@ -5111,21 +5365,42 @@ static int gfx_v8_0_set_powergating_state(void *handle,
 					  enum amd_powergating_state state)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
 
 	if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
 		return 0;
 
 	switch (adev->asic_type) {
+	case CHIP_CARRIZO:
+	case CHIP_STONEY:
+		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
+			cz_update_gfx_cg_power_gating(adev, enable);
+
+		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
+			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
+		else
+			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
+
+		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
+			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
+		else
+			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
+		break;
 	case CHIP_POLARIS11:
-		if (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG)
-			polaris11_enable_gfx_static_mg_power_gating(adev,
-					state == AMD_PG_STATE_GATE ? true : false);
-		else if (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG)
-			polaris11_enable_gfx_dynamic_mg_power_gating(adev,
-					state == AMD_PG_STATE_GATE ? true : false);
+		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
+			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
+		else
+			gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
+
+		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
+			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
+		else
+			gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
+
+		if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
+			polaris11_enable_gfx_quick_mg_power_gating(adev, true);
 		else
-			polaris11_enable_gfx_quick_mg_power_gating(adev,
-					state == AMD_PG_STATE_GATE ? true : false);
+			polaris11_enable_gfx_quick_mg_power_gating(adev, false);
 		break;
 	default:
 		break;
@@ -5139,7 +5414,7 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
 {
 	uint32_t data;
 
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 
 	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
 	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
@@ -5527,6 +5802,8 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev
 			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
 	}
 
+	gfx_v8_0_wait_for_rlc_serdes(adev);
+
 	adev->gfx.rlc.funcs->exit_safe_mode(adev);
 }
 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
@@ -5652,17 +5929,6 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 				      unsigned vm_id, bool ctx_switch)
 {
 	u32 header, control = 0;
-	u32 next_rptr = ring->wptr + 5;
-
-	if (ctx_switch)
-		next_rptr += 2;
-
-	next_rptr += 4;
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
-	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, next_rptr);
 
 	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
 	if (ctx_switch) {
@@ -5691,23 +5957,9 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 					  struct amdgpu_ib *ib,
 					  unsigned vm_id, bool ctx_switch)
 {
-	u32 header, control = 0;
-	u32 next_rptr = ring->wptr + 5;
-
-	control |= INDIRECT_BUFFER_VALID;
-
-	next_rptr += 4;
-	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
-	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
-	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, next_rptr);
-
-	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
+	u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
 
-	control |= ib->length_dw | (vm_id << 24);
-
-	amdgpu_ring_write(ring, header);
+	amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
 	amdgpu_ring_write(ring,
 #ifdef __BIG_ENDIAN
 					  (2 << 0) |
@@ -6160,9 +6412,9 @@ static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
 {
 	switch (adev->asic_type) {
 	case CHIP_TOPAZ:
-	case CHIP_STONEY:
 		adev->gfx.rlc.funcs = &iceland_rlc_funcs;
 		break;
+	case CHIP_STONEY:
 	case CHIP_CARRIZO:
 		adev->gfx.rlc.funcs = &cz_rlc_funcs;
 		break;
@@ -6200,6 +6452,20 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
 	}
 }
 
+/* Write @bitmap to the INACTIVE_CUS register field; zero means no change. */
+static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+						 u32 bitmap)
+{
+	u32 val;
+
+	if (bitmap == 0)
+		return;
+
+	val = (bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT) &
+	      GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, val);
+}
+
 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
@@ -6220,16 +6486,22 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 	int i, j, k, counter, active_cu_number = 0;
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+	unsigned disable_masks[4 * 2];
 
 	memset(cu_info, 0, sizeof(*cu_info));
 
+	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			mask = 1;
 			ao_bitmap = 0;
 			counter = 0;
-			gfx_v8_0_select_se_sh(adev, i, j);
+			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+			if (i < 4 && j < 2)
+				gfx_v8_0_set_user_cu_inactive_bitmap(
+					adev, disable_masks[i * 2 + j]);
 			bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
 			cu_info->bitmap[i][j] = bitmap;
 
@@ -6245,7 +6517,7 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
 			ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
 		}
 	}
-	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
index 16a49f53a2fa..bc82c794312c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.h
@@ -26,7 +26,4 @@
 
 extern const struct amd_ip_funcs gfx_v8_0_ip_funcs;
 
-uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num);
-
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 1feb6439cb0b..d24a82bd0c7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -39,6 +39,7 @@
 
 static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev);
 static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
+static int gmc_v7_0_wait_for_idle(void *handle);
 
 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
@@ -73,39 +74,15 @@ static void gmc_v7_0_init_golden_registers(struct amdgpu_device *adev)
 	}
 }
 
-/**
- * gmc7_mc_wait_for_idle - wait for MC idle callback.
- *
- * @adev: amdgpu_device pointer
- *
- * Wait for the MC (memory controller) to be idle.
- * (evergreen+).
- * Returns 0 if the MC is idle, -1 if not.
- */
-int gmc_v7_0_mc_wait_for_idle(struct amdgpu_device *adev)
-{
-	unsigned i;
-	u32 tmp;
-
-	for (i = 0; i < adev->usec_timeout; i++) {
-		/* read MC_STATUS */
-		tmp = RREG32(mmSRBM_STATUS) & 0x1F00;
-		if (!tmp)
-			return 0;
-		udelay(1);
-	}
-	return -1;
-}
-
-void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
-		      struct amdgpu_mode_mc_save *save)
+static void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
+			     struct amdgpu_mode_mc_save *save)
 {
 	u32 blackout;
 
 	if (adev->mode_info.num_crtc)
 		amdgpu_display_stop_mc_access(adev, save);
 
-	amdgpu_asic_wait_for_mc_idle(adev);
+	gmc_v7_0_wait_for_idle((void *)adev);
 
 	blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
 	if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -120,8 +97,8 @@ void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
 	udelay(100);
 }
 
-void gmc_v7_0_mc_resume(struct amdgpu_device *adev,
-			struct amdgpu_mode_mc_save *save)
+static void gmc_v7_0_mc_resume(struct amdgpu_device *adev,
+			       struct amdgpu_mode_mc_save *save)
 {
 	u32 tmp;
 
@@ -311,7 +288,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
 		amdgpu_display_set_vga_render_state(adev, false);
 
 	gmc_v7_0_mc_stop(adev, &save);
-	if (amdgpu_asic_wait_for_mc_idle(adev)) {
+	if (gmc_v7_0_wait_for_idle((void *)adev)) {
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
 	}
 	/* Update configuration */
@@ -331,7 +308,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
 	WREG32(mmMC_VM_AGP_BASE, 0);
 	WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
 	WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
-	if (amdgpu_asic_wait_for_mc_idle(adev)) {
+	if (gmc_v7_0_wait_for_idle((void *)adev)) {
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
 	}
 	gmc_v7_0_mc_resume(adev, &save);
@@ -1137,7 +1114,7 @@ static int gmc_v7_0_soft_reset(void *handle)
 
 	if (srbm_soft_reset) {
 		gmc_v7_0_mc_stop(adev, &save);
-		if (gmc_v7_0_wait_for_idle(adev)) {
+		if (gmc_v7_0_wait_for_idle((void *)adev)) {
 			dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
 		}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h
index 36fcbbc46ada..0b386b5d2f7a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.h
@@ -26,11 +26,4 @@
 
 extern const struct amd_ip_funcs gmc_v7_0_ip_funcs;
 
-/* XXX these shouldn't be exported */
-void gmc_v7_0_mc_stop(struct amdgpu_device *adev,
-		      struct amdgpu_mode_mc_save *save);
-void gmc_v7_0_mc_resume(struct amdgpu_device *adev,
-			struct amdgpu_mode_mc_save *save);
-int gmc_v7_0_mc_wait_for_idle(struct amdgpu_device *adev);
-
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 9945d5bbf1fe..717359d3ba8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -41,6 +41,7 @@
 
 static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev);
 static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
+static int gmc_v8_0_wait_for_idle(void *handle);
 
 MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_mc.bin");
@@ -147,44 +148,15 @@ static void gmc_v8_0_init_golden_registers(struct amdgpu_device *adev)
 	}
 }
 
-/**
- * gmc8_mc_wait_for_idle - wait for MC idle callback.
- *
- * @adev: amdgpu_device pointer
- *
- * Wait for the MC (memory controller) to be idle.
- * (evergreen+).
- * Returns 0 if the MC is idle, -1 if not.
- */
-int gmc_v8_0_mc_wait_for_idle(struct amdgpu_device *adev)
-{
-	unsigned i;
-	u32 tmp;
-
-	for (i = 0; i < adev->usec_timeout; i++) {
-		/* read MC_STATUS */
-		tmp = RREG32(mmSRBM_STATUS) & (SRBM_STATUS__VMC_BUSY_MASK |
-					       SRBM_STATUS__MCB_BUSY_MASK |
-					       SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
-					       SRBM_STATUS__MCC_BUSY_MASK |
-					       SRBM_STATUS__MCD_BUSY_MASK |
-					       SRBM_STATUS__VMC1_BUSY_MASK);
-		if (!tmp)
-			return 0;
-		udelay(1);
-	}
-	return -1;
-}
-
-void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
-		      struct amdgpu_mode_mc_save *save)
+static void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
+			     struct amdgpu_mode_mc_save *save)
 {
 	u32 blackout;
 
 	if (adev->mode_info.num_crtc)
 		amdgpu_display_stop_mc_access(adev, save);
 
-	amdgpu_asic_wait_for_mc_idle(adev);
+	gmc_v8_0_wait_for_idle(adev);
 
 	blackout = RREG32(mmMC_SHARED_BLACKOUT_CNTL);
 	if (REG_GET_FIELD(blackout, MC_SHARED_BLACKOUT_CNTL, BLACKOUT_MODE) != 1) {
@@ -199,8 +171,8 @@ void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
 	udelay(100);
 }
 
-void gmc_v8_0_mc_resume(struct amdgpu_device *adev,
-			struct amdgpu_mode_mc_save *save)
+static void gmc_v8_0_mc_resume(struct amdgpu_device *adev,
+			       struct amdgpu_mode_mc_save *save)
 {
 	u32 tmp;
 
@@ -393,7 +365,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
 		amdgpu_display_set_vga_render_state(adev, false);
 
 	gmc_v8_0_mc_stop(adev, &save);
-	if (amdgpu_asic_wait_for_mc_idle(adev)) {
+	if (gmc_v8_0_wait_for_idle((void *)adev)) {
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
 	}
 	/* Update configuration */
@@ -413,7 +385,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
 	WREG32(mmMC_VM_AGP_BASE, 0);
 	WREG32(mmMC_VM_AGP_TOP, 0x0FFFFFFF);
 	WREG32(mmMC_VM_AGP_BOT, 0x0FFFFFFF);
-	if (amdgpu_asic_wait_for_mc_idle(adev)) {
+	if (gmc_v8_0_wait_for_idle((void *)adev)) {
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
 	}
 	gmc_v8_0_mc_resume(adev, &save);
@@ -1140,7 +1112,7 @@ static int gmc_v8_0_soft_reset(void *handle)
 
 	if (srbm_soft_reset) {
 		gmc_v8_0_mc_stop(adev, &save);
-		if (gmc_v8_0_wait_for_idle(adev)) {
+		if (gmc_v8_0_wait_for_idle((void *)adev)) {
 			dev_warn(adev->dev, "Wait for GMC idle timed out !\n");
 		}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h
index 973436086b38..fc5001a8119d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.h
@@ -26,11 +26,4 @@
 
 extern const struct amd_ip_funcs gmc_v8_0_ip_funcs;
 
-/* XXX these shouldn't be exported */
-void gmc_v8_0_mc_stop(struct amdgpu_device *adev,
-		      struct amdgpu_mode_mc_save *save);
-void gmc_v8_0_mc_resume(struct amdgpu_device *adev,
-			struct amdgpu_mode_mc_save *save);
-int gmc_v8_0_mc_wait_for_idle(struct amdgpu_device *adev);
-
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c b/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c
index 460bc8ad37e6..825ccd63f2dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_dpm.c
@@ -72,6 +72,11 @@ static int iceland_dpm_sw_init(void *handle)
 
 static int iceland_dpm_sw_fini(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	release_firmware(adev->pm.fw);
+	adev->pm.fw = NULL;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index a789a863d677..5a0e245771ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -507,19 +507,19 @@ static int kv_enable_didt(struct amdgpu_device *adev, bool enable)
 	    pi->caps_db_ramping ||
 	    pi->caps_td_ramping ||
 	    pi->caps_tcp_ramping) {
-		gfx_v7_0_enter_rlc_safe_mode(adev);
+		adev->gfx.rlc.funcs->enter_safe_mode(adev);
 
 		if (enable) {
 			ret = kv_program_pt_config_registers(adev, didt_config_kv);
 			if (ret) {
-				gfx_v7_0_exit_rlc_safe_mode(adev);
+				adev->gfx.rlc.funcs->exit_safe_mode(adev);
 				return ret;
 			}
 		}
 
 		kv_do_enable_didt(adev, enable);
 
-		gfx_v7_0_exit_rlc_safe_mode(adev);
+		adev->gfx.rlc.funcs->exit_safe_mode(adev);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index f4c3130d3fdb..ac3730a6e49f 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -105,6 +105,15 @@ static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev)
 	}
 }
 
+static void sdma_v2_4_free_microcode(struct amdgpu_device *adev)
+{
+	int i;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		release_firmware(adev->sdma.instance[i].fw);
+		adev->sdma.instance[i].fw = NULL;
+	}
+}
+
 /**
  * sdma_v2_4_init_microcode - load ucode images from disk
  *
@@ -246,19 +255,6 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
 				   unsigned vm_id, bool ctx_switch)
 {
 	u32 vmid = vm_id & 0xf;
-	u32 next_rptr = ring->wptr + 5;
-
-	while ((next_rptr & 7) != 2)
-		next_rptr++;
-
-	next_rptr += 6;
-
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
-			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
-	amdgpu_ring_write(ring, lower_32_bits(ring->next_rptr_gpu_addr) & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
-	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
-	amdgpu_ring_write(ring, next_rptr);
 
 	/* IB packet must end on a 8 DW boundary */
 	sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
@@ -461,6 +457,8 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
 		/* Initialize the ring buffer's read and write pointers */
 		WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
 		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);
 
 		/* set the wb address whether it's enabled or not */
 		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
@@ -489,7 +487,11 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 
 		ring->ready = true;
+	}
 
+	sdma_v2_4_enable(adev, true);
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		ring = &adev->sdma.instance[i].ring;
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
 			ring->ready = false;
@@ -580,8 +582,8 @@ static int sdma_v2_4_start(struct amdgpu_device *adev)
 			return -EINVAL;
 	}
 
-	/* unhalt the MEs */
-	sdma_v2_4_enable(adev, true);
+	/* halt the engine before programming */
+	sdma_v2_4_enable(adev, false);
 
 	/* start the gfx rings and rlc compute queues */
 	r = sdma_v2_4_gfx_resume(adev);
@@ -1012,6 +1014,7 @@ static int sdma_v2_4_sw_fini(void *handle)
 	for (i = 0; i < adev->sdma.num_instances; i++)
 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
 
+	sdma_v2_4_free_microcode(adev);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 31d99b0010f7..f00db6f4c04c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -236,6 +236,15 @@ static void sdma_v3_0_init_golden_registers(struct amdgpu_device *adev)
 	}
 }
 
+static void sdma_v3_0_free_microcode(struct amdgpu_device *adev)
+{
+	int i;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		release_firmware(adev->sdma.instance[i].fw);
+		adev->sdma.instance[i].fw = NULL;
+	}
+}
+
 /**
  * sdma_v3_0_init_microcode - load ucode images from disk
  *
@@ -406,18 +415,6 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
 				   unsigned vm_id, bool ctx_switch)
 {
 	u32 vmid = vm_id & 0xf;
-	u32 next_rptr = ring->wptr + 5;
-
-	while ((next_rptr & 7) != 2)
-		next_rptr++;
-	next_rptr += 6;
-
-	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
-			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
-	amdgpu_ring_write(ring, lower_32_bits(ring->next_rptr_gpu_addr) & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
-	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
-	amdgpu_ring_write(ring, next_rptr);
 
 	/* IB packet must end on a 8 DW boundary */
 	sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8);
@@ -672,6 +669,8 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
 		/* Initialize the ring buffer's read and write pointers */
 		WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
 		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_RPTR + sdma_offsets[i], 0);
+		WREG32(mmSDMA0_GFX_IB_OFFSET + sdma_offsets[i], 0);
 
 		/* set the wb address whether it's enabled or not */
 		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
@@ -711,7 +710,15 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
 		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
 
 		ring->ready = true;
+	}
 
+	/* unhalt the MEs */
+	sdma_v3_0_enable(adev, true);
+	/* enable sdma ring preemption */
+	sdma_v3_0_ctx_switch_enable(adev, true);
+
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		ring = &adev->sdma.instance[i].ring;
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
 			ring->ready = false;
@@ -804,10 +811,9 @@ static int sdma_v3_0_start(struct amdgpu_device *adev)
 		}
 	}
 
-	/* unhalt the MEs */
-	sdma_v3_0_enable(adev, true);
-	/* enable sdma ring preemption */
-	sdma_v3_0_ctx_switch_enable(adev, true);
+	/* disable sdma engine before programming it */
+	sdma_v3_0_ctx_switch_enable(adev, false);
+	sdma_v3_0_enable(adev, false);
 
 	/* start the gfx rings and rlc compute queues */
 	r = sdma_v3_0_gfx_resume(adev);
@@ -1247,6 +1253,7 @@ static int sdma_v3_0_sw_fini(void *handle)
 	for (i = 0; i < adev->sdma.num_instances; i++)
 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
 
+	sdma_v3_0_free_microcode(adev);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
index b7615cefcac4..f06f6f4dc3a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_dpm.c
@@ -71,6 +71,11 @@ static int tonga_dpm_sw_init(void *handle)
 
 static int tonga_dpm_sw_fini(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	release_firmware(adev->pm.fw);
+	adev->pm.fw = NULL;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index f07551476a70..416c8567d3ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -34,6 +34,8 @@
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 
+#include "bif/bif_4_1_d.h"
+
 static void uvd_v4_2_mc_resume(struct amdgpu_device *adev);
 static void uvd_v4_2_init_cg(struct amdgpu_device *adev);
 static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev);
@@ -439,6 +441,32 @@ static void uvd_v4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
 }
 
 /**
+ * uvd_v4_2_ring_emit_hdp_flush - emit an hdp flush
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp flush.
+ */
+static void uvd_v4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
+	amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * uvd_v4_2_ring_emit_hdp_invalidate - emit an hdp invalidate
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp invalidate.
+ */
+static void uvd_v4_2_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
+	amdgpu_ring_write(ring, 1);
+}
+
+/**
  * uvd_v4_2_ring_test_ring - register write test
  *
  * @ring: amdgpu_ring pointer
@@ -763,6 +791,8 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
 	.parse_cs = amdgpu_uvd_ring_parse_cs,
 	.emit_ib = uvd_v4_2_ring_emit_ib,
 	.emit_fence = uvd_v4_2_ring_emit_fence,
+	.emit_hdp_flush = uvd_v4_2_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = uvd_v4_2_ring_emit_hdp_invalidate,
 	.test_ring = uvd_v4_2_ring_test_ring,
 	.test_ib = uvd_v4_2_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index e0a76a883d46..dd636c4c4b08 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -31,6 +31,7 @@
 #include "uvd/uvd_5_0_sh_mask.h"
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
+#include "bif/bif_5_0_d.h"
 #include "vi.h"
 
 static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev);
@@ -489,6 +490,32 @@ static void uvd_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
 }
 
 /**
+ * uvd_v5_0_ring_emit_hdp_flush - emit an hdp flush
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp flush.
+ */
+static void uvd_v5_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
+	amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * uvd_v5_0_ring_emit_hdp_invalidate - emit an hdp invalidate
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp invalidate.
+ */
+static void uvd_v5_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
+	amdgpu_ring_write(ring, 1);
+}
+
+/**
  * uvd_v5_0_ring_test_ring - register write test
  *
  * @ring: amdgpu_ring pointer
@@ -815,6 +842,8 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
 	.parse_cs = amdgpu_uvd_ring_parse_cs,
 	.emit_ib = uvd_v5_0_ring_emit_ib,
 	.emit_fence = uvd_v5_0_ring_emit_fence,
+	.emit_hdp_flush = uvd_v5_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = uvd_v5_0_ring_emit_hdp_invalidate,
 	.test_ring = uvd_v5_0_ring_test_ring,
 	.test_ib = uvd_v5_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index c9929d665c01..07e9a987fbee 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -33,6 +33,7 @@
 #include "oss/oss_2_0_sh_mask.h"
 #include "smu/smu_7_1_3_d.h"
 #include "smu/smu_7_1_3_sh_mask.h"
+#include "bif/bif_5_1_d.h"
 #include "vi.h"
 
 static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev);
@@ -385,8 +386,8 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
 	uint32_t mp_swap_cntl;
 	int i, j, r;
 
-	/*disable DPG */
-	WREG32_P(mmUVD_POWER_STATUS, 0, ~(1 << 2));
+	/* disable DPG */
+	WREG32_P(mmUVD_POWER_STATUS, 0, ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
 
 	/* disable byte swapping */
 	lmi_swap_cntl = 0;
@@ -405,17 +406,21 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
 	}
 
 	/* disable interupt */
-	WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
+	WREG32_P(mmUVD_MASTINT_EN, 0, ~UVD_MASTINT_EN__VCPU_EN_MASK);
 
 	/* stall UMC and register bus before resetting VCPU */
-	WREG32_P(mmUVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
+	WREG32_P(mmUVD_LMI_CTRL2, UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
 	mdelay(1);
 
 	/* put LMI, VCPU, RBC etc... into reset */
-	WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
+	WREG32(mmUVD_SOFT_RESET,
+		UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__RBC_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__CXW_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
 		UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
 	mdelay(5);
 
@@ -424,8 +429,13 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
 	mdelay(5);
 
 	/* initialize UVD memory controller */
-	WREG32(mmUVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
-			     (1 << 21) | (1 << 9) | (1 << 20));
+	WREG32(mmUVD_LMI_CTRL,
+		(0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
+		UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
+		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
+		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
+		UVD_LMI_CTRL__REQ_MODE_MASK |
+		UVD_LMI_CTRL__DISABLE_ON_FWV_FAIL_MASK);
 
 #ifdef __BIG_ENDIAN
 	/* swap (8 in 32) RB and IB */
@@ -447,10 +457,10 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
 	mdelay(5);
 
 	/* enable VCPU clock */
-	WREG32(mmUVD_VCPU_CNTL,  1 << 9);
+	WREG32(mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK);
 
 	/* enable UMC */
-	WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8));
+	WREG32_P(mmUVD_LMI_CTRL2, 0, ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
 
 	/* boot up the VCPU */
 	WREG32(mmUVD_SOFT_RESET, 0);
@@ -484,10 +494,12 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
 		return r;
 	}
 	/* enable master interrupt */
-	WREG32_P(mmUVD_MASTINT_EN, 3 << 1, ~(3 << 1));
+	WREG32_P(mmUVD_MASTINT_EN,
+		(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK),
+		~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK));
 
 	/* clear the bit 4 of UVD_STATUS */
-	WREG32_P(mmUVD_STATUS, 0, ~(2 << 1));
+	WREG32_P(mmUVD_STATUS, 0, ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
 
 	rb_bufsz = order_base_2(ring->ring_size);
 	tmp = 0;
@@ -581,6 +593,32 @@ static void uvd_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
 }
 
 /**
+ * uvd_v6_0_ring_emit_hdp_flush - emit an hdp flush
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp flush.
+ */
+static void uvd_v6_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0));
+	amdgpu_ring_write(ring, 0);
+}
+
+/**
+ * uvd_v6_0_ring_emit_hdp_invalidate - emit an hdp invalidate
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Emits an hdp invalidate.
+ */
+static void uvd_v6_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+	amdgpu_ring_write(ring, PACKET0(mmHDP_DEBUG0, 0));
+	amdgpu_ring_write(ring, 1);
+}
+
+/**
  * uvd_v6_0_ring_test_ring - register write test
  *
  * @ring: amdgpu_ring pointer
@@ -847,7 +885,8 @@ static int uvd_v6_0_set_clockgating_state(void *handle,
 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
 	static int curstate = -1;
 
-	if (adev->asic_type == CHIP_FIJI)
+	if (adev->asic_type == CHIP_FIJI ||
+			adev->asic_type == CHIP_POLARIS10)
 		uvd_v6_set_bypass_mode(adev, enable);
 
 	if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
@@ -919,6 +958,8 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_funcs = {
 	.parse_cs = amdgpu_uvd_ring_parse_cs,
 	.emit_ib = uvd_v6_0_ring_emit_ib,
 	.emit_fence = uvd_v6_0_ring_emit_fence,
+	.emit_hdp_flush = uvd_v6_0_ring_emit_hdp_flush,
+	.emit_hdp_invalidate = uvd_v6_0_ring_emit_hdp_invalidate,
 	.test_ring = uvd_v6_0_ring_test_ring,
 	.test_ib = uvd_v6_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 2c88d0b66cf3..cda7def9dc2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -203,6 +203,29 @@ static void vi_didt_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 	spin_unlock_irqrestore(&adev->didt_idx_lock, flags);
 }
 
+static u32 vi_gc_cac_rreg(struct amdgpu_device *adev, u32 reg)
+{
+	unsigned long flags;
+	u32 r;
+
+	spin_lock_irqsave(&adev->gc_cac_idx_lock, flags);
+	WREG32(mmGC_CAC_IND_INDEX, (reg));
+	r = RREG32(mmGC_CAC_IND_DATA);
+	spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags);
+	return r;
+}
+
+static void vi_gc_cac_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&adev->gc_cac_idx_lock, flags);
+	WREG32(mmGC_CAC_IND_INDEX, (reg));
+	WREG32(mmGC_CAC_IND_DATA, (v));
+	spin_unlock_irqrestore(&adev->gc_cac_idx_lock, flags);
+}
+
+
 static const u32 tonga_mgcg_cgcg_init[] =
 {
 	mmCGTT_DRM_CLK_CTRL0, 0xffffffff, 0x00600100,
@@ -421,6 +444,20 @@ static bool vi_read_bios_from_rom(struct amdgpu_device *adev,
 	return true;
 }
 
+static u32 vi_get_virtual_caps(struct amdgpu_device *adev)
+{
+	u32 caps = 0;
+	u32 reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER);
+
+	if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, IOV_ENABLE))
+		caps |= AMDGPU_VIRT_CAPS_SRIOV_EN;
+
+	if (REG_GET_FIELD(reg, BIF_IOV_FUNC_IDENTIFIER, FUNC_IDENTIFIER))
+		caps |= AMDGPU_VIRT_CAPS_IS_VF;
+
+	return caps;
+}
+
 static const struct amdgpu_allowed_register_entry tonga_allowed_read_registers[] = {
 	{mmGB_MACROTILE_MODE7, true},
 };
@@ -519,12 +556,12 @@ static uint32_t vi_read_indexed_register(struct amdgpu_device *adev, u32 se_num,
 
 	mutex_lock(&adev->grbm_idx_mutex);
 	if (se_num != 0xffffffff || sh_num != 0xffffffff)
-		gfx_v8_0_select_se_sh(adev, se_num, sh_num);
+		amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
 
 	val = RREG32(reg_offset);
 
 	if (se_num != 0xffffffff || sh_num != 0xffffffff)
-		gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 	return val;
 }
@@ -583,7 +620,7 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
 	return -EINVAL;
 }
 
-static void vi_gpu_pci_config_reset(struct amdgpu_device *adev)
+static int vi_gpu_pci_config_reset(struct amdgpu_device *adev)
 {
 	u32 i;
 
@@ -598,11 +635,14 @@ static void vi_gpu_pci_config_reset(struct amdgpu_device *adev)
 
 	/* wait for asic to come out of reset */
 	for (i = 0; i < adev->usec_timeout; i++) {
-		if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff)
-			break;
+		if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff) {
+			/* enable BM */
+			pci_set_master(adev->pdev);
+			return 0;
+		}
 		udelay(1);
 	}
-
+	return -EINVAL;
 }
 
 static void vi_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hung)
@@ -628,13 +668,15 @@ static void vi_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hun
  */
 static int vi_asic_reset(struct amdgpu_device *adev)
 {
+	int r;
+
 	vi_set_bios_scratch_engine_hung(adev, true);
 
-	vi_gpu_pci_config_reset(adev);
+	r = vi_gpu_pci_config_reset(adev);
 
 	vi_set_bios_scratch_engine_hung(adev, false);
 
-	return 0;
+	return r;
 }
 
 static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
@@ -1118,9 +1160,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
 	.get_xclk = &vi_get_xclk,
 	.set_uvd_clocks = &vi_set_uvd_clocks,
 	.set_vce_clocks = &vi_set_vce_clocks,
-	/* these should be moved to their own ip modules */
-	.get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
-	.wait_for_mc_idle = &gmc_v8_0_mc_wait_for_idle,
+	.get_virtual_caps = &vi_get_virtual_caps,
 };
 
 static int vi_common_early_init(void *handle)
@@ -1141,6 +1181,8 @@ static int vi_common_early_init(void *handle)
 	adev->uvd_ctx_wreg = &vi_uvd_ctx_wreg;
 	adev->didt_rreg = &vi_didt_rreg;
 	adev->didt_wreg = &vi_didt_wreg;
+	adev->gc_cac_rreg = &vi_gc_cac_rreg;
+	adev->gc_cac_wreg = &vi_gc_cac_wreg;
 
 	adev->asic_funcs = &vi_asic_funcs;
 
@@ -1207,19 +1249,39 @@ static int vi_common_early_init(void *handle)
 			AMD_CG_SUPPORT_HDP_LS |
 			AMD_CG_SUPPORT_SDMA_MGCG |
 			AMD_CG_SUPPORT_SDMA_LS;
+		/* rev0 hardware doesn't support PG */
 		adev->pg_flags = 0;
+		if (adev->rev_id != 0x00)
+			adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG |
+				AMD_PG_SUPPORT_GFX_SMG |
+				AMD_PG_SUPPORT_GFX_DMG |
+				AMD_PG_SUPPORT_CP |
+				AMD_PG_SUPPORT_RLC_SMU_HS |
+				AMD_PG_SUPPORT_GFX_PIPELINE;
 		adev->external_rev_id = adev->rev_id + 0x1;
 		break;
 	case CHIP_STONEY:
 		adev->cg_flags = AMD_CG_SUPPORT_UVD_MGCG |
 			AMD_CG_SUPPORT_GFX_MGCG |
 			AMD_CG_SUPPORT_GFX_MGLS |
+			AMD_CG_SUPPORT_GFX_RLC_LS |
+			AMD_CG_SUPPORT_GFX_CP_LS |
+			AMD_CG_SUPPORT_GFX_CGTS |
+			AMD_CG_SUPPORT_GFX_MGLS |
+			AMD_CG_SUPPORT_GFX_CGTS_LS |
+			AMD_CG_SUPPORT_GFX_CGCG |
+			AMD_CG_SUPPORT_GFX_CGLS |
 			AMD_CG_SUPPORT_BIF_LS |
 			AMD_CG_SUPPORT_HDP_MGCG |
 			AMD_CG_SUPPORT_HDP_LS |
 			AMD_CG_SUPPORT_SDMA_MGCG |
 			AMD_CG_SUPPORT_SDMA_LS;
-		adev->pg_flags = 0;
+		adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
+			AMD_PG_SUPPORT_GFX_SMG |
+			AMD_PG_SUPPORT_GFX_DMG |
+			AMD_PG_SUPPORT_GFX_PIPELINE |
+			AMD_PG_SUPPORT_CP |
+			AMD_PG_SUPPORT_RLC_SMU_HS;
 		adev->external_rev_id = adev->rev_id + 0x1;
 		break;
 	default:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index ac005796b71c..4f3849ac8c07 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -63,13 +63,12 @@ static struct kfd_process *create_process(const struct task_struct *thread);
 void kfd_process_create_wq(void)
 {
 	if (!kfd_process_wq)
-		kfd_process_wq = create_workqueue("kfd_process_wq");
+		kfd_process_wq = alloc_workqueue("kfd_process_wq", 0, 0);
 }
 
 void kfd_process_destroy_wq(void)
 {
 	if (kfd_process_wq) {
-		flush_workqueue(kfd_process_wq);
 		destroy_workqueue(kfd_process_wq);
 		kfd_process_wq = NULL;
 	}
@@ -242,13 +241,19 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 	pqm_uninit(&p->pqm);
 
 	/* Iterate over all process device data structure and check
-	 * if we should reset all wavefronts */
-	list_for_each_entry(pdd, &p->per_device_data, per_device_list)
+	 * if we should delete debug managers and reset all wavefronts
+	 */
+	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
+		if ((pdd->dev->dbgmgr) &&
+				(pdd->dev->dbgmgr->pasid == p->pasid))
+			kfd_dbgmgr_destroy(pdd->dev->dbgmgr);
+
 		if (pdd->reset_wavefronts) {
 			pr_warn("amdkfd: Resetting all wave fronts\n");
 			dbgdev_wave_reset_wavefronts(pdd->dev, p);
 			pdd->reset_wavefronts = false;
 		}
+	}
 
 	mutex_unlock(&p->mutex);
 
@@ -324,6 +329,7 @@ err_process_pqm_init:
 	synchronize_rcu();
 	mmu_notifier_unregister_no_release(&process->mmu_notifier, process->mm);
 err_mmu_notifier:
+	mutex_destroy(&process->mutex);
 	kfd_pasid_free(process->pasid);
 err_alloc_pasid:
 	kfree(process->queues);
@@ -404,42 +410,52 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid)
 
 	idx = srcu_read_lock(&kfd_processes_srcu);
 
+	/*
+	 * Look for the process that matches the pasid. If there is no such
+	 * process, we either released it in amdkfd's own notifier, or there
+	 * is a bug. Unfortunately, there is no way to tell...
+	 */
 	hash_for_each_rcu(kfd_processes_table, i, p, kfd_processes)
-		if (p->pasid == pasid)
-			break;
+		if (p->pasid == pasid) {
 
-	srcu_read_unlock(&kfd_processes_srcu, idx);
+			srcu_read_unlock(&kfd_processes_srcu, idx);
 
-	BUG_ON(p->pasid != pasid);
+			pr_debug("Unbinding process %d from IOMMU\n", pasid);
 
-	mutex_lock(&p->mutex);
+			mutex_lock(&p->mutex);
 
-	if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
-		kfd_dbgmgr_destroy(dev->dbgmgr);
+			if ((dev->dbgmgr) && (dev->dbgmgr->pasid == p->pasid))
+				kfd_dbgmgr_destroy(dev->dbgmgr);
 
-	pqm_uninit(&p->pqm);
+			pqm_uninit(&p->pqm);
 
-	pdd = kfd_get_process_device_data(dev, p);
+			pdd = kfd_get_process_device_data(dev, p);
 
-	if (!pdd) {
-		mutex_unlock(&p->mutex);
-		return;
-	}
+			if (!pdd) {
+				mutex_unlock(&p->mutex);
+				return;
+			}
 
-	if (pdd->reset_wavefronts) {
-		dbgdev_wave_reset_wavefronts(pdd->dev, p);
-		pdd->reset_wavefronts = false;
-	}
+			if (pdd->reset_wavefronts) {
+				dbgdev_wave_reset_wavefronts(pdd->dev, p);
+				pdd->reset_wavefronts = false;
+			}
 
-	/*
-	 * Just mark pdd as unbound, because we still need it to call
-	 * amd_iommu_unbind_pasid() in when the process exits.
-	 * We don't call amd_iommu_unbind_pasid() here
-	 * because the IOMMU called us.
-	 */
-	pdd->bound = false;
+			/*
+			 * Just mark pdd as unbound, because we still need it
+			 * to call amd_iommu_unbind_pasid() when the
+			 * process exits.
+			 * We don't call amd_iommu_unbind_pasid() here
+			 * because the IOMMU called us.
+			 */
+			pdd->bound = false;
 
-	mutex_unlock(&p->mutex);
+			mutex_unlock(&p->mutex);
+
+			return;
+		}
+
+	srcu_read_unlock(&kfd_processes_srcu, idx);
 }
 
 struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 74909e72a009..884c96f50c3d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -666,7 +666,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
 			dev->node_props.simd_count);
 
 	if (dev->mem_bank_count < dev->node_props.mem_banks_count) {
-		pr_warn("kfd: mem_banks_count truncated from %d to %d\n",
+		pr_info_once("kfd: mem_banks_count truncated from %d to %d\n",
 				dev->node_props.mem_banks_count,
 				dev->mem_bank_count);
 		sysfs_show_32bit_prop(buffer, "mem_banks_count",
diff --git a/drivers/gpu/drm/amd/include/amd_pcie.h b/drivers/gpu/drm/amd/include/amd_pcie.h
index 7c2a916c1e63..5eb895fd98bf 100644
--- a/drivers/gpu/drm/amd/include/amd_pcie.h
+++ b/drivers/gpu/drm/amd/include/amd_pcie.h
@@ -37,6 +37,13 @@
 #define CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_MASK   0x0000FFFF
 #define CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_SHIFT  0
 
+/* gen: chipset 1/2, asic 1/2/3 */
+#define AMDGPU_DEFAULT_PCIE_GEN_MASK (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 \
+				      | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 \
+				      | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 \
+				      | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 \
+				      | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3)
+
 /* Following flags shows PCIe lane width switch supported in driver which are decided by chipset and ASIC */
 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X1          0x00010000
 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X2          0x00020000
@@ -47,4 +54,11 @@
 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_X32         0x00400000
 #define CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT       16
 
+/* 1/2/4/8/16 lanes */
+#define AMDGPU_DEFAULT_PCIE_MLW_MASK (CAIL_PCIE_LINK_WIDTH_SUPPORT_X1 \
+				      | CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 \
+				      | CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 \
+				      | CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 \
+				      | CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
+
 #endif
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index 6080951d539d..a74a0d2ff1ca 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -26,15 +26,6 @@
 #define AMD_MAX_USEC_TIMEOUT		100000  /* 100 ms */
 
 /*
-* Supported GPU families (aligned with amdgpu_drm.h)
-*/
-#define AMD_FAMILY_UNKNOWN              0
-#define AMD_FAMILY_CI                   120 /* Bonaire, Hawaii */
-#define AMD_FAMILY_KV                   125 /* Kaveri, Kabini, Mullins */
-#define AMD_FAMILY_VI                   130 /* Iceland, Tonga */
-#define AMD_FAMILY_CZ                   135 /* Carrizo */
-
-/*
  * Supported ASIC types
  */
 enum amd_asic_type {
@@ -120,6 +111,8 @@ enum amd_powergating_state {
 #define AMD_PG_SUPPORT_SDMA			(1 << 8)
 #define AMD_PG_SUPPORT_ACP			(1 << 9)
 #define AMD_PG_SUPPORT_SAMU			(1 << 10)
+#define AMD_PG_SUPPORT_GFX_QUICK_MG		(1 << 11)
+#define AMD_PG_SUPPORT_GFX_PIPELINE		(1 << 12)
 
 enum amd_pm_state_type {
 	/* not used for dpm */
@@ -157,6 +150,7 @@ struct amd_ip_funcs {
 	int (*hw_init)(void *handle);
 	/* tears down the hw state */
 	int (*hw_fini)(void *handle);
+	void (*late_fini)(void *handle);
 	/* handles IP specific hw/sw changes for suspend */
 	int (*suspend)(void *handle);
 	/* handles IP specific hw/sw changes for resume */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h
index ebaf67bb1589..90ff7c8a6011 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_d.h
@@ -2823,4 +2823,7 @@
 #define mmDC_EDC_CSINVOC_CNT                                                    0x3192
 #define mmDC_EDC_RESTORE_CNT                                                    0x3193
 
+#define mmGC_CAC_IND_INDEX                                                      0x129a
+#define mmGC_CAC_IND_DATA                                                       0x129b
+
 #endif /* GFX_8_0_D_H */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_sh_mask.h
index 7d722458d9f5..4070ca3a68eb 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_8_0_sh_mask.h
@@ -8730,8 +8730,6 @@
 #define RLC_GPM_STAT__DYN_CU_POWERING_DOWN__SHIFT 0x10
 #define RLC_GPM_STAT__ABORTED_PD_SEQUENCE_MASK 0x20000
 #define RLC_GPM_STAT__ABORTED_PD_SEQUENCE__SHIFT 0x11
-#define RLC_GPM_STAT__RESERVED_MASK 0xfc0000
-#define RLC_GPM_STAT__RESERVED__SHIFT 0x12
 #define RLC_GPM_STAT__PG_ERROR_STATUS_MASK 0xff000000
 #define RLC_GPM_STAT__PG_ERROR_STATUS__SHIFT 0x18
 #define RLC_GPU_CLOCK_32_RES_SEL__RES_SEL_MASK 0x3f
@@ -8764,8 +8762,10 @@
 #define RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE__SHIFT 0x12
 #define RLC_PG_CNTL__SMU_HANDSHAKE_ENABLE_MASK 0x80000
 #define RLC_PG_CNTL__SMU_HANDSHAKE_ENABLE__SHIFT 0x13
-#define RLC_PG_CNTL__RESERVED1_MASK 0xf00000
-#define RLC_PG_CNTL__RESERVED1__SHIFT 0x14
+#define RLC_PG_CNTL__QUICK_PG_ENABLE_MASK 0x100000
+#define RLC_PG_CNTL__QUICK_PG_ENABLE__SHIFT 0x14
+#define RLC_PG_CNTL__RESERVED1_MASK 0xe00000
+#define RLC_PG_CNTL__RESERVED1__SHIFT 0x15
 #define RLC_GPM_THREAD_PRIORITY__THREAD0_PRIORITY_MASK 0xff
 #define RLC_GPM_THREAD_PRIORITY__THREAD0_PRIORITY__SHIFT 0x0
 #define RLC_GPM_THREAD_PRIORITY__THREAD1_PRIORITY_MASK 0xff00
@@ -9102,8 +9102,6 @@
 #define RLC_GPM_LOG_CONT__CONT__SHIFT 0x0
 #define RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG_MASK 0xff
 #define RLC_PG_DELAY_3__CGCG_ACTIVE_BEFORE_CGPG__SHIFT 0x0
-#define RLC_PG_DELAY_3__RESERVED_MASK 0xffffff00
-#define RLC_PG_DELAY_3__RESERVED__SHIFT 0x8
 #define RLC_GPM_INT_DISABLE_TH0__DISABLE_MASK 0xffffffff
 #define RLC_GPM_INT_DISABLE_TH0__DISABLE__SHIFT 0x0
 #define RLC_GPM_INT_DISABLE_TH1__DISABLE_MASK 0xffffffff
@@ -9124,14 +9122,8 @@
 #define RLC_SRM_DEBUG_SELECT__RESERVED__SHIFT 0x8
 #define RLC_SRM_DEBUG__DATA_MASK 0xffffffff
 #define RLC_SRM_DEBUG__DATA__SHIFT 0x0
-#define RLC_SRM_ARAM_ADDR__ADDR_MASK 0x3ff
-#define RLC_SRM_ARAM_ADDR__ADDR__SHIFT 0x0
-#define RLC_SRM_ARAM_ADDR__RESERVED_MASK 0xfffffc00
-#define RLC_SRM_ARAM_ADDR__RESERVED__SHIFT 0xa
 #define RLC_SRM_ARAM_DATA__DATA_MASK 0xffffffff
 #define RLC_SRM_ARAM_DATA__DATA__SHIFT 0x0
-#define RLC_SRM_DRAM_ADDR__ADDR_MASK 0x3ff
-#define RLC_SRM_DRAM_ADDR__ADDR__SHIFT 0x0
 #define RLC_SRM_DRAM_ADDR__RESERVED_MASK 0xfffffc00
 #define RLC_SRM_DRAM_ADDR__RESERVED__SHIFT 0xa
 #define RLC_SRM_DRAM_DATA__DATA_MASK 0xffffffff
@@ -17946,8 +17938,6 @@
 #define VGT_TESS_DISTRIBUTION__ACCUM_TRI__SHIFT 0x8
 #define VGT_TESS_DISTRIBUTION__ACCUM_QUAD_MASK 0xff0000
 #define VGT_TESS_DISTRIBUTION__ACCUM_QUAD__SHIFT 0x10
-#define VGT_TESS_DISTRIBUTION__DONUT_SPLIT_MASK 0xff000000
-#define VGT_TESS_DISTRIBUTION__DONUT_SPLIT__SHIFT 0x18
 #define VGT_TF_RING_SIZE__SIZE_MASK 0xffff
 #define VGT_TF_RING_SIZE__SIZE__SHIFT 0x0
 #define VGT_SYS_CONFIG__DUAL_CORE_EN_MASK 0x1
@@ -20502,8 +20492,6 @@
 #define DIDT_SQ_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
 #define DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
 #define DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
-#define DIDT_SQ_CTRL0__UNUSED_0_MASK 0xffffffc0
-#define DIDT_SQ_CTRL0__UNUSED_0__SHIFT 0x6
 #define DIDT_SQ_CTRL1__MIN_POWER_MASK 0xffff
 #define DIDT_SQ_CTRL1__MIN_POWER__SHIFT 0x0
 #define DIDT_SQ_CTRL1__MAX_POWER_MASK 0xffff0000
@@ -20558,8 +20546,6 @@
 #define DIDT_DB_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
 #define DIDT_DB_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
 #define DIDT_DB_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
-#define DIDT_DB_CTRL0__UNUSED_0_MASK 0xffffffc0
-#define DIDT_DB_CTRL0__UNUSED_0__SHIFT 0x6
 #define DIDT_DB_CTRL1__MIN_POWER_MASK 0xffff
 #define DIDT_DB_CTRL1__MIN_POWER__SHIFT 0x0
 #define DIDT_DB_CTRL1__MAX_POWER_MASK 0xffff0000
@@ -20614,8 +20600,6 @@
 #define DIDT_TD_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
 #define DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
 #define DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
-#define DIDT_TD_CTRL0__UNUSED_0_MASK 0xffffffc0
-#define DIDT_TD_CTRL0__UNUSED_0__SHIFT 0x6
 #define DIDT_TD_CTRL1__MIN_POWER_MASK 0xffff
 #define DIDT_TD_CTRL1__MIN_POWER__SHIFT 0x0
 #define DIDT_TD_CTRL1__MAX_POWER_MASK 0xffff0000
@@ -20670,8 +20654,6 @@
 #define DIDT_TCP_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
 #define DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
 #define DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
-#define DIDT_TCP_CTRL0__UNUSED_0_MASK 0xffffffc0
-#define DIDT_TCP_CTRL0__UNUSED_0__SHIFT 0x6
 #define DIDT_TCP_CTRL1__MIN_POWER_MASK 0xffff
 #define DIDT_TCP_CTRL1__MIN_POWER__SHIFT 0x0
 #define DIDT_TCP_CTRL1__MAX_POWER_MASK 0xffff0000
@@ -20726,8 +20708,6 @@
 #define DIDT_DBR_CTRL0__DIDT_CTRL_RST__SHIFT 0x4
 #define DIDT_DBR_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK 0x20
 #define DIDT_DBR_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT 0x5
-#define DIDT_DBR_CTRL0__UNUSED_0_MASK 0xffffffc0
-#define DIDT_DBR_CTRL0__UNUSED_0__SHIFT 0x6
 #define DIDT_DBR_CTRL1__MIN_POWER_MASK 0xffff
 #define DIDT_DBR_CTRL1__MIN_POWER__SHIFT 0x0
 #define DIDT_DBR_CTRL1__MAX_POWER_MASK 0xffff0000
@@ -20773,4 +20753,84 @@
 #define DIDT_DBR_WEIGHT8_11__WEIGHT11_MASK 0xff000000
 #define DIDT_DBR_WEIGHT8_11__WEIGHT11__SHIFT 0x18
 
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK    0x00000001
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT  0x00000000
+
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK       0x0000007e
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK       0x00001f80L
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT     0x00000001
+#define DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT     0x00000007
+
+#define DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK   0x1fffe000L
+#define DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT 0x0000000d
+
+#define DIDT_SQ_STALL_CTRL__UNUSED_0_MASK                  0xe0000000L
+#define DIDT_SQ_STALL_CTRL__UNUSED_0__SHIFT                0x0000001d
+
+#define DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK       0x00000001L
+#define DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT     0x00000000
+
+#define DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK       0x00007ffeL
+#define DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT     0x00000001
+#define DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK       0x1fff8000L
+#define DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT     0x0000000f
+
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK    0x00000001L
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT  0x00000000
+
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK       0x0000007eL
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK       0x00001f80L
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT     0x00000001
+#define DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT     0x00000007
+
+#define DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK   0x1fffe000L
+#define DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT 0x0000000d
+
+#define DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK     0x00000fc0L
+#define DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK     0x0003f000L
+#define DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT   0x00000006
+#define DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT   0x0000000c
+
+#define DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK       0x00000001L
+#define DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK       0x00007ffeL
+#define DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK       0x1fff8000L
+
+#define DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT     0x00000000
+#define DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT     0x00000001
+#define DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT     0x0000000f
+
+#define DIDT_TD_STALL_CTRL__UNUSED_0_MASK                  0xe0000000L
+#define DIDT_TD_STALL_CTRL__UNUSED_0__SHIFT                0x0000001d
+
+#define DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK     0x00000fc0L
+#define DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK     0x0003f000L
+#define DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT   0x00000006
+#define DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT   0x0000000c
+
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK   0x00000001L
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT 0x00000000
+
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK      0x0000007eL
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK      0x00001f80L
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT    0x00000001
+#define DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT    0x00000007
+
+#define DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK  0x1fffe000L
+#define DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT 0x0000000d
+
+#define DIDT_TCP_STALL_CTRL__UNUSED_0_MASK                 0xe0000000L
+#define DIDT_TCP_STALL_CTRL__UNUSED_0__SHIFT               0x0000001d
+
+#define DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK      0x00000001L
+#define DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK      0x00007ffeL
+#define DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK      0x1fff8000L
+#define DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT    0x00000000
+#define DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT    0x00000001
+#define DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT    0x0000000f
+
+#define DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK    0x00000fc0L
+#define DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK    0x0003f000L
+#define DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT  0x00000006
+#define DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT  0x0000000c
+
 #endif /* GFX_8_0_SH_MASK_H */
diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h
index 32f3e345de08..3493da5c8f0e 100644
--- a/drivers/gpu/drm/amd/include/atombios.h
+++ b/drivers/gpu/drm/amd/include/atombios.h
@@ -5538,6 +5538,78 @@ typedef struct  _ATOM_ASIC_PROFILING_INFO_V3_5
   ULONG  ulReserved[12];
 }ATOM_ASIC_PROFILING_INFO_V3_5;
 
+/* for Polaris10/11 AVFS parameters */
+typedef struct  _ATOM_ASIC_PROFILING_INFO_V3_6
+{
+  ATOM_COMMON_TABLE_HEADER         asHeader;
+  ULONG  ulMaxVddc;
+  ULONG  ulMinVddc;
+  USHORT usLkgEuseIndex;
+  UCHAR  ucLkgEfuseBitLSB;
+  UCHAR  ucLkgEfuseLength;
+  ULONG  ulLkgEncodeLn_MaxDivMin;
+  ULONG  ulLkgEncodeMax;
+  ULONG  ulLkgEncodeMin;
+  EFUSE_LINEAR_FUNC_PARAM sRoFuse;
+  ULONG  ulEvvDefaultVddc;
+  ULONG  ulEvvNoCalcVddc;
+  ULONG  ulSpeed_Model;
+  ULONG  ulSM_A0;
+  ULONG  ulSM_A1;
+  ULONG  ulSM_A2;
+  ULONG  ulSM_A3;
+  ULONG  ulSM_A4;
+  ULONG  ulSM_A5;
+  ULONG  ulSM_A6;
+  ULONG  ulSM_A7;
+  UCHAR  ucSM_A0_sign;
+  UCHAR  ucSM_A1_sign;
+  UCHAR  ucSM_A2_sign;
+  UCHAR  ucSM_A3_sign;
+  UCHAR  ucSM_A4_sign;
+  UCHAR  ucSM_A5_sign;
+  UCHAR  ucSM_A6_sign;
+  UCHAR  ucSM_A7_sign;
+  ULONG  ulMargin_RO_a;
+  ULONG  ulMargin_RO_b;
+  ULONG  ulMargin_RO_c;
+  ULONG  ulMargin_fixed;
+  ULONG  ulMargin_Fmax_mean;
+  ULONG  ulMargin_plat_mean;
+  ULONG  ulMargin_Fmax_sigma;
+  ULONG  ulMargin_plat_sigma;
+  ULONG  ulMargin_DC_sigma;
+  ULONG  ulLoadLineSlop;
+  ULONG  ulaTDClimitPerDPM[8];
+  ULONG  ulaNoCalcVddcPerDPM[8];
+  ULONG  ulAVFS_meanNsigma_Acontant0;
+  ULONG  ulAVFS_meanNsigma_Acontant1;
+  ULONG  ulAVFS_meanNsigma_Acontant2;
+  USHORT usAVFS_meanNsigma_DC_tol_sigma;
+  USHORT usAVFS_meanNsigma_Platform_mean;
+  USHORT usAVFS_meanNsigma_Platform_sigma;
+  ULONG  ulGB_VDROOP_TABLE_CKSOFF_a0;
+  ULONG  ulGB_VDROOP_TABLE_CKSOFF_a1;
+  ULONG  ulGB_VDROOP_TABLE_CKSOFF_a2;
+  ULONG  ulGB_VDROOP_TABLE_CKSON_a0;
+  ULONG  ulGB_VDROOP_TABLE_CKSON_a1;
+  ULONG  ulGB_VDROOP_TABLE_CKSON_a2;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSOFF_m1;
+  USHORT usAVFSGB_FUSE_TABLE_CKSOFF_m2;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSOFF_b;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSON_m1;
+  USHORT usAVFSGB_FUSE_TABLE_CKSON_m2;
+  ULONG  ulAVFSGB_FUSE_TABLE_CKSON_b;
+  USHORT usMaxVoltage_0_25mv;
+  UCHAR  ucEnableGB_VDROOP_TABLE_CKSOFF;
+  UCHAR  ucEnableGB_VDROOP_TABLE_CKSON;
+  UCHAR  ucEnableGB_FUSE_TABLE_CKSOFF;
+  UCHAR  ucEnableGB_FUSE_TABLE_CKSON;
+  USHORT usPSM_Age_ComFactor;
+  UCHAR  ucEnableApplyAVFS_CKS_OFF_Voltage;
+  UCHAR  ucReserved;
+}ATOM_ASIC_PROFILING_INFO_V3_6;
+
 
 typedef struct _ATOM_SCLK_FCW_RANGE_ENTRY_V1{
   ULONG  ulMaxSclkFreq;
diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h
index a461e155a160..0c8c85d2a2a5 100644
--- a/drivers/gpu/drm/amd/include/cgs_common.h
+++ b/drivers/gpu/drm/amd/include/cgs_common.h
@@ -49,6 +49,7 @@ enum cgs_ind_reg {
 	CGS_IND_REG__SMC,
 	CGS_IND_REG__UVD_CTX,
 	CGS_IND_REG__DIDT,
+	CGS_IND_REG_GC_CAC,
 	CGS_IND_REG__AUDIO_ENDPT
 };
 
@@ -115,6 +116,7 @@ enum cgs_system_info_id {
 	CGS_SYSTEM_INFO_CG_FLAGS,
 	CGS_SYSTEM_INFO_PG_FLAGS,
 	CGS_SYSTEM_INFO_GFX_CU_INFO,
+	CGS_SYSTEM_INFO_GFX_SE_INFO,
 	CGS_SYSTEM_INFO_ID_MAXIMUM,
 };
 
@@ -189,7 +191,6 @@ typedef unsigned long cgs_handle_t;
 
 struct cgs_acpi_method_argument {
 	uint32_t type;
-	uint32_t method_length;
 	uint32_t data_length;
 	union{
 		uint32_t value;
@@ -581,6 +582,9 @@ typedef int (*cgs_get_firmware_info)(struct cgs_device *cgs_device,
 				     enum cgs_ucode_id type,
 				     struct cgs_firmware_info *info);
 
+typedef int (*cgs_rel_firmware)(struct cgs_device *cgs_device,
+					 enum cgs_ucode_id type);
+
 typedef int(*cgs_set_powergating_state)(struct cgs_device *cgs_device,
 				  enum amd_ip_block_type block_type,
 				  enum amd_powergating_state state);
@@ -645,6 +649,7 @@ struct cgs_ops {
 	cgs_set_camera_voltages_t set_camera_voltages;
 	/* Firmware Info */
 	cgs_get_firmware_info get_firmware_info;
+	cgs_rel_firmware rel_firmware;
 	/* cg pg interface*/
 	cgs_set_powergating_state set_powergating_state;
 	cgs_set_clockgating_state set_clockgating_state;
@@ -738,6 +743,8 @@ struct cgs_device
 	CGS_CALL(set_camera_voltages,dev,mask,voltages)
 #define cgs_get_firmware_info(dev, type, info)	\
 	CGS_CALL(get_firmware_info, dev, type, info)
+#define cgs_rel_firmware(dev, type)	\
+	CGS_CALL(rel_firmware, dev, type)
 #define cgs_set_powergating_state(dev, block_type, state)	\
 	CGS_CALL(set_powergating_state, dev, block_type, state)
 #define cgs_set_clockgating_state(dev, block_type, state)	\
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 8e345bfddb69..f9e03ad0baa2 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -73,11 +73,14 @@ static int pp_sw_init(void *handle)
 
 	ret = hwmgr->hwmgr_func->backend_init(hwmgr);
 	if (ret)
-		goto err;
+		goto err1;
 
 	pr_info("amdgpu: powerplay initialized\n");
 
 	return 0;
+err1:
+	if (hwmgr->pptable_func->pptable_fini)
+		hwmgr->pptable_func->pptable_fini(hwmgr);
 err:
 	pr_err("amdgpu: powerplay initialization failed\n");
 	return ret;
@@ -100,6 +103,9 @@ static int pp_sw_fini(void *handle)
 	if (hwmgr->hwmgr_func->backend_fini != NULL)
 		ret = hwmgr->hwmgr_func->backend_fini(hwmgr);
 
+	if (hwmgr->pptable_func->pptable_fini)
+		hwmgr->pptable_func->pptable_fini(hwmgr);
+
 	return ret;
 }
 
@@ -530,6 +536,10 @@ int pp_dpm_dispatch_tasks(void *handle, enum amd_pp_event event_id, void *input,
 	case AMD_PP_EVENT_COMPLETE_INIT:
 		ret = pem_handle_event(pp_handle->eventmgr, event_id, &data);
 		break;
+	case AMD_PP_EVENT_READJUST_POWER_STATE:
+		pp_handle->hwmgr->current_ps = pp_handle->hwmgr->boot_ps;
+		ret = pem_handle_event(pp_handle->eventmgr, event_id, &data);
+		break;
 	default:
 		break;
 	}
@@ -734,12 +744,12 @@ static int pp_dpm_get_pp_table(void *handle, char **table)
 
 	PP_CHECK_HW(hwmgr);
 
-	if (hwmgr->hwmgr_func->get_pp_table == NULL) {
-		printk(KERN_INFO "%s was not implemented.\n", __func__);
-		return 0;
-	}
+	if (!hwmgr->soft_pp_table)
+		return -EINVAL;
+
+	*table = (char *)hwmgr->soft_pp_table;
 
-	return hwmgr->hwmgr_func->get_pp_table(hwmgr, table);
+	return hwmgr->soft_pp_table_size;
 }
 
 static int pp_dpm_set_pp_table(void *handle, const char *buf, size_t size)
@@ -753,12 +763,23 @@ static int pp_dpm_set_pp_table(void *handle, const char *buf, size_t size)
 
 	PP_CHECK_HW(hwmgr);
 
-	if (hwmgr->hwmgr_func->set_pp_table == NULL) {
-		printk(KERN_INFO "%s was not implemented.\n", __func__);
-		return 0;
+	if (!hwmgr->hardcode_pp_table) {
+		hwmgr->hardcode_pp_table =
+				kzalloc(hwmgr->soft_pp_table_size, GFP_KERNEL);
+
+		if (!hwmgr->hardcode_pp_table)
+			return -ENOMEM;
+
+		/* to avoid powerplay crash when hardcode pptable is empty */
+		memcpy(hwmgr->hardcode_pp_table, hwmgr->soft_pp_table,
+				hwmgr->soft_pp_table_size);
 	}
 
-	return hwmgr->hwmgr_func->set_pp_table(hwmgr, buf, size);
+	memcpy(hwmgr->hardcode_pp_table, buf, size);
+
+	hwmgr->soft_pp_table = hwmgr->hardcode_pp_table;
+
+	return amd_powerplay_reset(handle);
 }
 
 static int pp_dpm_force_clock_level(void *handle,
@@ -800,6 +821,82 @@ static int pp_dpm_print_clock_levels(void *handle,
 	return hwmgr->hwmgr_func->print_clock_levels(hwmgr, type, buf);
 }
 
+static int pp_dpm_get_sclk_od(void *handle)
+{
+	struct pp_hwmgr *hwmgr;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	PP_CHECK_HW(hwmgr);
+
+	if (hwmgr->hwmgr_func->get_sclk_od == NULL) {
+		printk(KERN_INFO "%s was not implemented.\n", __func__);
+		return 0;
+	}
+
+	return hwmgr->hwmgr_func->get_sclk_od(hwmgr);
+}
+
+static int pp_dpm_set_sclk_od(void *handle, uint32_t value)
+{
+	struct pp_hwmgr *hwmgr;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	PP_CHECK_HW(hwmgr);
+
+	if (hwmgr->hwmgr_func->set_sclk_od == NULL) {
+		printk(KERN_INFO "%s was not implemented.\n", __func__);
+		return 0;
+	}
+
+	return hwmgr->hwmgr_func->set_sclk_od(hwmgr, value);
+}
+
+static int pp_dpm_get_mclk_od(void *handle)
+{
+	struct pp_hwmgr *hwmgr;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	PP_CHECK_HW(hwmgr);
+
+	if (hwmgr->hwmgr_func->get_mclk_od == NULL) {
+		printk(KERN_INFO "%s was not implemented.\n", __func__);
+		return 0;
+	}
+
+	return hwmgr->hwmgr_func->get_mclk_od(hwmgr);
+}
+
+static int pp_dpm_set_mclk_od(void *handle, uint32_t value)
+{
+	struct pp_hwmgr *hwmgr;
+
+	if (!handle)
+		return -EINVAL;
+
+	hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+	PP_CHECK_HW(hwmgr);
+
+	if (hwmgr->hwmgr_func->set_mclk_od == NULL) {
+		printk(KERN_INFO "%s was not implemented.\n", __func__);
+		return 0;
+	}
+
+	return hwmgr->hwmgr_func->set_mclk_od(hwmgr, value);
+}
+
 const struct amd_powerplay_funcs pp_dpm_funcs = {
 	.get_temperature = pp_dpm_get_temperature,
 	.load_firmware = pp_dpm_load_fw,
@@ -822,6 +919,10 @@ const struct amd_powerplay_funcs pp_dpm_funcs = {
 	.set_pp_table = pp_dpm_set_pp_table,
 	.force_clock_level = pp_dpm_force_clock_level,
 	.print_clock_levels = pp_dpm_print_clock_levels,
+	.get_sclk_od = pp_dpm_get_sclk_od,
+	.set_sclk_od = pp_dpm_set_sclk_od,
+	.get_mclk_od = pp_dpm_get_mclk_od,
+	.set_mclk_od = pp_dpm_set_mclk_od,
 };
 
 static int amd_pp_instance_init(struct amd_pp_init *pp_init,
@@ -903,6 +1004,44 @@ int amd_powerplay_fini(void *handle)
 	return 0;
 }
 
+int amd_powerplay_reset(void *handle)
+{
+	struct pp_instance *instance = (struct pp_instance *)handle;
+	struct pp_eventmgr *eventmgr;
+	struct pem_event_data event_data = { {0} };
+	int ret;
+
+	if (instance == NULL)
+		return -EINVAL;
+
+	eventmgr = instance->eventmgr;
+	if (!eventmgr || !eventmgr->pp_eventmgr_fini)
+		return -EINVAL;
+
+	eventmgr->pp_eventmgr_fini(eventmgr);
+
+	ret = pp_sw_fini(handle);
+	if (ret)
+		return ret;
+
+	kfree(instance->hwmgr->ps);
+
+	ret = pp_sw_init(handle);
+	if (ret)
+		return ret;
+
+	hw_init_power_state_table(instance->hwmgr);
+
+	if (eventmgr == NULL || eventmgr->pp_eventmgr_init == NULL)
+		return -EINVAL;
+
+	ret = eventmgr->pp_eventmgr_init(eventmgr);
+	if (ret)
+		return ret;
+
+	return pem_handle_event(eventmgr, AMD_PP_EVENT_COMPLETE_INIT, &event_data);
+}
+
 /* export this function to DAL */
 
 int amd_powerplay_display_configuration_change(void *handle,
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
index 46410e3c7349..fb88e4e5d625 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventmgr.c
@@ -58,9 +58,6 @@ static void pem_fini(struct pp_eventmgr *eventmgr)
 	pem_unregister_interrupts(eventmgr);
 
 	pem_handle_event(eventmgr, AMD_PP_EVENT_UNINITIALIZE, &event_data);
-
-	if (eventmgr != NULL)
-		kfree(eventmgr);
 }
 
 int eventmgr_init(struct pp_instance *handle)
diff --git a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
index 5cd123472db4..b6f45fd01fa6 100644
--- a/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
+++ b/drivers/gpu/drm/amd/powerplay/eventmgr/eventtasks.c
@@ -132,8 +132,7 @@ int pem_task_enable_dynamic_state_management(struct pp_eventmgr *eventmgr, struc
 
 int pem_task_disable_dynamic_state_management(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data)
 {
-	/* TODO */
-	return 0;
+	return phm_disable_dynamic_state_management(eventmgr->hwmgr);
 }
 
 int pem_task_enable_clock_power_gatings_tasks(struct pp_eventmgr *eventmgr, struct pem_event_data *event_data)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
index 436fc16dabb6..2da548f6337e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
@@ -206,7 +206,7 @@ int cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
 							AMD_IP_BLOCK_TYPE_VCE,
 							AMD_PG_STATE_GATE);
 				cz_enable_disable_vce_dpm(hwmgr, false);
-			/* TODO: to figure out why vce can't be poweroff*/
+				cz_dpm_powerdown_vce(hwmgr);
 				cz_hwmgr->vce_power_gated = true;
 			} else {
 				cz_dpm_powerup_vce(hwmgr);
@@ -225,6 +225,7 @@ int cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
 			}
 		}
 	} else {
+		cz_hwmgr->vce_power_gated = bgate;
 		cz_dpm_update_vce_dpm(hwmgr);
 		cz_enable_disable_vce_dpm(hwmgr, !bgate);
 		return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
index 1f14c477d15d..9bf622e123b6 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
@@ -1180,6 +1180,13 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 static int cz_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 {
 	int result = 0;
+	struct cz_hwmgr *data;
+
+	data = kzalloc(sizeof(struct cz_hwmgr), GFP_KERNEL);
+	if (data == NULL)
+		return -ENOMEM;
+
+	hwmgr->backend = data;
 
 	result = cz_initialize_dpm_defaults(hwmgr);
 	if (result != 0) {
@@ -1909,15 +1916,7 @@ static const struct pp_hwmgr_func cz_hwmgr_funcs = {
 
 int cz_hwmgr_init(struct pp_hwmgr *hwmgr)
 {
-	struct cz_hwmgr *cz_hwmgr;
-	int ret = 0;
-
-	cz_hwmgr = kzalloc(sizeof(struct cz_hwmgr), GFP_KERNEL);
-	if (cz_hwmgr == NULL)
-		return -ENOMEM;
-
-	hwmgr->backend = cz_hwmgr;
 	hwmgr->hwmgr_func = &cz_hwmgr_funcs;
 	hwmgr->pptable_func = &pptable_funcs;
-	return ret;
+	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
index 24a16e49b571..744aa886a2be 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
@@ -581,25 +581,24 @@ static int fiji_patch_boot_state(struct pp_hwmgr *hwmgr,
 
 static int fiji_hwmgr_backend_fini(struct pp_hwmgr *hwmgr)
 {
-	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
-
-	if (data->soft_pp_table) {
-		kfree(data->soft_pp_table);
-		data->soft_pp_table = NULL;
-	}
-
 	return phm_hwmgr_backend_fini(hwmgr);
 }
 
 static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 {
-	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_hwmgr *data;
 	uint32_t i;
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 	bool stay_in_boot;
 	int result;
 
+	data = kzalloc(sizeof(struct fiji_hwmgr), GFP_KERNEL);
+	if (data == NULL)
+		return -ENOMEM;
+
+	hwmgr->backend = data;
+
 	data->dll_default_on = false;
 	data->sram_end = SMC_RAM_END;
 
@@ -633,6 +632,8 @@ static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	data->vddci_control = FIJI_VOLTAGE_CONTROL_NONE;
 	data->mvdd_control = FIJI_VOLTAGE_CONTROL_NONE;
 
+	data->force_pcie_gen = PP_PCIEGenInvalid;
+
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 			VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2))
 		data->voltage_control = FIJI_VOLTAGE_CONTROL_BY_SVID2;
@@ -732,7 +733,7 @@ static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_GEN_INFO;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_gen_cap = 0x30007;
+			data->pcie_gen_cap = AMDGPU_DEFAULT_PCIE_GEN_MASK;
 		else
 			data->pcie_gen_cap = (uint32_t)sys_info.value;
 		if (data->pcie_gen_cap & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
@@ -741,7 +742,7 @@ static int fiji_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_MLW;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_lane_cap = 0x2f0000;
+			data->pcie_lane_cap = AMDGPU_DEFAULT_PCIE_MLW_MASK;
 		else
 			data->pcie_lane_cap = (uint32_t)sys_info.value;
 	} else {
@@ -1234,6 +1235,34 @@ static int fiji_program_voting_clients(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int fiji_clear_voting_clients(struct pp_hwmgr *hwmgr)
+{
+	/* Reset voting clients before disabling DPM */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			SCLK_PWRMGT_CNTL, RESET_SCLK_CNT, 1);
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			SCLK_PWRMGT_CNTL, RESET_BUSY_CNT, 1);
+
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_0, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_1, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_2, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_3, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_4, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_5, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_6, 0);
+	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+			ixCG_FREQ_TRAN_VOTING_7, 0);
+
+	return 0;
+}
+
 /**
 * Get the location of various tables inside the FW image.
 *
@@ -1361,6 +1390,17 @@ static int fiji_copy_and_switch_arb_sets(struct pp_hwmgr *hwmgr,
 }
 
 /**
+* Call the SMC to reset S0/S1 to S1 and reset SMIO to its initial value
+*
+* @param    hwmgr  the address of the powerplay hardware manager.
+* @return   0 on success.
+*/
+static int fiji_reset_to_default(struct pp_hwmgr *hwmgr)
+{
+	return smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_ResetToDefaults);
+}
+
+/**
 * Initial switch from ARB F0->F1
 *
 * @param    hwmgr  the address of the powerplay hardware manager.
@@ -1373,6 +1413,21 @@ static int fiji_initial_switch_from_arbf0_to_f1(struct pp_hwmgr *hwmgr)
 			MC_CG_ARB_FREQ_F0, MC_CG_ARB_FREQ_F1);
 }
 
+static int fiji_force_switch_to_arbf0(struct pp_hwmgr *hwmgr)
+{
+	uint32_t tmp;
+
+	tmp = (cgs_read_ind_register(hwmgr->device,
+			CGS_IND_REG__SMC, ixSMC_SCRATCH9) &
+			0x0000ff00) >> 8;
+
+	if (tmp == MC_CG_ARB_FREQ_F0)
+		return 0;
+
+	return fiji_copy_and_switch_arb_sets(hwmgr,
+			tmp, MC_CG_ARB_FREQ_F0);
+}
+
 static int fiji_reset_single_dpm_table(struct pp_hwmgr *hwmgr,
 		struct fiji_single_dpm_table *dpm_table, uint32_t count)
 {
@@ -1830,7 +1885,7 @@ static uint16_t fiji_find_closest_vddci(struct pp_hwmgr *hwmgr, uint16_t vddci)
 
 	PP_ASSERT_WITH_CODE(false,
 			"VDDCI is larger than max VDDCI in VDDCI Voltage Table!",
-			return vddci_table->entries[i].value);
+			return vddci_table->entries[i-1].value);
 }
 
 static int fiji_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr,
@@ -3175,6 +3230,17 @@ static int fiji_enable_ulv(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int fiji_disable_ulv(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_ulv_parm *ulv = &(data->ulv);
+
+	if (ulv->ulv_supported)
+		return smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_DisableULV);
+
+	return 0;
+}
+
 static int fiji_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
 {
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
@@ -3195,6 +3261,21 @@ static int fiji_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int fiji_disable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
+{
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_SclkDeepSleep)) {
+		if (smum_send_msg_to_smc(hwmgr->smumgr,
+				PPSMC_MSG_MASTER_DeepSleep_OFF)) {
+			PP_ASSERT_WITH_CODE(false,
+					"Attempt to disable Master Deep Sleep switch failed!",
+					return -1);
+		}
+	}
+
+	return 0;
+}
+
 static int fiji_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 {
 	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
@@ -3355,6 +3436,70 @@ static int fiji_start_dpm(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int fiji_disable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+
+	/* disable SCLK dpm */
+	if (!data->sclk_dpm_key_disabled)
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc(hwmgr->smumgr,
+						PPSMC_MSG_DPM_Disable) == 0),
+				"Failed to disable SCLK DPM!",
+				return -1);
+
+	/* disable MCLK dpm */
+	if (!data->mclk_dpm_key_disabled) {
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+				PPSMC_MSG_MCLKDPM_SetEnabledMask, 1) == 0),
+				"Failed to force MCLK DPM0!",
+				return -1);
+
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc(hwmgr->smumgr,
+						PPSMC_MSG_MCLKDPM_Disable) == 0),
+				"Failed to disable MCLK DPM!",
+				return -1);
+	}
+
+	return 0;
+}
+
+static int fiji_stop_dpm(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+
+	/* disable general power management */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, GENERAL_PWRMGT,
+			GLOBAL_PWRMGT_EN, 0);
+	/* disable sclk deep sleep */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SCLK_PWRMGT_CNTL,
+			DYNAMIC_PM_EN, 0);
+
+	/* disable PCIE dpm */
+	if (!data->pcie_dpm_key_disabled) {
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc(hwmgr->smumgr,
+						PPSMC_MSG_PCIeDPM_Disable) == 0),
+				"Failed to disable pcie DPM during DPM Stop Function!",
+				return -1);
+	}
+
+	if (fiji_disable_sclk_mclk_dpm(hwmgr)) {
+		printk(KERN_ERR "Failed to disable Sclk DPM and Mclk DPM!");
+		return -1;
+	}
+
+	PP_ASSERT_WITH_CODE(
+			(smum_send_msg_to_smc(hwmgr->smumgr,
+					PPSMC_MSG_Voltage_Cntl_Disable) == 0),
+			"Failed to disable voltage DPM during DPM Stop Function!",
+			return -1);
+
+	return 0;
+}
+
 static void fiji_set_dpm_event_sources(struct pp_hwmgr *hwmgr,
 		uint32_t sources)
 {
@@ -3413,6 +3558,23 @@ static int fiji_enable_thermal_auto_throttle(struct pp_hwmgr *hwmgr)
 	return fiji_enable_auto_throttle_source(hwmgr, PHM_AutoThrottleSource_Thermal);
 }
 
+static int fiji_disable_auto_throttle_source(struct pp_hwmgr *hwmgr,
+		PHM_AutoThrottleSource source)
+{
+	struct fiji_hwmgr *fiji = (struct fiji_hwmgr *)(hwmgr->backend);
+	/* Only touch the DPM event sources when @source is currently active */
+	if (!(fiji->active_auto_throttle_sources & (1 << source)))
+		return 0;
+	fiji->active_auto_throttle_sources &= ~(1 << source);
+	fiji_set_dpm_event_sources(hwmgr, fiji->active_auto_throttle_sources);
+	return 0;
+}
+
+static int fiji_disable_thermal_auto_throttle(struct pp_hwmgr *hwmgr)
+{	/* thermal-source wrapper around fiji_disable_auto_throttle_source() */
+	return fiji_disable_auto_throttle_source(hwmgr, PHM_AutoThrottleSource_Thermal);
+}
+
 static int fiji_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 {
 	int tmp_result, result = 0;
@@ -3527,6 +3689,64 @@ static int fiji_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+/*
+ * Tear down DPM. Returns 0 straight away when DPM is not running. Otherwise
+ * every step below is attempted even if an earlier one failed, and the
+ * return value is the result of the last step that failed (0 if none did).
+ */
+static int fiji_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
+{
+	int step, ret = 0;
+
+	PP_ASSERT_WITH_CODE(fiji_is_dpm_running(hwmgr),
+			"DPM is not running right now, no need to disable DPM!",
+			return 0);
+
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_ThermalController))
+		PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+				GENERAL_PWRMGT, THERMAL_PROTECTION_DIS, 1);
+
+	step = fiji_disable_power_containment(hwmgr);
+	PP_ASSERT_WITH_CODE(step == 0,
+			"Failed to disable power containment!", ret = step);
+
+	step = fiji_disable_smc_cac(hwmgr);
+	PP_ASSERT_WITH_CODE(step == 0, "Failed to disable SMC CAC!", ret = step);
+
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			CG_SPLL_SPREAD_SPECTRUM, SSEN, 0);
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			GENERAL_PWRMGT, DYN_SPREAD_SPECTRUM_EN, 0);
+
+	step = fiji_disable_thermal_auto_throttle(hwmgr);
+	PP_ASSERT_WITH_CODE(step == 0,
+			"Failed to disable thermal auto throttle!", ret = step);
+
+	step = fiji_stop_dpm(hwmgr);
+	PP_ASSERT_WITH_CODE(step == 0, "Failed to stop DPM!", ret = step);
+
+	step = fiji_disable_deep_sleep_master_switch(hwmgr);
+	PP_ASSERT_WITH_CODE(step == 0,
+			"Failed to disable deep sleep master switch!", ret = step);
+
+	step = fiji_disable_ulv(hwmgr);
+	PP_ASSERT_WITH_CODE(step == 0, "Failed to disable ULV!", ret = step);
+
+	step = fiji_clear_voting_clients(hwmgr);
+	PP_ASSERT_WITH_CODE(step == 0,
+			"Failed to clear voting clients!", ret = step);
+
+	step = fiji_reset_to_default(hwmgr);
+	PP_ASSERT_WITH_CODE(step == 0, "Failed to reset to default!", ret = step);
+
+	step = fiji_force_switch_to_arbf0(hwmgr);
+	PP_ASSERT_WITH_CODE(step == 0,
+			"Failed to force to switch arbf0!", ret = step);
+
+	return ret;
+}
+
 static int fiji_force_dpm_highest(struct pp_hwmgr *hwmgr)
 {
 	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
@@ -5069,42 +5289,6 @@ static int fiji_get_fan_control_mode(struct pp_hwmgr *hwmgr)
 				CG_FDO_CTRL2, FDO_PWM_MODE);
 }
 
-static int fiji_get_pp_table(struct pp_hwmgr *hwmgr, char **table)
-{
-	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kmemdup(hwmgr->soft_pp_table,
-					      hwmgr->soft_pp_table_size,
-					      GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	*table = (char *)&data->soft_pp_table;
-
-	return hwmgr->soft_pp_table_size;
-}
-
-static int fiji_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size)
-{
-	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kzalloc(hwmgr->soft_pp_table_size, GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	memcpy(data->soft_pp_table, buf, size);
-
-	hwmgr->soft_pp_table = data->soft_pp_table;
-
-	/* TODO: re-init powerplay to implement modified pptable */
-
-	return 0;
-}
-
 static int fiji_force_clock_level(struct pp_hwmgr *hwmgr,
 		enum pp_clock_type type, uint32_t mask)
 {
@@ -5274,12 +5458,96 @@ bool fiji_check_smc_update_required_for_display_configuration(struct pp_hwmgr *h
 	return is_update_required;
 }
 
+/*
+ * SCLK overdrive readback: percentage by which the top level of the current
+ * SCLK DPM table exceeds the top level of the golden table.
+ * NOTE(review): assumes non-empty tables and a non-zero golden top value.
+ */
+static int fiji_get_sclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *fiji = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_single_dpm_table *cur = &fiji->dpm_table.sclk_table;
+	struct fiji_single_dpm_table *golden = &fiji->golden_dpm_table.sclk_table;
+
+	return (cur->dpm_levels[cur->count - 1].value -
+			golden->dpm_levels[golden->count - 1].value) * 100 /
+			golden->dpm_levels[golden->count - 1].value;
+}
+
+/*
+ * Apply an SCLK overdrive of @value percent (capped at 20) to the requested
+ * power state: its last performance level's engine clock becomes the golden
+ * top SCLK plus @value percent of it. -EINVAL if there is no requested state.
+ */
+static int fiji_set_sclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct fiji_hwmgr *fiji = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_single_dpm_table *golden = &fiji->golden_dpm_table.sclk_table;
+	struct pp_power_state *req = hwmgr->request_ps;
+	struct fiji_power_state *state;
+
+	if (req == NULL)
+		return -EINVAL;
+
+	if (value > 20)
+		value = 20;
+
+	state = cast_phw_fiji_power_state(&req->hardware);
+	state->performance_levels[state->performance_level_count - 1].engine_clock =
+			golden->dpm_levels[golden->count - 1].value * value / 100 +
+			golden->dpm_levels[golden->count - 1].value;
+
+	return 0;
+}
+
+/*
+ * MCLK overdrive readback: percentage by which the top level of the current
+ * MCLK DPM table exceeds the top level of the golden table.
+ * NOTE(review): assumes non-empty tables and a non-zero golden top value.
+ */
+static int fiji_get_mclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *fiji = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_single_dpm_table *cur = &fiji->dpm_table.mclk_table;
+	struct fiji_single_dpm_table *golden = &fiji->golden_dpm_table.mclk_table;
+
+	return (cur->dpm_levels[cur->count - 1].value -
+			golden->dpm_levels[golden->count - 1].value) * 100 /
+			golden->dpm_levels[golden->count - 1].value;
+}
+
+/*
+ * Apply an MCLK overdrive of @value percent (capped at 20) to the requested
+ * power state: its last performance level's memory clock becomes the golden
+ * top MCLK plus @value percent of it. -EINVAL if there is no requested state.
+ */
+static int fiji_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct fiji_hwmgr *fiji = (struct fiji_hwmgr *)(hwmgr->backend);
+	struct fiji_single_dpm_table *golden = &fiji->golden_dpm_table.mclk_table;
+	struct pp_power_state *req = hwmgr->request_ps;
+	struct fiji_power_state *state;
+
+	if (req == NULL)
+		return -EINVAL;
+
+	if (value > 20)
+		value = 20;
+
+	state = cast_phw_fiji_power_state(&req->hardware);
+	state->performance_levels[state->performance_level_count - 1].memory_clock =
+			golden->dpm_levels[golden->count - 1].value * value / 100 +
+			golden->dpm_levels[golden->count - 1].value;
+
+	return 0;
+}
 
 static const struct pp_hwmgr_func fiji_hwmgr_funcs = {
 	.backend_init = &fiji_hwmgr_backend_init,
 	.backend_fini = &fiji_hwmgr_backend_fini,
 	.asic_setup = &fiji_setup_asic_task,
 	.dynamic_state_management_enable = &fiji_enable_dpm_tasks,
+	.dynamic_state_management_disable = &fiji_disable_dpm_tasks,
 	.force_dpm_level = &fiji_dpm_force_dpm_level,
 	.get_num_of_pp_table_entries = &tonga_get_number_of_powerplay_table_entries,
 	.get_power_state_size = &fiji_get_power_state_size,
@@ -5312,24 +5580,18 @@ static const struct pp_hwmgr_func fiji_hwmgr_funcs = {
 	.get_fan_control_mode = fiji_get_fan_control_mode,
 	.check_states_equal = fiji_check_states_equal,
 	.check_smc_update_required_for_display_configuration = fiji_check_smc_update_required_for_display_configuration,
-	.get_pp_table = fiji_get_pp_table,
-	.set_pp_table = fiji_set_pp_table,
 	.force_clock_level = fiji_force_clock_level,
 	.print_clock_levels = fiji_print_clock_levels,
+	.get_sclk_od = fiji_get_sclk_od,
+	.set_sclk_od = fiji_set_sclk_od,
+	.get_mclk_od = fiji_get_mclk_od,
+	.set_mclk_od = fiji_set_mclk_od,
 };
 
 int fiji_hwmgr_init(struct pp_hwmgr *hwmgr)
 {
-	struct fiji_hwmgr  *data;
-	int ret = 0;
-
-	data = kzalloc(sizeof(struct fiji_hwmgr), GFP_KERNEL);
-	if (data == NULL)
-		return -ENOMEM;
-
-	hwmgr->backend = data;
 	hwmgr->hwmgr_func = &fiji_hwmgr_funcs;
 	hwmgr->pptable_func = &tonga_pptable_funcs;
 	pp_fiji_thermal_initialize(hwmgr);
-	return ret;
+	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
index 170edf5a772d..bf67c2a92c68 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
@@ -302,9 +302,6 @@ struct fiji_hwmgr {
 	bool                           pg_acp_init;
 	bool                           frtc_enabled;
 	bool                           frtc_status_changed;
-
-	/* soft pptable for re-uploading into smu */
-	void *soft_pp_table;
 };
 
 /* To convert to Q8.8 format for firmware */
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c
index db23a4068baf..44658451a8d2 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.c
@@ -73,17 +73,18 @@ void fiji_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr)
 
 	if (!tmp) {
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
-				PHM_PlatformCaps_PowerContainment);
-
-		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 				PHM_PlatformCaps_CAC);
 
 		fiji_hwmgr->fast_watermark_threshold = 100;
 
-		tmp = 1;
-		fiji_hwmgr->enable_dte_feature = tmp ? false : true;
-		fiji_hwmgr->enable_tdc_limit_feature = tmp ? true : false;
-		fiji_hwmgr->enable_pkg_pwr_tracking_feature = tmp ? true : false;
+		if (hwmgr->powercontainment_enabled) {
+			phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+				    PHM_PlatformCaps_PowerContainment);
+			tmp = 1;
+			fiji_hwmgr->enable_dte_feature = tmp ? false : true;
+			fiji_hwmgr->enable_tdc_limit_feature = tmp ? true : false;
+			fiji_hwmgr->enable_pkg_pwr_tracking_feature = tmp ? true : false;
+		}
 	}
 }
 
@@ -459,6 +460,23 @@ int fiji_enable_smc_cac(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+/* Disable CAC in the SMC when the cap is set and CAC is on; -1 on failure. */
+int fiji_disable_smc_cac(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *fiji = (struct fiji_hwmgr *)(hwmgr->backend);
+	int ret = 0;
+
+	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_CAC) || !fiji->cac_enabled)
+		return 0;
+
+	PP_ASSERT_WITH_CODE(smum_send_msg_to_smc(hwmgr->smumgr,
+			(uint16_t)(PPSMC_MSG_DisableCac)) == 0,
+			"Failed to disable CAC in SMC.", ret = -1);
+	fiji->cac_enabled = false;	/* cleared even when the message failed */
+	return ret;
+}
+
 int fiji_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n)
 {
 	struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
@@ -528,6 +546,48 @@ int fiji_enable_power_containment(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+/*
+ * Send the SMC a disable message for each power-containment feature that is
+ * currently recorded as enabled (TDC limit, DTE, package power limit), then
+ * clear the feature mask. All messages are attempted; the return value is
+ * the result of the last one that failed, or 0.
+ */
+int fiji_disable_power_containment(struct pp_hwmgr *hwmgr)
+{
+	struct fiji_hwmgr *fiji = (struct fiji_hwmgr *)(hwmgr->backend);
+	int rc, ret = 0;
+
+	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_PowerContainment) ||
+			!fiji->power_containment_features)
+		return 0;
+
+	if (fiji->power_containment_features & POWERCONTAINMENT_FEATURE_TDCLimit) {
+		rc = smum_send_msg_to_smc(hwmgr->smumgr,
+				(uint16_t)(PPSMC_MSG_TDCLimitDisable));
+		PP_ASSERT_WITH_CODE(rc == 0,
+				"Failed to disable TDCLimit in SMC.", ret = rc);
+	}
+
+	if (fiji->power_containment_features & POWERCONTAINMENT_FEATURE_DTE) {
+		rc = smum_send_msg_to_smc(hwmgr->smumgr,
+				(uint16_t)(PPSMC_MSG_DisableDTE));
+		PP_ASSERT_WITH_CODE(rc == 0,
+				"Failed to disable DTE in SMC.", ret = rc);
+	}
+
+	if (fiji->power_containment_features & POWERCONTAINMENT_FEATURE_PkgPwrLimit) {
+		rc = smum_send_msg_to_smc(hwmgr->smumgr,
+				(uint16_t)(PPSMC_MSG_PkgPwrLimitDisable));
+		PP_ASSERT_WITH_CODE(rc == 0,
+				"Failed to disable PkgPwrTracking in SMC.", ret = rc);
+	}
+
+	fiji->power_containment_features = 0;
+
+	return ret;
+}
+
 int fiji_power_control_set_level(struct pp_hwmgr *hwmgr)
 {
 	struct phm_ppt_v1_information *table_info =
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.h b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.h
index 55e58200f33a..fec772421733 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_powertune.h
@@ -36,6 +36,19 @@ enum fiji_pt_config_reg_type {
 #define POWERCONTAINMENT_FEATURE_TDCLimit        0x00000002
 #define POWERCONTAINMENT_FEATURE_PkgPwrLimit     0x00000004
 
+#define DIDT_SQ_CTRL0__UNUSED_0_MASK             0xffffffc0 /* bits 31:6 */
+#define DIDT_SQ_CTRL0__UNUSED_0__SHIFT           0x6
+#define DIDT_TD_CTRL0__UNUSED_0_MASK             0xffffffc0 /* bits 31:6 */
+#define DIDT_TD_CTRL0__UNUSED_0__SHIFT           0x6
+#define DIDT_TCP_CTRL0__UNUSED_0_MASK            0xffffffc0 /* bits 31:6 */
+#define DIDT_TCP_CTRL0__UNUSED_0__SHIFT          0x6
+#define DIDT_SQ_TUNING_CTRL__UNUSED_0_MASK                 0xe0000000 /* bits 31:29 */
+#define DIDT_SQ_TUNING_CTRL__UNUSED_0__SHIFT               0x0000001d
+#define DIDT_TD_TUNING_CTRL__UNUSED_0_MASK                 0xe0000000 /* bits 31:29 */
+#define DIDT_TD_TUNING_CTRL__UNUSED_0__SHIFT               0x0000001d
+#define DIDT_TCP_TUNING_CTRL__UNUSED_0_MASK                0xe0000000 /* bits 31:29 */
+#define DIDT_TCP_TUNING_CTRL__UNUSED_0__SHIFT              0x0000001d
+
 struct fiji_pt_config_reg {
 	uint32_t                           offset;
 	uint32_t                           mask;
@@ -58,7 +71,9 @@ void fiji_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr);
 int fiji_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr);
 int fiji_populate_pm_fuses(struct pp_hwmgr *hwmgr);
 int fiji_enable_smc_cac(struct pp_hwmgr *hwmgr);
+int fiji_disable_smc_cac(struct pp_hwmgr *hwmgr);
 int fiji_enable_power_containment(struct pp_hwmgr *hwmgr);
+int fiji_disable_power_containment(struct pp_hwmgr *hwmgr);
 int fiji_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n);
 int fiji_power_control_set_level(struct pp_hwmgr *hwmgr);
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index fa208ada6892..789f98ad2615 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -154,6 +154,30 @@ int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr)
 	return ret;
 }
 
+/*
+ * Disable dynamic state management, either through the hwmgr callback
+ * (tableless interface) or the dispatch table, and tell CGS whether DPM
+ * is still enabled: it is reported as enabled unless the disable returned 0.
+ */
+int phm_disable_dynamic_state_management(struct pp_hwmgr *hwmgr)
+{
+	int ret = -1;	/* stays -1 if the tableless callback is missing */
+
+	PHM_FUNC_CHECK(hwmgr);
+
+	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_TablelessHardwareInterface))
+		ret = phm_dispatch_table(hwmgr,
+				&hwmgr->disable_dynamic_state_management,
+				NULL, NULL);
+	else if (hwmgr->hwmgr_func->dynamic_state_management_disable)
+		ret = hwmgr->hwmgr_func->dynamic_state_management_disable(hwmgr);
+
+	cgs_notify_dpm_enabled(hwmgr->device, ret != 0);
+
+	return ret;
+}
+
 int phm_force_dpm_levels(struct pp_hwmgr *hwmgr, enum amd_dpm_forced_level level)
 {
 	PHM_FUNC_CHECK(hwmgr);
@@ -306,11 +330,15 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr,
 {
 	PHM_FUNC_CHECK(hwmgr);
 
-	if (hwmgr->hwmgr_func->store_cc6_data == NULL)
+	if (display_config == NULL)
 		return -EINVAL;
 
 	hwmgr->display_config = *display_config;
-	/* to do pass other display configuration in furture */
+
+	if (hwmgr->hwmgr_func->store_cc6_data == NULL)
+		return -EINVAL;
+
+	/* TODO: pass other display configuration in the future */
 
 	if (hwmgr->hwmgr_func->store_cc6_data)
 		hwmgr->hwmgr_func->store_cc6_data(hwmgr,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
index 1c48917da3cf..03b6128ebc20 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
@@ -24,6 +24,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <drm/amdgpu_drm.h>
 #include "cgs_common.h"
 #include "power_state.h"
 #include "hwmgr.h"
@@ -58,12 +59,13 @@ int hwmgr_init(struct amd_pp_init *pp_init, struct pp_instance *handle)
 	hwmgr->hw_revision = pp_init->rev_id;
 	hwmgr->usec_timeout = AMD_MAX_USEC_TIMEOUT;
 	hwmgr->power_source = PP_PowerSource_AC;
+	hwmgr->powercontainment_enabled = pp_init->powercontainment_enabled;
 
 	switch (hwmgr->chip_family) {
-	case AMD_FAMILY_CZ:
+	case AMDGPU_FAMILY_CZ:
 		cz_hwmgr_init(hwmgr);
 		break;
-	case AMD_FAMILY_VI:
+	case AMDGPU_FAMILY_VI:
 		switch (hwmgr->chip_id) {
 		case CHIP_TONGA:
 			tonga_hwmgr_init(hwmgr);
@@ -93,6 +95,15 @@ int hwmgr_fini(struct pp_hwmgr *hwmgr)
 	if (hwmgr == NULL || hwmgr->ps == NULL)
 		return -EINVAL;
 
+	/* do hwmgr finish*/
+	kfree(hwmgr->hardcode_pp_table);
+
+	kfree(hwmgr->backend);
+
+	kfree(hwmgr->start_thermal_controller.function_list);
+
+	kfree(hwmgr->set_temperature_range.function_list);
+
 	kfree(hwmgr->ps);
 	kfree(hwmgr);
 	return 0;
@@ -462,7 +473,7 @@ uint16_t phm_find_closest_vddci(struct pp_atomctrl_voltage_table *vddci_table, u
 
 	PP_ASSERT_WITH_CODE(false,
 			"VDDCI is larger than max VDDCI in VDDCI Voltage Table!",
-			return vddci_table->entries[i].value);
+			return vddci_table->entries[i-1].value);
 }
 
 int phm_find_boot_level(void *table,
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h
index 347fef127ce9..2930a3355948 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr_ppt.h
@@ -39,6 +39,7 @@ struct phm_ppt_v1_clock_voltage_dependency_record {
 	uint8_t phases;
 	uint8_t cks_enable;
 	uint8_t cks_voffset;
+	uint32_t sclk_offset;
 };
 
 typedef struct phm_ppt_v1_clock_voltage_dependency_record phm_ppt_v1_clock_voltage_dependency_record;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_clockpowergating.c
index 8f142a74ad08..aeec25c66aa8 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_clockpowergating.c
@@ -106,11 +106,17 @@ int polaris10_phm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate)
 	data->uvd_power_gated = bgate;
 
 	if (bgate) {
+		cgs_set_clockgating_state(hwmgr->device,
+				AMD_IP_BLOCK_TYPE_UVD,
+				AMD_CG_STATE_GATE);
 		polaris10_update_uvd_dpm(hwmgr, true);
 		polaris10_phm_powerdown_uvd(hwmgr);
 	} else {
 		polaris10_phm_powerup_uvd(hwmgr);
 		polaris10_update_uvd_dpm(hwmgr, false);
+		cgs_set_clockgating_state(hwmgr->device,
+				AMD_IP_BLOCK_TYPE_UVD,
+				AMD_PG_STATE_UNGATE);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
index aa6be033f21b..9d764c4d253e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.c
@@ -389,6 +389,34 @@ static int polaris10_program_voting_clients(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+/*
+ * Set RESET_SCLK_CNT and RESET_BUSY_CNT in SCLK_PWRMGT_CNTL, then write 0
+ * to each of the eight CG_FREQ_TRAN_VOTING_n registers.
+ * Always returns 0.
+ */
+static int polaris10_clear_voting_clients(struct pp_hwmgr *hwmgr)
+{
+	static const uint32_t voting_regs[] = {
+		ixCG_FREQ_TRAN_VOTING_0, ixCG_FREQ_TRAN_VOTING_1,
+		ixCG_FREQ_TRAN_VOTING_2, ixCG_FREQ_TRAN_VOTING_3,
+		ixCG_FREQ_TRAN_VOTING_4, ixCG_FREQ_TRAN_VOTING_5,
+		ixCG_FREQ_TRAN_VOTING_6, ixCG_FREQ_TRAN_VOTING_7,
+	};
+	unsigned int i;
+
+	/* Reset voting clients before disabling DPM */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			SCLK_PWRMGT_CNTL, RESET_SCLK_CNT, 1);
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			SCLK_PWRMGT_CNTL, RESET_BUSY_CNT, 1);
+
+	for (i = 0; i < sizeof(voting_regs) / sizeof(voting_regs[0]); i++)
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+				voting_regs[i], 0);
+
+	return 0;
+}
+
 /**
 * Get the location of various tables inside the FW image.
 *
@@ -515,6 +543,11 @@ static int polaris10_copy_and_switch_arb_sets(struct pp_hwmgr *hwmgr,
 	return 0;
 }
 
+static int polaris10_reset_to_default(struct pp_hwmgr *hwmgr)
+{	/* sends PPSMC_MSG_ResetToDefaults to the SMC and returns that result */
+	return smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_ResetToDefaults);
+}
+
 /**
 * Initial switch from ARB F0->F1
 *
@@ -528,6 +561,21 @@ static int polaris10_initial_switch_from_arbf0_to_f1(struct pp_hwmgr *hwmgr)
 			MC_CG_ARB_FREQ_F0, MC_CG_ARB_FREQ_F1);
 }
 
+/*
+ * Copy-and-switch to ARB set F0, unless SMC_SCRATCH9[15:8] is already F0.
+ */
+static int polaris10_force_switch_to_arbf0(struct pp_hwmgr *hwmgr)
+{
+	uint32_t scratch = cgs_read_ind_register(hwmgr->device,
+			CGS_IND_REG__SMC, ixSMC_SCRATCH9);
+	uint32_t cur_arb = (scratch >> 8) & 0xff;
+
+	if (cur_arb != MC_CG_ARB_FREQ_F0)
+		return polaris10_copy_and_switch_arb_sets(hwmgr,
+				cur_arb, MC_CG_ARB_FREQ_F0);
+	return 0;
+}
+
 static int polaris10_setup_default_pcie_table(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
@@ -999,7 +1047,7 @@ static int polaris10_get_dependency_volt_by_clk(struct pp_hwmgr *hwmgr,
 				vddci = phm_find_closest_vddci(&(data->vddci_voltage_table),
 						(dep_table->entries[i].vddc -
 								(uint16_t)data->vddc_vddci_delta));
-				*voltage |= (vddci * VOLTAGE_SCALE) <<	VDDCI_SHIFT;
+				*voltage |= (vddci * VOLTAGE_SCALE) << VDDCI_SHIFT;
 			}
 
 			if (POLARIS10_VOLTAGE_CONTROL_NONE == data->mvdd_control)
@@ -1296,7 +1344,6 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr,
 	}
 
 	mem_level->MclkFrequency = clock;
-	mem_level->StutterEnable = 0;
 	mem_level->EnabledForThrottle = 1;
 	mem_level->EnabledForActivity = 0;
 	mem_level->UpHyst = 0;
@@ -1304,7 +1351,6 @@ static int polaris10_populate_single_memory_level(struct pp_hwmgr *hwmgr,
 	mem_level->VoltageDownHyst = 0;
 	mem_level->ActivityLevel = (uint16_t)data->mclk_activity_target;
 	mem_level->StutterEnable = false;
-
 	mem_level->DisplayWatermark = PPSMC_DISPLAY_WATERMARK_LOW;
 
 	data->display_timing.num_existing_displays = info.display_count;
@@ -1358,12 +1404,12 @@ static int polaris10_populate_all_memory_levels(struct pp_hwmgr *hwmgr)
 			return result;
 	}
 
-	/* in order to prevent MC activity from stutter mode to push DPM up.
+	/* In order to prevent MC activity from stutter mode to push DPM up,
 	 * the UVD change complements this by putting the MCLK in
-	 * a higher state by default such that we are not effected by
+	 * a higher state by default such that we are not affected by
 	 * up threshold or and MCLK DPM latency.
 	 */
-	levels[0].ActivityLevel = (uint16_t)data->mclk_dpm0_activity_target;
+	levels[0].ActivityLevel = 0x1f;
 	CONVERT_FROM_HOST_TO_SMC_US(levels[0].ActivityLevel);
 
 	data->smc_state_table.MemoryDpmLevelCount =
@@ -1761,12 +1807,9 @@ static int polaris10_populate_smc_initailial_state(struct pp_hwmgr *hwmgr)
 
 static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 {
-	uint32_t ro, efuse, efuse2, clock_freq, volt_without_cks,
-			volt_with_cks, value;
-	uint16_t clock_freq_u16;
+	uint32_t ro, efuse, volt_without_cks, volt_with_cks, value, max, min;
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
-	uint8_t type, i, j, cks_setting, stretch_amount, stretch_amount2,
-			volt_offset = 0;
+	uint8_t i, stretch_amount, stretch_amount2, volt_offset = 0;
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 	struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table =
@@ -1778,50 +1821,38 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 	 * if the part is SS or FF. if RO >= 1660MHz, part is FF.
 	 */
 	efuse = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixSMU_EFUSE_0 + (146 * 4));
-	efuse2 = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixSMU_EFUSE_0 + (148 * 4));
+			ixSMU_EFUSE_0 + (67 * 4));
 	efuse &= 0xFF000000;
 	efuse = efuse >> 24;
-	efuse2 &= 0xF;
-
-	if (efuse2 == 1)
-		ro = (2300 - 1350) * efuse / 255 + 1350;
-	else
-		ro = (2500 - 1000) * efuse / 255 + 1000;
 
-	if (ro >= 1660)
-		type = 0;
-	else
-		type = 1;
+	if (hwmgr->chip_id == CHIP_POLARIS10) {
+		min = 1000;
+		max = 2300;
+	} else {
+		min = 1100;
+		max = 2100;
+	}
 
-	/* Populate Stretch amount */
-	data->smc_state_table.ClockStretcherAmount = stretch_amount;
+	ro = efuse * (max -min)/255 + min;
 
 	/* Populate Sclk_CKS_masterEn0_7 and Sclk_voltageOffset */
 	for (i = 0; i < sclk_table->count; i++) {
 		data->smc_state_table.Sclk_CKS_masterEn0_7 |=
 				sclk_table->entries[i].cks_enable << i;
-		volt_without_cks = (uint32_t)((14041 *
-			(sclk_table->entries[i].clk/100) / 10000 + 3571 + 75 - ro) * 1000 /
-			(4026 - (13924 * (sclk_table->entries[i].clk/100) / 10000)));
-		volt_with_cks = (uint32_t)((13946 *
-			(sclk_table->entries[i].clk/100) / 10000 + 3320 + 45 - ro) * 1000 /
-			(3664 - (11454 * (sclk_table->entries[i].clk/100) / 10000)));
+
+		volt_without_cks =  (uint32_t)(((ro - 40) * 1000 - 2753594 - sclk_table->entries[i].clk/100 * 136418 /1000) / \
+					(sclk_table->entries[i].clk/100 * 1132925 /10000 - 242418)/100);
+
+		volt_with_cks = (uint32_t)((ro * 1000 -2396351 - sclk_table->entries[i].clk/100 * 329021/1000) / \
+				(sclk_table->entries[i].clk/10000 * 649434 /1000  - 18005)/10);
+
 		if (volt_without_cks >= volt_with_cks)
 			volt_offset = (uint8_t)(((volt_without_cks - volt_with_cks +
 					sclk_table->entries[i].cks_voffset) * 100 / 625) + 1);
+
 		data->smc_state_table.Sclk_voltageOffset[i] = volt_offset;
 	}
 
-	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE,
-			STRETCH_ENABLE, 0x0);
-	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE,
-			masterReset, 0x1);
-	/* PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE, staticEnable, 0x1); */
-	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, PWR_CKS_ENABLE,
-			masterReset, 0x0);
-
 	/* Populate CKS Lookup Table */
 	if (stretch_amount == 1 || stretch_amount == 2 || stretch_amount == 5)
 		stretch_amount2 = 0;
@@ -1835,69 +1866,6 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 				return -EINVAL);
 	}
 
-	value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixPWR_CKS_CNTL);
-	value &= 0xFFC2FF87;
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq =
-			polaris10_clock_stretcher_lookup_table[stretch_amount2][0];
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq =
-			polaris10_clock_stretcher_lookup_table[stretch_amount2][1];
-	clock_freq_u16 = (uint16_t)(PP_SMC_TO_HOST_UL(data->smc_state_table.
-			GraphicsLevel[data->smc_state_table.GraphicsDpmLevelCount - 1].SclkSetting.SclkFrequency) / 100);
-	if (polaris10_clock_stretcher_lookup_table[stretch_amount2][0] < clock_freq_u16
-	&& polaris10_clock_stretcher_lookup_table[stretch_amount2][1] > clock_freq_u16) {
-		/* Program PWR_CKS_CNTL. CKS_USE_FOR_LOW_FREQ */
-		value |= (polaris10_clock_stretcher_lookup_table[stretch_amount2][3]) << 16;
-		/* Program PWR_CKS_CNTL. CKS_LDO_REFSEL */
-		value |= (polaris10_clock_stretcher_lookup_table[stretch_amount2][2]) << 18;
-		/* Program PWR_CKS_CNTL. CKS_STRETCH_AMOUNT */
-		value |= (polaris10_clock_stretch_amount_conversion
-				[polaris10_clock_stretcher_lookup_table[stretch_amount2][3]]
-				 [stretch_amount]) << 3;
-	}
-	CONVERT_FROM_HOST_TO_SMC_US(data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].minFreq);
-	CONVERT_FROM_HOST_TO_SMC_US(data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].maxFreq);
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting =
-			polaris10_clock_stretcher_lookup_table[stretch_amount2][2] & 0x7F;
-	data->smc_state_table.CKS_LOOKUPTable.CKS_LOOKUPTableEntry[0].setting |=
-			(polaris10_clock_stretcher_lookup_table[stretch_amount2][3]) << 7;
-
-	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
-			ixPWR_CKS_CNTL, value);
-
-	/* Populate DDT Lookup Table */
-	for (i = 0; i < 4; i++) {
-		/* Assign the minimum and maximum VID stored
-		 * in the last row of Clock Stretcher Voltage Table.
-		 */
-		data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].minVID =
-				(uint8_t) polaris10_clock_stretcher_ddt_table[type][i][2];
-		data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].maxVID =
-				(uint8_t) polaris10_clock_stretcher_ddt_table[type][i][3];
-		/* Loop through each SCLK and check the frequency
-		 * to see if it lies within the frequency for clock stretcher.
-		 */
-		for (j = 0; j < data->smc_state_table.GraphicsDpmLevelCount; j++) {
-			cks_setting = 0;
-			clock_freq = PP_SMC_TO_HOST_UL(
-					data->smc_state_table.GraphicsLevel[j].SclkSetting.SclkFrequency);
-			/* Check the allowed frequency against the sclk level[j].
-			 *  Sclk's endianness has already been converted,
-			 *  and it's in 10Khz unit,
-			 *  as opposed to Data table, which is in Mhz unit.
-			 */
-			if (clock_freq >= (polaris10_clock_stretcher_ddt_table[type][i][0]) * 100) {
-				cks_setting |= 0x2;
-				if (clock_freq < (polaris10_clock_stretcher_ddt_table[type][i][1]) * 100)
-					cks_setting |= 0x1;
-			}
-			data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].setting
-							|= cks_setting << (j * 2);
-		}
-		CONVERT_FROM_HOST_TO_SMC_US(
-			data->smc_state_table.ClockStretcherDataTable.ClockStretcherDataTableEntry[i].setting);
-	}
-
 	value = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL);
 	value &= 0xFFFFFFFE;
 	cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixPWR_CKS_CNTL, value);
@@ -1945,9 +1913,8 @@ static int polaris10_populate_vr_config(struct pp_hwmgr *hwmgr,
 	if (POLARIS10_VOLTAGE_CONTROL_BY_SVID2 == data->mvdd_control) {
 		config = VR_SVI2_PLANE_2;
 		table->VRConfig |= (config << VRCONF_MVDD_SHIFT);
-	} else if (POLARIS10_VOLTAGE_CONTROL_BY_GPIO == data->mvdd_control) {
-		config = VR_SMIO_PATTERN_2;
-		table->VRConfig |= (config << VRCONF_MVDD_SHIFT);
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, data->soft_regs_start +
+			offsetof(SMU74_SoftRegisters, AllowMvddSwitch), 0x1);
 	} else {
 		config = VR_STATIC_VOLTAGE;
 		table->VRConfig |= (config << VRCONF_MVDD_SHIFT);
@@ -1956,6 +1923,90 @@ static int polaris10_populate_vr_config(struct pp_hwmgr *hwmgr,
 	return 0;
 }
 
+
+/* Fill AVFS fields via atomctrl_get_avfs_information(); copy mean/sigma and SCLK offset tables to the SMC. */
+int polaris10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	SMU74_Discrete_DpmTable  *table = &(data->smc_state_table);
+	int result = 0;
+	struct pp_atom_ctrl__avfs_parameters avfs_params = {0};
+	AVFS_meanNsigma_t AVFS_meanNsigma = { {0} };
+	AVFS_Sclk_Offset_t AVFS_SclkOffset = { {0} };
+	uint32_t tmp, i;
+	struct pp_smumgr *smumgr = hwmgr->smumgr;
+	struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend);
+	struct phm_ppt_v1_information *table_info =
+			(struct phm_ppt_v1_information *)hwmgr->pptable;
+	struct phm_ppt_v1_clock_voltage_dependency_table *sclk_table =
+			table_info->vdd_dep_on_sclk;
+
+	if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED)
+		return result;
+
+	result = atomctrl_get_avfs_information(hwmgr, &avfs_params);
+	if (0 == result) {
+		table->BTCGB_VDROOP_TABLE[0].a0  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a0);
+		table->BTCGB_VDROOP_TABLE[0].a1  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a1);
+		table->BTCGB_VDROOP_TABLE[0].a2  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a2);
+		table->BTCGB_VDROOP_TABLE[1].a0  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0);
+		table->BTCGB_VDROOP_TABLE[1].a1  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1);
+		table->BTCGB_VDROOP_TABLE[1].a2  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2);
+		table->AVFSGB_VDROOP_TABLE[0].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_m1);
+		table->AVFSGB_VDROOP_TABLE[0].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSON_m2);
+		table->AVFSGB_VDROOP_TABLE[0].b  = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSON_b);
+		table->AVFSGB_VDROOP_TABLE[0].m1_shift = 24;
+		table->AVFSGB_VDROOP_TABLE[0].m2_shift  = 12;
+		table->AVFSGB_VDROOP_TABLE[1].m1 = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1);
+		table->AVFSGB_VDROOP_TABLE[1].m2 = PP_HOST_TO_SMC_US(avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2);
+		table->AVFSGB_VDROOP_TABLE[1].b  = PP_HOST_TO_SMC_UL(avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b);
+		table->AVFSGB_VDROOP_TABLE[1].m1_shift = 24;
+		table->AVFSGB_VDROOP_TABLE[1].m2_shift  = 12;
+		table->MaxVoltage                = PP_HOST_TO_SMC_US(avfs_params.usMaxVoltage_0_25mv);
+		AVFS_meanNsigma.Aconstant[0]      = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant0);
+		AVFS_meanNsigma.Aconstant[1]      = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant1);
+		AVFS_meanNsigma.Aconstant[2]      = PP_HOST_TO_SMC_UL(avfs_params.ulAVFS_meanNsigma_Acontant2);
+		AVFS_meanNsigma.DC_tol_sigma      = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_DC_tol_sigma);
+		AVFS_meanNsigma.Platform_mean     = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_mean);
+		AVFS_meanNsigma.PSM_Age_CompFactor = PP_HOST_TO_SMC_US(avfs_params.usPSM_Age_ComFactor);
+		AVFS_meanNsigma.Platform_sigma     = PP_HOST_TO_SMC_US(avfs_params.usAVFS_meanNsigma_Platform_sigma);
+
+		for (i = 0; i < NUM_VFT_COLUMNS; i++) {
+			AVFS_meanNsigma.Static_Voltage_Offset[i] = (uint8_t)(sclk_table->entries[i].cks_voffset * 100 / 625);
+			AVFS_SclkOffset.Sclk_Offset[i] = PP_HOST_TO_SMC_US((uint16_t)(sclk_table->entries[i].sclk_offset) / 100);
+		}
+
+		/* tmp is only written when the header read succeeds; never copy to it otherwise. */
+		result = polaris10_read_smc_sram_dword(smumgr,
+				SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsMeanNSigma),
+				&tmp, data->sram_end);
+		if (result)
+			return result;
+		result = polaris10_copy_bytes_to_smc(smumgr, tmp,
+				(uint8_t *)&AVFS_meanNsigma,
+				sizeof(AVFS_meanNsigma_t), data->sram_end);
+		if (result)
+			return result;
+
+		result = polaris10_read_smc_sram_dword(smumgr,
+				SMU7_FIRMWARE_HEADER_LOCATION + offsetof(SMU74_Firmware_Header, AvfsSclkOffsetTable),
+				&tmp, data->sram_end);
+		if (result)
+			return result;
+		result = polaris10_copy_bytes_to_smc(smumgr, tmp,
+				(uint8_t *)&AVFS_SclkOffset,
+				sizeof(AVFS_Sclk_Offset_t), data->sram_end);
+
+		data->avfs_vdroop_override_setting = (avfs_params.ucEnableGB_VDROOP_TABLE_CKSON << BTCGB0_Vdroop_Enable_SHIFT) |
+						(avfs_params.ucEnableGB_VDROOP_TABLE_CKSOFF << BTCGB1_Vdroop_Enable_SHIFT) |
+						(avfs_params.ucEnableGB_FUSE_TABLE_CKSON << AVFSGB0_Vdroop_Enable_SHIFT) |
+						(avfs_params.ucEnableGB_FUSE_TABLE_CKSOFF << AVFSGB1_Vdroop_Enable_SHIFT);
+		data->apply_avfs_cks_off_voltage = (avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage == 1) ? true : false;
+	}
+	return result;
+}
+
+
 /**
 * Initializes the SMC table and uploads it
 *
@@ -2056,6 +2107,10 @@ static int polaris10_init_smc_table(struct pp_hwmgr *hwmgr)
 				"Failed to populate Clock Stretcher Data Table!",
 				return result);
 	}
+
+	result = polaris10_populate_avfs_parameters(hwmgr);
+	PP_ASSERT_WITH_CODE(0 == result, "Failed to populate AVFS Parameters!", return result;);
+
 	table->CurrSclkPllRange = 0xff;
 	table->GraphicsVoltageChangeEnable  = 1;
 	table->GraphicsThermThrottleEnable  = 1;
@@ -2229,6 +2284,17 @@ static int polaris10_enable_ulv(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+/* Send PPSMC_MSG_DisableULV to the SMC; returns 0 without sending when ULV is unsupported. */
+static int polaris10_disable_ulv(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *backend = (struct polaris10_hwmgr *)(hwmgr->backend);
+
+	if (!backend->ulv.ulv_supported)
+		return 0;
+
+	return smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_DisableULV);
+}
+
 static int polaris10_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
 {
 	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
@@ -2249,9 +2315,27 @@ static int polaris10_enable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+/*
+ * Send PPSMC_MSG_MASTER_DeepSleep_OFF if the SclkDeepSleep cap is set; -1 on failure.
+ */
+static int polaris10_disable_deep_sleep_master_switch(struct pp_hwmgr *hwmgr)
+{
+	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_SclkDeepSleep))
+		return 0;
+
+	PP_ASSERT_WITH_CODE(
+		smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_MASTER_DeepSleep_OFF) == 0,
+		"Attempt to disable Master Deep Sleep switch failed!", return -1);
+	return 0;
+}
+
 static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	uint32_t soft_register_value = 0;
+	uint32_t handshake_disables_offset = data->soft_regs_start
+				+ offsetof(SMU74_SoftRegisters, HandshakeDisables);
 
 	/* enable SCLK dpm */
 	if (!data->sclk_dpm_key_disabled)
@@ -2262,6 +2346,12 @@ static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 
 	/* enable MCLK dpm */
 	if (0 == data->mclk_dpm_key_disabled) {
+		/* Disable UVD - SMU handshake for MCLK. */
+		soft_register_value = cgs_read_ind_register(hwmgr->device,
+					CGS_IND_REG__SMC, handshake_disables_offset);
+		soft_register_value |= SMU7_UVD_MCLK_HANDSHAKE_DISABLE;
+		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC,
+				handshake_disables_offset, soft_register_value);
 
 		PP_ASSERT_WITH_CODE(
 				(0 == smum_send_msg_to_smc(hwmgr->smumgr,
@@ -2269,7 +2359,6 @@ static int polaris10_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
 				"Failed to enable MCLK DPM during DPM Start Function!",
 				return -1);
 
-
 		PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1);
 
 		cgs_write_ind_register(hwmgr->device, CGS_IND_REG__SMC, ixLCAC_MC0_CNTL, 0x5);
@@ -2338,6 +2427,58 @@ static int polaris10_start_dpm(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+/*
+ * Send the SCLK, then the MCLK, DPM-disable message unless the matching
+ * *_dpm_key_disabled flag is set.  Returns -1 at the first failure, else 0.
+ */
+static int polaris10_disable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *backend = (struct polaris10_hwmgr *)(hwmgr->backend);
+	int smc_ret;
+
+	/* disable SCLK dpm */
+	if (!backend->sclk_dpm_key_disabled) {
+		smc_ret = smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_DPM_Disable);
+		PP_ASSERT_WITH_CODE(smc_ret == 0, "Failed to disable SCLK DPM!", return -1);
+	}
+
+	/* disable MCLK dpm */
+	if (!backend->mclk_dpm_key_disabled) {
+		smc_ret = smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_MCLKDPM_Disable);
+		PP_ASSERT_WITH_CODE(smc_ret == 0, "Failed to disable MCLK DPM!", return -1);
+	}
+
+	return 0;
+}
+
+/* Clear GLOBAL_PWRMGT_EN and DYNAMIC_PM_EN, then disable PCIe, SCLK and MCLK DPM; 0 or -1. */
+static int polaris10_stop_dpm(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, GENERAL_PWRMGT,
+			GLOBAL_PWRMGT_EN, 0);
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC, SCLK_PWRMGT_CNTL,
+			DYNAMIC_PM_EN, 0);
+
+	/* disable PCIE dpm, unless pcie_dpm_key_disabled says it is not in use */
+	if (!data->pcie_dpm_key_disabled) {
+		PP_ASSERT_WITH_CODE(
+				(smum_send_msg_to_smc(hwmgr->smumgr,
+						PPSMC_MSG_PCIeDPM_Disable) == 0),
+				"Failed to disable pcie DPM during DPM Stop Function!",
+				return -1);
+	}
+
+	if (polaris10_disable_sclk_mclk_dpm(hwmgr)) {
+		/* Newline-terminate the record, like the other printk in this file. */
+		printk(KERN_ERR "Failed to disable Sclk DPM and Mclk DPM!\n");
+		return -1;
+	}
+
+	return 0;
+}
+
 static void polaris10_set_dpm_event_sources(struct pp_hwmgr *hwmgr, uint32_t sources)
 {
 	bool protection;
@@ -2395,6 +2536,23 @@ static int polaris10_enable_thermal_auto_throttle(struct pp_hwmgr *hwmgr)
 	return polaris10_enable_auto_throttle_source(hwmgr, PHM_AutoThrottleSource_Thermal);
 }
 
+/* Clear @source in active_auto_throttle_sources and reprogram the event sources; always 0. */
+static int polaris10_disable_auto_throttle_source(struct pp_hwmgr *hwmgr, PHM_AutoThrottleSource source)
+{
+	struct polaris10_hwmgr *backend = (struct polaris10_hwmgr *)(hwmgr->backend);
+
+	if (!(backend->active_auto_throttle_sources & (1 << source)))
+		return 0;
+	backend->active_auto_throttle_sources &= ~(1 << source);
+	polaris10_set_dpm_event_sources(hwmgr, backend->active_auto_throttle_sources);
+	return 0;
+}
+
+static int polaris10_disable_thermal_auto_throttle(struct pp_hwmgr *hwmgr)
+{
+	return polaris10_disable_auto_throttle_source(hwmgr, PHM_AutoThrottleSource_Thermal);
+}
+
 int polaris10_pcie_performance_request(struct pp_hwmgr *hwmgr)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
@@ -2516,8 +2674,60 @@ int polaris10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 
 int polaris10_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
 {
+	int ret, result = 0;
 
-	return 0;
+	/* Nothing to tear down unless DPM is reported as running. */
+	PP_ASSERT_WITH_CODE(polaris10_is_dpm_running(hwmgr),
+			"DPM is not running right now, no need to disable DPM!",
+			return 0);
+
+	/* Set THERMAL_PROTECTION_DIS when a thermal controller is present. */
+	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_ThermalController))
+		PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+				GENERAL_PWRMGT, THERMAL_PROTECTION_DIS, 1);
+
+	/*
+	 * The remaining steps all run even if an earlier one fails; result
+	 * ends up holding the code of the last step that failed.
+	 */
+	ret = polaris10_disable_power_containment(hwmgr);
+	PP_ASSERT_WITH_CODE(ret == 0, "Failed to disable power containment!", result = ret);
+
+	ret = polaris10_disable_smc_cac(hwmgr);
+	PP_ASSERT_WITH_CODE(ret == 0, "Failed to disable SMC CAC!", result = ret);
+
+	/* Clear both spread-spectrum enable fields. */
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			CG_SPLL_SPREAD_SPECTRUM, SSEN, 0);
+	PHM_WRITE_INDIRECT_FIELD(hwmgr->device, CGS_IND_REG__SMC,
+			GENERAL_PWRMGT, DYN_SPREAD_SPECTRUM_EN, 0);
+
+	/* The thermal throttle source is dropped before DPM itself is stopped. */
+	ret = polaris10_disable_thermal_auto_throttle(hwmgr);
+	PP_ASSERT_WITH_CODE(ret == 0, "Failed to disable thermal auto throttle!", result = ret);
+
+	/* Stop DPM, then the deep sleep master switch, then ULV. */
+	ret = polaris10_stop_dpm(hwmgr);
+	PP_ASSERT_WITH_CODE(ret == 0, "Failed to stop DPM!", result = ret);
+
+	ret = polaris10_disable_deep_sleep_master_switch(hwmgr);
+	PP_ASSERT_WITH_CODE(ret == 0, "Failed to disable deep sleep master switch!", result = ret);
+
+	ret = polaris10_disable_ulv(hwmgr);
+	PP_ASSERT_WITH_CODE(ret == 0, "Failed to disable ULV!", result = ret);
+
+	/* Last: clear voting clients, reset to default, switch to arbf0. */
+	ret = polaris10_clear_voting_clients(hwmgr);
+	PP_ASSERT_WITH_CODE(ret == 0, "Failed to clear voting clients!", result = ret);
+
+	ret = polaris10_reset_to_default(hwmgr);
+	PP_ASSERT_WITH_CODE(ret == 0, "Failed to reset to default!", result = ret);
+
+	ret = polaris10_force_switch_to_arbf0(hwmgr);
+	PP_ASSERT_WITH_CODE(ret == 0, "Failed to force to switch arbf0!", result = ret);
+
+	return result;
 }
 
 int polaris10_reset_asic_tasks(struct pp_hwmgr *hwmgr)
@@ -2528,13 +2738,6 @@ int polaris10_reset_asic_tasks(struct pp_hwmgr *hwmgr)
 
 int polaris10_hwmgr_backend_fini(struct pp_hwmgr *hwmgr)
 {
-	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
-
-	if (data->soft_pp_table) {
-		kfree(data->soft_pp_table);
-		data->soft_pp_table = NULL;
-	}
-
 	return phm_hwmgr_backend_fini(hwmgr);
 }
 
@@ -2590,8 +2793,13 @@ int polaris10_set_features_platform_caps(struct pp_hwmgr *hwmgr)
 	phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
 						PHM_PlatformCaps_TCPRamping);
 
-	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
-					PHM_PlatformCaps_PowerContainment);
+	if (hwmgr->powercontainment_enabled)
+		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+			    PHM_PlatformCaps_PowerContainment);
+	else
+		phm_cap_unset(hwmgr->platform_descriptor.platformCaps,
+			    PHM_PlatformCaps_PowerContainment);
+
 	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 							PHM_PlatformCaps_CAC);
 
@@ -2606,6 +2814,7 @@ int polaris10_set_features_platform_caps(struct pp_hwmgr *hwmgr)
 
 	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 						PHM_PlatformCaps_FanSpeedInTableIsRPM);
+
 	if (hwmgr->chip_id == CHIP_POLARIS11)
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 					PHM_PlatformCaps_SPLLShutdownSupport);
@@ -2662,12 +2871,12 @@ static int polaris10_get_evv_voltages(struct pp_hwmgr *hwmgr)
 				}
 			}
 
-
-			PP_ASSERT_WITH_CODE(0 == atomctrl_get_voltage_evv_on_sclk_ai(hwmgr,
-							VOLTAGE_TYPE_VDDC, sclk, vv_id, &vddc),
-						"Error retrieving EVV voltage value!",
-						continue);
-
+			if (atomctrl_get_voltage_evv_on_sclk_ai(hwmgr,
+						VOLTAGE_TYPE_VDDC,
+						sclk, vv_id, &vddc) != 0) {
+				printk(KERN_WARNING "failed to retrieving EVV voltage!\n");
+				continue;
+			}
 
 			/* need to make sure vddc is less than 2v or else, it could burn the ASIC. */
 			PP_ASSERT_WITH_CODE((vddc < 2000 && vddc != 0),
@@ -2898,13 +3107,19 @@ static int polaris10_set_private_data_based_on_pptable(struct pp_hwmgr *hwmgr)
 
 int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 {
-	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_hwmgr *data;
 	struct pp_atomctrl_gpio_pin_assignment gpio_pin_assignment;
 	uint32_t temp_reg;
 	int result;
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 
+	data = kzalloc(sizeof(struct polaris10_hwmgr), GFP_KERNEL);
+	if (data == NULL)
+		return -ENOMEM;
+
+	hwmgr->backend = data;
+
 	data->dll_default_on = false;
 	data->sram_end = SMC_RAM_END;
 	data->mclk_dpm0_activity_target = 0xa;
@@ -2938,6 +3153,11 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	data->vddci_control = POLARIS10_VOLTAGE_CONTROL_NONE;
 	data->mvdd_control = POLARIS10_VOLTAGE_CONTROL_NONE;
 
+	data->enable_tdc_limit_feature = true;
+	data->enable_pkg_pwr_tracking_feature = true;
+	data->force_pcie_gen = PP_PCIEGenInvalid;
+	data->mclk_stutter_mode_threshold = 40000;
+
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 			VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2))
 		data->voltage_control = POLARIS10_VOLTAGE_CONTROL_BY_SVID2;
@@ -2962,6 +3182,10 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 			data->vddci_control = POLARIS10_VOLTAGE_CONTROL_BY_SVID2;
 	}
 
+	if (table_info->cac_dtp_table->usClockStretchAmount != 0)
+		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+					PHM_PlatformCaps_ClockStretcher);
+
 	polaris10_set_features_platform_caps(hwmgr);
 
 	polaris10_init_dpm_defaults(hwmgr);
@@ -3068,7 +3292,7 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_GEN_INFO;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_gen_cap = 0x30007;
+			data->pcie_gen_cap = AMDGPU_DEFAULT_PCIE_GEN_MASK;
 		else
 			data->pcie_gen_cap = (uint32_t)sys_info.value;
 		if (data->pcie_gen_cap & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
@@ -3077,7 +3301,7 @@ int polaris10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_MLW;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_lane_cap = 0x2f0000;
+			data->pcie_lane_cap = AMDGPU_DEFAULT_PCIE_MLW_MASK;
 		else
 			data->pcie_lane_cap = (uint32_t)sys_info.value;
 
@@ -3520,10 +3744,11 @@ static int polaris10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
 	ATOM_Tonga_State *state_entry = (ATOM_Tonga_State *)state;
 	ATOM_Tonga_POWERPLAYTABLE *powerplay_table =
 			(ATOM_Tonga_POWERPLAYTABLE *)pp_table;
-	ATOM_Tonga_SCLK_Dependency_Table *sclk_dep_table =
-			(ATOM_Tonga_SCLK_Dependency_Table *)
+	PPTable_Generic_SubTable_Header *sclk_dep_table =
+			(PPTable_Generic_SubTable_Header *)
 			(((unsigned long)powerplay_table) +
 				le16_to_cpu(powerplay_table->usSclkDependencyTableOffset));
+
 	ATOM_Tonga_MCLK_Dependency_Table *mclk_dep_table =
 			(ATOM_Tonga_MCLK_Dependency_Table *)
 			(((unsigned long)powerplay_table) +
@@ -3575,7 +3800,11 @@ static int polaris10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
 	/* Performance levels are arranged from low to high. */
 	performance_level->memory_clock = mclk_dep_table->entries
 			[state_entry->ucMemoryClockIndexLow].ulMclk;
-	performance_level->engine_clock = sclk_dep_table->entries
+	if (sclk_dep_table->ucRevId == 0)
+		performance_level->engine_clock = ((ATOM_Tonga_SCLK_Dependency_Table *)sclk_dep_table)->entries
+			[state_entry->ucEngineClockIndexLow].ulSclk;
+	else if (sclk_dep_table->ucRevId == 1)
+		performance_level->engine_clock = ((ATOM_Polaris_SCLK_Dependency_Table *)sclk_dep_table)->entries
 			[state_entry->ucEngineClockIndexLow].ulSclk;
 	performance_level->pcie_gen = get_pcie_gen_support(data->pcie_gen_cap,
 			state_entry->ucPCIEGenLow);
@@ -3586,8 +3815,14 @@ static int polaris10_get_pp_table_entry_callback_func(struct pp_hwmgr *hwmgr,
 			[polaris10_power_state->performance_level_count++]);
 	performance_level->memory_clock = mclk_dep_table->entries
 			[state_entry->ucMemoryClockIndexHigh].ulMclk;
-	performance_level->engine_clock = sclk_dep_table->entries
+
+	if (sclk_dep_table->ucRevId == 0)
+		performance_level->engine_clock = ((ATOM_Tonga_SCLK_Dependency_Table *)sclk_dep_table)->entries
+			[state_entry->ucEngineClockIndexHigh].ulSclk;
+	else if (sclk_dep_table->ucRevId == 1)
+		performance_level->engine_clock = ((ATOM_Polaris_SCLK_Dependency_Table *)sclk_dep_table)->entries
 			[state_entry->ucEngineClockIndexHigh].ulSclk;
+
 	performance_level->pcie_gen = get_pcie_gen_support(data->pcie_gen_cap,
 			state_entry->ucPCIEGenHigh);
 	performance_level->pcie_lane = get_pcie_lane_support(data->pcie_lane_cap,
@@ -3645,7 +3880,6 @@ static int polaris10_get_pp_table_entry(struct pp_hwmgr *hwmgr,
 		switch (state->classification.ui_label) {
 		case PP_StateUILabel_Performance:
 			data->use_pcie_performance_levels = true;
-
 			for (i = 0; i < ps->performance_level_count; i++) {
 				if (data->pcie_gen_performance.max <
 						ps->performance_levels[i].pcie_gen)
@@ -3661,7 +3895,6 @@ static int polaris10_get_pp_table_entry(struct pp_hwmgr *hwmgr,
 						ps->performance_levels[i].pcie_lane)
 					data->pcie_lane_performance.max =
 							ps->performance_levels[i].pcie_lane;
-
 				if (data->pcie_lane_performance.min >
 						ps->performance_levels[i].pcie_lane)
 					data->pcie_lane_performance.min =
@@ -4187,12 +4420,9 @@ int polaris10_update_samu_dpm(struct pp_hwmgr *hwmgr, bool bgate)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
 	uint32_t mm_boot_level_offset, mm_boot_level_value;
-	struct phm_ppt_v1_information *table_info =
-			(struct phm_ppt_v1_information *)(hwmgr->pptable);
 
 	if (!bgate) {
-		data->smc_state_table.SamuBootLevel =
-				(uint8_t) (table_info->mm_dep_table->count - 1);
+		data->smc_state_table.SamuBootLevel = 0;
 		mm_boot_level_offset = data->dpm_table_start +
 				offsetof(SMU74_Discrete_DpmTable, SamuBootLevel);
 		mm_boot_level_offset /= 4;
@@ -4721,42 +4951,6 @@ int polaris10_setup_asic_task(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
-static int polaris10_get_pp_table(struct pp_hwmgr *hwmgr, char **table)
-{
-	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kmemdup(hwmgr->soft_pp_table,
-					      hwmgr->soft_pp_table_size,
-					      GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	*table = (char *)&data->soft_pp_table;
-
-	return hwmgr->soft_pp_table_size;
-}
-
-static int polaris10_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size)
-{
-	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kzalloc(hwmgr->soft_pp_table_size, GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	memcpy(data->soft_pp_table, buf, size);
-
-	hwmgr->soft_pp_table = data->soft_pp_table;
-
-	/* TODO: re-init powerplay to implement modified pptable */
-
-	return 0;
-}
-
 static int polaris10_force_clock_level(struct pp_hwmgr *hwmgr,
 		enum pp_clock_type type, uint32_t mask)
 {
@@ -4899,6 +5093,89 @@ static int polaris10_get_fan_control_mode(struct pp_hwmgr *hwmgr)
 				CG_FDO_CTRL2, FDO_PWM_MODE);
 }
 
+/*
+ * Percentage by which the current top SCLK level exceeds the golden top level.
+ */
+static int polaris10_get_sclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *backend = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_single_dpm_table *cur = &(backend->dpm_table.sclk_table);
+	struct polaris10_single_dpm_table *golden =
+			&(backend->golden_dpm_table.sclk_table);
+
+	return (cur->dpm_levels[cur->count - 1].value -
+			golden->dpm_levels[golden->count - 1].value) *
+			100 /
+			golden->dpm_levels[golden->count - 1].value;
+}
+
+/*
+ * Set the top engine clock of the requested power state to the golden top
+ * SCLK plus @value percent; @value is clamped to 20.  Returns -EINVAL when
+ * hwmgr->request_ps is NULL, 0 otherwise.
+ */
+static int polaris10_set_sclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct polaris10_hwmgr *backend = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_single_dpm_table *golden =
+			&(backend->golden_dpm_table.sclk_table);
+	struct pp_power_state *req_ps = hwmgr->request_ps;
+	struct polaris10_power_state *hw_ps;
+
+	if (value > 20)
+		value = 20;
+	if (req_ps == NULL)
+		return -EINVAL;
+
+	hw_ps = cast_phw_polaris10_power_state(&req_ps->hardware);
+	hw_ps->performance_levels[hw_ps->performance_level_count - 1].engine_clock =
+			golden->dpm_levels[golden->count - 1].value * value / 100 +
+			golden->dpm_levels[golden->count - 1].value;
+
+	return 0;
+}
+
+/*
+ * Percentage by which the current top MCLK level exceeds the golden top level.
+ */
+static int polaris10_get_mclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *backend = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_single_dpm_table *cur = &(backend->dpm_table.mclk_table);
+	struct polaris10_single_dpm_table *golden =
+			&(backend->golden_dpm_table.mclk_table);
+
+	return (cur->dpm_levels[cur->count - 1].value -
+			golden->dpm_levels[golden->count - 1].value) *
+			100 /
+			golden->dpm_levels[golden->count - 1].value;
+}
+
+/*
+ * Set the top memory clock of the requested power state to the golden top
+ * MCLK plus @value percent; @value is clamped to 20.  Returns -EINVAL when
+ * hwmgr->request_ps is NULL, 0 otherwise.
+ */
+static int polaris10_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct polaris10_hwmgr *backend = (struct polaris10_hwmgr *)(hwmgr->backend);
+	struct polaris10_single_dpm_table *golden =
+			&(backend->golden_dpm_table.mclk_table);
+	struct pp_power_state *req_ps = hwmgr->request_ps;
+	struct polaris10_power_state *hw_ps;
+
+	if (value > 20)
+		value = 20;
+	if (req_ps == NULL)
+		return -EINVAL;
+
+	hw_ps = cast_phw_polaris10_power_state(&req_ps->hardware);
+	hw_ps->performance_levels[hw_ps->performance_level_count - 1].memory_clock =
+			golden->dpm_levels[golden->count - 1].value * value / 100 +
+			golden->dpm_levels[golden->count - 1].value;
+
+	return 0;
+}
 static const struct pp_hwmgr_func polaris10_hwmgr_funcs = {
 	.backend_init = &polaris10_hwmgr_backend_init,
 	.backend_fini = &polaris10_hwmgr_backend_fini,
@@ -4937,22 +5214,17 @@ static const struct pp_hwmgr_func polaris10_hwmgr_funcs = {
 	.check_states_equal = polaris10_check_states_equal,
 	.set_fan_control_mode = polaris10_set_fan_control_mode,
 	.get_fan_control_mode = polaris10_get_fan_control_mode,
-	.get_pp_table = polaris10_get_pp_table,
-	.set_pp_table = polaris10_set_pp_table,
 	.force_clock_level = polaris10_force_clock_level,
 	.print_clock_levels = polaris10_print_clock_levels,
 	.enable_per_cu_power_gating = polaris10_phm_enable_per_cu_power_gating,
+	.get_sclk_od = polaris10_get_sclk_od,
+	.set_sclk_od = polaris10_set_sclk_od,
+	.get_mclk_od = polaris10_get_mclk_od,
+	.set_mclk_od = polaris10_set_mclk_od,
 };
 
 int polaris10_hwmgr_init(struct pp_hwmgr *hwmgr)
 {
-	struct polaris10_hwmgr  *data;
-
-	data = kzalloc (sizeof(struct polaris10_hwmgr), GFP_KERNEL);
-	if (data == NULL)
-		return -ENOMEM;
-
-	hwmgr->backend = data;
 	hwmgr->hwmgr_func = &polaris10_hwmgr_funcs;
 	hwmgr->pptable_func = &tonga_pptable_funcs;
 	pp_polaris10_thermal_initialize(hwmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
index beedf35cbfa6..fd38b0d7a3c2 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_hwmgr.h
@@ -309,9 +309,8 @@ struct polaris10_hwmgr {
 	uint32_t                           up_hyst;
 	uint32_t disable_dpm_mask;
 	bool apply_optimized_settings;
-
-	/* soft pptable for re-uploading into smu */
-	void *soft_pp_table;
+	uint32_t                              avfs_vdroop_override_setting;
+	bool                                  apply_avfs_cks_off_voltage;
 };
 
 /* To convert to Q8.8 format for firmware */
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c
index 0b99ab3ba0c5..5620e268b553 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.c
@@ -286,7 +286,7 @@ int polaris10_populate_pm_fuses(struct pp_hwmgr *hwmgr)
 
 		if (polaris10_copy_bytes_to_smc(hwmgr->smumgr, pm_fuse_table_offset,
 				(uint8_t *)&data->power_tune_table,
-				sizeof(struct SMU74_Discrete_PmFuses), data->sram_end))
+				(sizeof(struct SMU74_Discrete_PmFuses) - 92), data->sram_end))
 			PP_ASSERT_WITH_CODE(false,
 					"Attempt to download PmFuseTable Failed!",
 					return -EINVAL);
@@ -312,6 +312,23 @@ int polaris10_enable_smc_cac(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+/*
+ * Send PPSMC_MSG_DisableCac when the CAC cap and cac_enabled are set; -1 if it fails.
+ */
+int polaris10_disable_smc_cac(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *backend = (struct polaris10_hwmgr *)(hwmgr->backend);
+	int smc_ret, result = 0;
+
+	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_CAC) || !backend->cac_enabled)
+		return 0;
+	smc_ret = smum_send_msg_to_smc(hwmgr->smumgr, (uint16_t)(PPSMC_MSG_DisableCac));
+	PP_ASSERT_WITH_CODE((smc_ret == 0), "Failed to disable CAC in SMC.", result = -1);
+	backend->cac_enabled = false; /* cleared even when the message failed */
+	return result;
+}
+
 int polaris10_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n)
 {
 	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
@@ -373,6 +390,48 @@ int polaris10_enable_power_containment(struct pp_hwmgr *hwmgr)
 	return result;
 }
 
+/*
+ * Send the SMC disable message for each feature bit set in
+ * power_containment_features (TDC limit, DTE, package power limit, in that
+ * order), then clear the mask.  Every message is attempted; the return value
+ * is the last non-zero SMC result, or 0.
+ */
+int polaris10_disable_power_containment(struct pp_hwmgr *hwmgr)
+{
+	struct polaris10_hwmgr *backend = (struct polaris10_hwmgr *)(hwmgr->backend);
+	int smc_ret, result = 0;
+
+	if (!phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
+			PHM_PlatformCaps_PowerContainment) ||
+			!backend->power_containment_features)
+		return 0;
+
+	if (backend->power_containment_features & POWERCONTAINMENT_FEATURE_TDCLimit) {
+		smc_ret = smum_send_msg_to_smc(hwmgr->smumgr,
+				(uint16_t)(PPSMC_MSG_TDCLimitDisable));
+		PP_ASSERT_WITH_CODE((smc_ret == 0),
+				"Failed to disable TDCLimit in SMC.", result = smc_ret);
+	}
+
+	if (backend->power_containment_features & POWERCONTAINMENT_FEATURE_DTE) {
+		smc_ret = smum_send_msg_to_smc(hwmgr->smumgr,
+				(uint16_t)(PPSMC_MSG_DisableDTE));
+		PP_ASSERT_WITH_CODE((smc_ret == 0),
+				"Failed to disable DTE in SMC.", result = smc_ret);
+	}
+
+	if (backend->power_containment_features & POWERCONTAINMENT_FEATURE_PkgPwrLimit) {
+		smc_ret = smum_send_msg_to_smc(hwmgr->smumgr,
+				(uint16_t)(PPSMC_MSG_PkgPwrLimitDisable));
+		PP_ASSERT_WITH_CODE((smc_ret == 0),
+				"Failed to disable PkgPwrTracking in SMC.", result = smc_ret);
+	}
+
+	backend->power_containment_features = 0;
+
+	return result;
+}
+
 int polaris10_power_control_set_level(struct pp_hwmgr *hwmgr)
 {
 	struct phm_ppt_v1_information *table_info =
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.h b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.h
index 68bc1cb6d40c..d492d6d28867 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_powertune.h
@@ -31,6 +31,19 @@ enum polaris10_pt_config_reg_type {
 	POLARIS10_CONFIGREG_MAX
 };
 
+#define DIDT_SQ_CTRL0__UNUSED_0_MASK            0xfffc0000
+#define DIDT_SQ_CTRL0__UNUSED_0__SHIFT          0x12
+#define DIDT_TD_CTRL0__UNUSED_0_MASK            0xfffc0000
+#define DIDT_TD_CTRL0__UNUSED_0__SHIFT          0x12
+#define DIDT_TCP_CTRL0__UNUSED_0_MASK           0xfffc0000
+#define DIDT_TCP_CTRL0__UNUSED_0__SHIFT         0x12
+#define DIDT_SQ_TUNING_CTRL__UNUSED_0_MASK      0xc0000000
+#define DIDT_SQ_TUNING_CTRL__UNUSED_0__SHIFT    0x0000001e
+#define DIDT_TD_TUNING_CTRL__UNUSED_0_MASK      0xc0000000
+#define DIDT_TD_TUNING_CTRL__UNUSED_0__SHIFT    0x0000001e
+#define DIDT_TCP_TUNING_CTRL__UNUSED_0_MASK     0xc0000000
+#define DIDT_TCP_TUNING_CTRL__UNUSED_0__SHIFT   0x0000001e
+
 /* PowerContainment Features */
 #define POWERCONTAINMENT_FEATURE_DTE             0x00000001
 #define POWERCONTAINMENT_FEATURE_TDCLimit        0x00000002
@@ -62,7 +75,9 @@ void polaris10_initialize_power_tune_defaults(struct pp_hwmgr *hwmgr);
 int polaris10_populate_bapm_parameters_in_dpm_table(struct pp_hwmgr *hwmgr);
 int polaris10_populate_pm_fuses(struct pp_hwmgr *hwmgr);
 int polaris10_enable_smc_cac(struct pp_hwmgr *hwmgr);
+int polaris10_disable_smc_cac(struct pp_hwmgr *hwmgr);
 int polaris10_enable_power_containment(struct pp_hwmgr *hwmgr);
+int polaris10_disable_power_containment(struct pp_hwmgr *hwmgr);
 int polaris10_set_power_limit(struct pp_hwmgr *hwmgr, uint32_t n);
 int polaris10_power_control_set_level(struct pp_hwmgr *hwmgr);
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c
index aba167f7d167..b206632d4650 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/polaris10_thermal.c
@@ -625,10 +625,14 @@ static int tf_polaris10_thermal_avfs_enable(struct pp_hwmgr *hwmgr,
 	int ret;
 	struct pp_smumgr *smumgr = (struct pp_smumgr *)(hwmgr->smumgr);
 	struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(smumgr->backend);
+	struct polaris10_hwmgr *data = (struct polaris10_hwmgr *)(hwmgr->backend);
 
-	if (smu_data->avfs.avfs_btc_status != AVFS_BTC_ENABLEAVFS)
+	if (smu_data->avfs.avfs_btc_status == AVFS_BTC_NOTSUPPORTED)
 		return 0;
 
+	ret = smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+			PPSMC_MSG_SetGBDroopSettings, data->avfs_vdroop_override_setting);
+
 	ret = (smum_send_msg_to_smc(smumgr, PPSMC_MSG_EnableAvfs) == 0) ?
 			0 : -1;
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
index 58742e0d1492..a3c38bbd1e94 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_acpi.c
@@ -44,6 +44,20 @@ bool acpi_atcs_functions_supported(void *device, uint32_t index)
 	return result == 0 ? (output_buf.function_bits & (1 << (index - 1))) != 0 : false;
 }
 
+/*
+ * Invoke ATCS function PCIE_DEVICE_READY_NOTIFICATION (input: int32_t 1, no
+ * output).  The status is returned as bool, so true means a non-zero status.
+ */
+bool acpi_atcs_notify_pcie_device_ready(void *device)
+{
+	int32_t ready = 1;
+
+	return cgs_call_acpi_method(device, CGS_ACPI_METHOD_ATCS,
+			ATCS_FUNCTION_PCIE_DEVICE_READY_NOTIFICATION,
+			&ready, NULL, 0, sizeof(ready), 0);
+}
+
+
 int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
 {
 	struct atcs_pref_req_input atcs_input;
@@ -52,7 +66,7 @@ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
 	int result;
 	struct cgs_system_info info = {0};
 
-	if (!acpi_atcs_functions_supported(device, ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST))
+	if( 0 != acpi_atcs_notify_pcie_device_ready(device))
 		return -EINVAL;
 
 	info.size = sizeof(struct cgs_system_info);
@@ -77,7 +91,7 @@ int acpi_pcie_perf_request(void *device, uint8_t perf_req, bool advertise)
 						ATCS_FUNCTION_PCIE_PERFORMANCE_REQUEST,
 						&atcs_input,
 						&atcs_output,
-						0,
+						1,
 						sizeof(atcs_input),
 						sizeof(atcs_output));
 		if (result != 0)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
index da9f5f1b6dc2..5d70e2c47faf 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
@@ -552,13 +552,13 @@ static bool atomctrl_lookup_gpio_pin(
 				pin_assignment->ucGpioPinBitShift;
 			gpio_pin_assignment->us_gpio_pin_aindex =
 				le16_to_cpu(pin_assignment->usGpioPin_AIndex);
-			return false;
+			return true;
 		}
 
 		offset += offsetof(ATOM_GPIO_PIN_ASSIGNMENT, ucGPIO_ID) + 1;
 	}
 
-	return true;
+	return false;
 }
 
 /**
@@ -1302,3 +1302,46 @@ int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctr
 
 	return 0;
 }
+
+/*
+ * Copy the AVFS fields of the ASIC_ProfilingInfo table into @param (LE -> CPU order).
+ * Returns 0, -EINVAL if @param is NULL, or -1 if the table lookup returns NULL.
+ */
+int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl__avfs_parameters *param)
+{
+	ATOM_ASIC_PROFILING_INFO_V3_6 *profile;
+
+	if (param == NULL)
+		return -EINVAL;
+	profile = (ATOM_ASIC_PROFILING_INFO_V3_6 *)cgs_atom_get_data_table(hwmgr->device,
+			GetIndexIntoMasterTable(DATA, ASIC_ProfilingInfo), NULL, NULL, NULL);
+	if (!profile)
+		return -1;
+
+	param->ulAVFS_meanNsigma_Acontant0 = le32_to_cpu(profile->ulAVFS_meanNsigma_Acontant0);
+	param->ulAVFS_meanNsigma_Acontant1 = le32_to_cpu(profile->ulAVFS_meanNsigma_Acontant1);
+	param->ulAVFS_meanNsigma_Acontant2 = le32_to_cpu(profile->ulAVFS_meanNsigma_Acontant2);
+	param->usAVFS_meanNsigma_DC_tol_sigma = le16_to_cpu(profile->usAVFS_meanNsigma_DC_tol_sigma);
+	param->usAVFS_meanNsigma_Platform_mean = le16_to_cpu(profile->usAVFS_meanNsigma_Platform_mean);
+	param->usAVFS_meanNsigma_Platform_sigma = le16_to_cpu(profile->usAVFS_meanNsigma_Platform_sigma);
+	param->ulGB_VDROOP_TABLE_CKSOFF_a0 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSOFF_a0);
+	param->ulGB_VDROOP_TABLE_CKSOFF_a1 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSOFF_a1);
+	param->ulGB_VDROOP_TABLE_CKSOFF_a2 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSOFF_a2);
+	param->ulGB_VDROOP_TABLE_CKSON_a0 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSON_a0);
+	param->ulGB_VDROOP_TABLE_CKSON_a1 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSON_a1);
+	param->ulGB_VDROOP_TABLE_CKSON_a2 = le32_to_cpu(profile->ulGB_VDROOP_TABLE_CKSON_a2);
+	param->ulAVFSGB_FUSE_TABLE_CKSOFF_m1 = le32_to_cpu(profile->ulAVFSGB_FUSE_TABLE_CKSOFF_m1);
+	param->usAVFSGB_FUSE_TABLE_CKSOFF_m2 = le16_to_cpu(profile->usAVFSGB_FUSE_TABLE_CKSOFF_m2);
+	param->ulAVFSGB_FUSE_TABLE_CKSOFF_b = le32_to_cpu(profile->ulAVFSGB_FUSE_TABLE_CKSOFF_b);
+	param->ulAVFSGB_FUSE_TABLE_CKSON_m1 = le32_to_cpu(profile->ulAVFSGB_FUSE_TABLE_CKSON_m1);
+	param->usAVFSGB_FUSE_TABLE_CKSON_m2 = le16_to_cpu(profile->usAVFSGB_FUSE_TABLE_CKSON_m2);
+	param->ulAVFSGB_FUSE_TABLE_CKSON_b = le32_to_cpu(profile->ulAVFSGB_FUSE_TABLE_CKSON_b);
+	param->usMaxVoltage_0_25mv = le16_to_cpu(profile->usMaxVoltage_0_25mv);
+	param->ucEnableGB_VDROOP_TABLE_CKSOFF = profile->ucEnableGB_VDROOP_TABLE_CKSOFF;
+	param->ucEnableGB_VDROOP_TABLE_CKSON = profile->ucEnableGB_VDROOP_TABLE_CKSON;
+	param->ucEnableGB_FUSE_TABLE_CKSOFF = profile->ucEnableGB_FUSE_TABLE_CKSOFF;
+	param->ucEnableGB_FUSE_TABLE_CKSON = profile->ucEnableGB_FUSE_TABLE_CKSON;
+	param->usPSM_Age_ComFactor = le16_to_cpu(profile->usPSM_Age_ComFactor);
+	param->ucEnableApplyAVFS_CKS_OFF_Voltage = profile->ucEnableApplyAVFS_CKS_OFF_Voltage;
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
index d24ebb566905..248c5db5f380 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
@@ -250,6 +250,35 @@ struct pp_atomctrl_gpio_pin_assignment {
 };
 typedef struct pp_atomctrl_gpio_pin_assignment pp_atomctrl_gpio_pin_assignment;
 
+struct pp_atom_ctrl__avfs_parameters {	/* filled from ATOM_ASIC_PROFILING_INFO_V3_6 by atomctrl_get_avfs_information() */
+	uint32_t  ulAVFS_meanNsigma_Acontant0;
+	uint32_t  ulAVFS_meanNsigma_Acontant1;
+	uint32_t  ulAVFS_meanNsigma_Acontant2;
+	uint16_t usAVFS_meanNsigma_DC_tol_sigma;
+	uint16_t usAVFS_meanNsigma_Platform_mean;
+	uint16_t usAVFS_meanNsigma_Platform_sigma;
+	uint32_t  ulGB_VDROOP_TABLE_CKSOFF_a0;
+	uint32_t  ulGB_VDROOP_TABLE_CKSOFF_a1;
+	uint32_t  ulGB_VDROOP_TABLE_CKSOFF_a2;
+	uint32_t  ulGB_VDROOP_TABLE_CKSON_a0;
+	uint32_t  ulGB_VDROOP_TABLE_CKSON_a1;
+	uint32_t  ulGB_VDROOP_TABLE_CKSON_a2;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSOFF_m1;
+	uint16_t  usAVFSGB_FUSE_TABLE_CKSOFF_m2;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSOFF_b;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSON_m1;
+	uint16_t  usAVFSGB_FUSE_TABLE_CKSON_m2;
+	uint32_t  ulAVFSGB_FUSE_TABLE_CKSON_b;
+	uint16_t  usMaxVoltage_0_25mv;	/* NOTE(review): name suggests 0.25 mV units - confirm */
+	uint8_t  ucEnableGB_VDROOP_TABLE_CKSOFF;
+	uint8_t  ucEnableGB_VDROOP_TABLE_CKSON;
+	uint8_t  ucEnableGB_FUSE_TABLE_CKSOFF;
+	uint8_t  ucEnableGB_FUSE_TABLE_CKSON;
+	uint16_t usPSM_Age_ComFactor;
+	uint8_t  ucEnableApplyAVFS_CKS_OFF_Voltage;
+	uint8_t  ucReserved;	/* not written by atomctrl_get_avfs_information() */
+};
+
 extern bool atomctrl_get_pp_assign_pin(struct pp_hwmgr *hwmgr, const uint32_t pinId, pp_atomctrl_gpio_pin_assignment *gpio_pin_assignment);
 extern int atomctrl_get_voltage_evv_on_sclk(struct pp_hwmgr *hwmgr, uint8_t voltage_type, uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage);
 extern uint32_t atomctrl_get_mpll_reference_clock(struct pp_hwmgr *hwmgr);
@@ -278,5 +307,8 @@ extern int atomctrl_set_ac_timing_ai(struct pp_hwmgr *hwmgr, uint32_t memory_clo
 extern int atomctrl_get_voltage_evv_on_sclk_ai(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
 				uint32_t sclk, uint16_t virtual_voltage_Id, uint16_t *voltage);
 extern int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl_sclk_range_table *table);
+
+extern int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl__avfs_parameters *param);
+
 #endif
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
index 2f1a14fe05b1..35bc8a29b773 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/processpptables.c
@@ -794,15 +794,18 @@ static const ATOM_PPLIB_STATE_V2 *get_state_entry_v2(
 static const ATOM_PPLIB_POWERPLAYTABLE *get_powerplay_table(
 				     struct pp_hwmgr *hwmgr)
 {
-	const void *table_addr = NULL;
+	const void *table_addr = hwmgr->soft_pp_table;
 	uint8_t frev, crev;
 	uint16_t size;
 
-	table_addr = cgs_atom_get_data_table(hwmgr->device,
-			GetIndexIntoMasterTable(DATA, PowerPlayInfo),
-			&size, &frev, &crev);
+	if (!table_addr) {
+		table_addr = cgs_atom_get_data_table(hwmgr->device,
+				GetIndexIntoMasterTable(DATA, PowerPlayInfo),
+				&size, &frev, &crev);
 
-	hwmgr->soft_pp_table = table_addr;
+		hwmgr->soft_pp_table = table_addr;
+		hwmgr->soft_pp_table_size = size;
+	}
 
 	return (const ATOM_PPLIB_POWERPLAYTABLE *)table_addr;
 }
@@ -1499,7 +1502,7 @@ int get_number_of_vce_state_table_entries(
 	const ATOM_PPLIB_VCE_State_Table *vce_table =
 				    get_vce_state_table(hwmgr, table);
 
-	if (vce_table > 0)
+	if (vce_table)
 		return vce_table->numEntries;
 
 	return 0;
@@ -1589,11 +1592,6 @@ static int pp_tables_initialize(struct pp_hwmgr *hwmgr)
 
 static int pp_tables_uninitialize(struct pp_hwmgr *hwmgr)
 {
-	if (NULL != hwmgr->soft_pp_table) {
-		kfree(hwmgr->soft_pp_table);
-		hwmgr->soft_pp_table = NULL;
-	}
-
 	if (NULL != hwmgr->dyn_state.vddc_dependency_on_sclk) {
 		kfree(hwmgr->dyn_state.vddc_dependency_on_sclk);
 		hwmgr->dyn_state.vddc_dependency_on_sclk = NULL;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
index 16fed487973b..6c4553cf8023 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
@@ -2847,27 +2847,6 @@ static int tonga_setup_default_dpm_tables(struct pp_hwmgr *hwmgr)
 		}
 	}
 
-	/* Initialize Vddc DPM table based on allow Vddc values.  And populate corresponding std values. */
-	for (i = 0; i < allowed_vdd_sclk_table->count; i++) {
-		data->dpm_table.vddc_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].vddc;
-		/* tonga_hwmgr->dpm_table.VddcTable.dpm_levels[i].param1 = stdVoltageTable->entries[i].Leakage; */
-		/* param1 is for corresponding std voltage */
-		data->dpm_table.vddc_table.dpm_levels[i].enabled = 1;
-	}
-	data->dpm_table.vddc_table.count = allowed_vdd_sclk_table->count;
-
-	if (NULL != allowed_vdd_mclk_table) {
-		/* Initialize Vddci DPM table based on allow Mclk values */
-		for (i = 0; i < allowed_vdd_mclk_table->count; i++) {
-			data->dpm_table.vdd_ci_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].vddci;
-			data->dpm_table.vdd_ci_table.dpm_levels[i].enabled = 1;
-			data->dpm_table.mvdd_table.dpm_levels[i].value = allowed_vdd_mclk_table->entries[i].mvdd;
-			data->dpm_table.mvdd_table.dpm_levels[i].enabled = 1;
-		}
-		data->dpm_table.vdd_ci_table.count = allowed_vdd_mclk_table->count;
-		data->dpm_table.mvdd_table.count = allowed_vdd_mclk_table->count;
-	}
-
 	/* setup PCIE gen speed levels*/
 	tonga_setup_default_pcie_tables(hwmgr);
 
@@ -3047,8 +3026,8 @@ int tonga_init_smc_table(struct pp_hwmgr *hwmgr)
 
 	reg_value = 0;
 	if ((0 == reg_value) &&
-		(0 == atomctrl_get_pp_assign_pin(hwmgr,
-			VDDC_VRHOT_GPIO_PINID, &gpio_pin_assignment))) {
+		(atomctrl_get_pp_assign_pin(hwmgr, VDDC_VRHOT_GPIO_PINID,
+						&gpio_pin_assignment))) {
 		table->VRHotGpio = gpio_pin_assignment.uc_gpio_pin_bit_shift;
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_RegulatorHot);
@@ -3061,8 +3040,8 @@ int tonga_init_smc_table(struct pp_hwmgr *hwmgr)
 	/* ACDC Switch GPIO */
 	reg_value = 0;
 	if ((0 == reg_value) &&
-		(0 == atomctrl_get_pp_assign_pin(hwmgr,
-			PP_AC_DC_SWITCH_GPIO_PINID, &gpio_pin_assignment))) {
+		(atomctrl_get_pp_assign_pin(hwmgr, PP_AC_DC_SWITCH_GPIO_PINID,
+						&gpio_pin_assignment))) {
 		table->AcDcGpio = gpio_pin_assignment.uc_gpio_pin_bit_shift;
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_AutomaticDCTransition);
@@ -3084,8 +3063,7 @@ int tonga_init_smc_table(struct pp_hwmgr *hwmgr)
 	}
 
 	reg_value = 0;
-	if ((0 == reg_value) &&
-		(0 == atomctrl_get_pp_assign_pin(hwmgr,
+	if ((0 == reg_value) && (atomctrl_get_pp_assign_pin(hwmgr,
 			THERMAL_INT_OUTPUT_GPIO_PINID, &gpio_pin_assignment))) {
 		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
 			PHM_PlatformCaps_ThermalOutGPIO);
@@ -4443,13 +4421,6 @@ int tonga_reset_asic_tasks(struct pp_hwmgr *hwmgr)
 
 int tonga_hwmgr_backend_fini(struct pp_hwmgr *hwmgr)
 {
-	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
-
-	if (data->soft_pp_table) {
-		kfree(data->soft_pp_table);
-		data->soft_pp_table = NULL;
-	}
-
 	return phm_hwmgr_backend_fini(hwmgr);
 }
 
@@ -4463,7 +4434,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 {
 	int result = 0;
 	SMU72_Discrete_DpmTable  *table = NULL;
-	tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+	tonga_hwmgr *data;
 	pp_atomctrl_gpio_pin_assignment gpio_pin_assignment;
 	struct phm_ppt_v1_information *pptable_info = (struct phm_ppt_v1_information *)(hwmgr->pptable);
 	phw_tonga_ulv_parm *ulv;
@@ -4472,6 +4443,12 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	PP_ASSERT_WITH_CODE((NULL != hwmgr),
 		"Invalid Parameter!", return -1;);
 
+	data = kzalloc(sizeof(struct tonga_hwmgr), GFP_KERNEL);
+	if (data == NULL)
+		return -ENOMEM;
+
+	hwmgr->backend = data;
+
 	data->dll_defaule_on = 0;
 	data->sram_end = SMC_RAM_END;
 
@@ -4510,6 +4487,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	data->vdd_ci_control = TONGA_VOLTAGE_CONTROL_NONE;
 	data->vdd_gfx_control = TONGA_VOLTAGE_CONTROL_NONE;
 	data->mvdd_control = TONGA_VOLTAGE_CONTROL_NONE;
+	data->force_pcie_gen = PP_PCIEGenInvalid;
 
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 				VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2)) {
@@ -4591,7 +4569,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 	* if ucGPIO_ID=VDDC_PCC_GPIO_PINID in GPIO_LUTable,
 	* Peak Current Control feature is enabled and we should program PCC HW register
 	*/
-	if (0 == atomctrl_get_pp_assign_pin(hwmgr, VDDC_PCC_GPIO_PINID, &gpio_pin_assignment)) {
+	if (atomctrl_get_pp_assign_pin(hwmgr, VDDC_PCC_GPIO_PINID, &gpio_pin_assignment)) {
 		uint32_t temp_reg = cgs_read_ind_register(hwmgr->device,
 										CGS_IND_REG__SMC, ixCNB_PWRMGT_CNTL);
 
@@ -4659,7 +4637,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_GEN_INFO;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_gen_cap = 0x30007;
+			data->pcie_gen_cap = AMDGPU_DEFAULT_PCIE_GEN_MASK;
 		else
 			data->pcie_gen_cap = (uint32_t)sys_info.value;
 		if (data->pcie_gen_cap & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
@@ -4668,7 +4646,7 @@ int tonga_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 		sys_info.info_id = CGS_SYSTEM_INFO_PCIE_MLW;
 		result = cgs_query_system_info(hwmgr->device, &sys_info);
 		if (result)
-			data->pcie_lane_cap = 0x2f0000;
+			data->pcie_lane_cap = AMDGPU_DEFAULT_PCIE_MLW_MASK;
 		else
 			data->pcie_lane_cap = (uint32_t)sys_info.value;
 	} else {
@@ -6051,42 +6029,6 @@ static int tonga_get_fan_control_mode(struct pp_hwmgr *hwmgr)
 				CG_FDO_CTRL2, FDO_PWM_MODE);
 }
 
-static int tonga_get_pp_table(struct pp_hwmgr *hwmgr, char **table)
-{
-	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kmemdup(hwmgr->soft_pp_table,
-					      hwmgr->soft_pp_table_size,
-					      GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	*table = (char *)&data->soft_pp_table;
-
-	return hwmgr->soft_pp_table_size;
-}
-
-static int tonga_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size)
-{
-	struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
-
-	if (!data->soft_pp_table) {
-		data->soft_pp_table = kzalloc(hwmgr->soft_pp_table_size, GFP_KERNEL);
-		if (!data->soft_pp_table)
-			return -ENOMEM;
-	}
-
-	memcpy(data->soft_pp_table, buf, size);
-
-	hwmgr->soft_pp_table = data->soft_pp_table;
-
-	/* TODO: re-init powerplay to implement modified pptable */
-
-	return 0;
-}
-
 static int tonga_force_clock_level(struct pp_hwmgr *hwmgr,
 		enum pp_clock_type type, uint32_t mask)
 {
@@ -6194,11 +6136,96 @@ static int tonga_print_clock_levels(struct pp_hwmgr *hwmgr,
 	return size;
 }
 
+/*
+ * Return how far the top sclk DPM level is above the top golden sclk level,
+ * as a percentage of the golden value.
+ */
+static int tonga_get_sclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct tonga_hwmgr *tonga = (struct tonga_hwmgr *)(hwmgr->backend);
+	struct tonga_single_dpm_table *cur = &(tonga->dpm_table.sclk_table);
+	struct tonga_single_dpm_table *golden = &(tonga->golden_dpm_table.sclk_table);
+
+	/* NOTE(review): no guard for an empty table or a zero golden clock. */
+	return (cur->dpm_levels[cur->count - 1].value -
+			golden->dpm_levels[golden->count - 1].value) * 100 /
+			golden->dpm_levels[golden->count - 1].value;
+}
+
+/*
+ * Set the engine clock of the last performance level in hwmgr->request_ps to the
+ * top golden sclk plus @value percent; @value above 20 is clamped to 20.
+ * Returns 0, or -EINVAL when hwmgr->request_ps is NULL.
+ */
+static int tonga_set_sclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct tonga_hwmgr *tonga = (struct tonga_hwmgr *)(hwmgr->backend);
+	struct tonga_single_dpm_table *golden = &(tonga->golden_dpm_table.sclk_table);
+	struct pp_power_state *req_ps = hwmgr->request_ps;
+	struct tonga_power_state *hw_ps;
+
+	if (value > 20)
+		value = 20;
+
+	if (req_ps == NULL)
+		return -EINVAL;
+
+	hw_ps = cast_phw_tonga_power_state(&req_ps->hardware);
+	hw_ps->performance_levels[hw_ps->performance_level_count - 1].engine_clock =
+			golden->dpm_levels[golden->count - 1].value * value / 100 +
+			golden->dpm_levels[golden->count - 1].value;
+
+	return 0;
+}
+
+/*
+ * Return how far the top mclk DPM level is above the top golden mclk level,
+ * as a percentage of the golden value.
+ */
+static int tonga_get_mclk_od(struct pp_hwmgr *hwmgr)
+{
+	struct tonga_hwmgr *tonga = (struct tonga_hwmgr *)(hwmgr->backend);
+	struct tonga_single_dpm_table *cur = &(tonga->dpm_table.mclk_table);
+	struct tonga_single_dpm_table *golden = &(tonga->golden_dpm_table.mclk_table);
+
+	/* NOTE(review): no guard for an empty table or a zero golden clock. */
+	return (cur->dpm_levels[cur->count - 1].value -
+			golden->dpm_levels[golden->count - 1].value) * 100 /
+			golden->dpm_levels[golden->count - 1].value;
+}
+
+/*
+ * Set the memory clock of the last performance level in hwmgr->request_ps to the
+ * top golden mclk plus @value percent; @value above 20 is clamped to 20.
+ * Returns 0, or -EINVAL when hwmgr->request_ps is NULL.
+ */
+static int tonga_set_mclk_od(struct pp_hwmgr *hwmgr, uint32_t value)
+{
+	struct tonga_hwmgr *tonga = (struct tonga_hwmgr *)(hwmgr->backend);
+	struct tonga_single_dpm_table *golden = &(tonga->golden_dpm_table.mclk_table);
+	struct pp_power_state *req_ps = hwmgr->request_ps;
+	struct tonga_power_state *hw_ps;
+
+	if (value > 20)
+		value = 20;
+
+	if (req_ps == NULL)
+		return -EINVAL;
+
+	hw_ps = cast_phw_tonga_power_state(&req_ps->hardware);
+	hw_ps->performance_levels[hw_ps->performance_level_count - 1].memory_clock =
+			golden->dpm_levels[golden->count - 1].value * value / 100 +
+			golden->dpm_levels[golden->count - 1].value;
+
+	return 0;
+}
+
 static const struct pp_hwmgr_func tonga_hwmgr_funcs = {
 	.backend_init = &tonga_hwmgr_backend_init,
 	.backend_fini = &tonga_hwmgr_backend_fini,
 	.asic_setup = &tonga_setup_asic_task,
 	.dynamic_state_management_enable = &tonga_enable_dpm_tasks,
+	.dynamic_state_management_disable = &tonga_disable_dpm_tasks,
 	.apply_state_adjust_rules = tonga_apply_state_adjust_rules,
 	.force_dpm_level = &tonga_force_dpm_level,
 	.power_state_set = tonga_set_power_state_tasks,
@@ -6232,22 +6259,16 @@ static const struct pp_hwmgr_func tonga_hwmgr_funcs = {
 	.check_states_equal = tonga_check_states_equal,
 	.set_fan_control_mode = tonga_set_fan_control_mode,
 	.get_fan_control_mode = tonga_get_fan_control_mode,
-	.get_pp_table = tonga_get_pp_table,
-	.set_pp_table = tonga_set_pp_table,
 	.force_clock_level = tonga_force_clock_level,
 	.print_clock_levels = tonga_print_clock_levels,
+	.get_sclk_od = tonga_get_sclk_od,
+	.set_sclk_od = tonga_set_sclk_od,
+	.get_mclk_od = tonga_get_mclk_od,
+	.set_mclk_od = tonga_set_mclk_od,
 };
 
 int tonga_hwmgr_init(struct pp_hwmgr *hwmgr)
 {
-	tonga_hwmgr  *data;
-
-	data = kzalloc (sizeof(tonga_hwmgr), GFP_KERNEL);
-	if (data == NULL)
-		return -ENOMEM;
-	memset(data, 0x00, sizeof(tonga_hwmgr));
-
-	hwmgr->backend = data;
 	hwmgr->hwmgr_func = &tonga_hwmgr_funcs;
 	hwmgr->pptable_func = &tonga_pptable_funcs;
 	pp_tonga_thermal_initialize(hwmgr);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
index 573cd39fe78d..3961884bfa9b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
@@ -352,9 +352,6 @@ struct tonga_hwmgr {
 	bool                           samu_power_gated; /* 1: gated, 0:not gated */
 	bool                           acp_power_gated;  /* 1: gated, 0:not gated */
 	bool                           pg_acp_init;
-
-	/* soft pptable for re-uploading into smu */
-	void *soft_pp_table;
 };
 
 typedef struct tonga_hwmgr tonga_hwmgr;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h
index 1b44f4e9b8f5..f127198aafc4 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_pptable.h
@@ -197,6 +197,22 @@ typedef struct _ATOM_Tonga_SCLK_Dependency_Table {
 	ATOM_Tonga_SCLK_Dependency_Record entries[1];				 /* Dynamically allocate entries. */
 } ATOM_Tonga_SCLK_Dependency_Table;
 
+typedef struct _ATOM_Polaris_SCLK_Dependency_Record {
+	UCHAR  ucVddInd;											/* Base voltage */
+	USHORT usVddcOffset;										/* Offset relative to base voltage */
+	ULONG ulSclk;	/* copied to entries[i].clk by get_sclk_voltage_dependency_table() */
+	USHORT usEdcCurrent;
+	UCHAR  ucReliabilityTemperature;
+	UCHAR  ucCKSVOffsetandDisable;			/* Bits 0~6: voltage offset for CKS; bit 7: 1 = CKS disabled for this SCLK level (parsed as cks_enable = !bit7). */
+	ULONG  ulSclkOffset;	/* copied to entries[i].sclk_offset by get_sclk_voltage_dependency_table() */
+} ATOM_Polaris_SCLK_Dependency_Record;
+
+typedef struct _ATOM_Polaris_SCLK_Dependency_Table {
+	UCHAR ucRevId;	/* get_sclk_voltage_dependency_table() uses this layout when ucRevId >= 1 */
+	UCHAR ucNumEntries;							/* Number of entries. */
+	ATOM_Polaris_SCLK_Dependency_Record entries[1];				 /* Placeholder length; readers index up to ucNumEntries records. */
+} ATOM_Polaris_SCLK_Dependency_Table;
+
 typedef struct _ATOM_Tonga_PCIE_Record {
 	UCHAR ucPCIEGenSpeed;
 	UCHAR usPCIELaneWidth;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
index 10e3630ee39d..94d6b472e1fe 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_processpptables.c
@@ -408,41 +408,78 @@ static int get_mclk_voltage_dependency_table(
 static int get_sclk_voltage_dependency_table(
 		struct pp_hwmgr *hwmgr,
 		phm_ppt_v1_clock_voltage_dependency_table **pp_tonga_sclk_dep_table,
-		const ATOM_Tonga_SCLK_Dependency_Table * sclk_dep_table
+		const PPTable_Generic_SubTable_Header *sclk_dep_table
 		)
 {
 	uint32_t table_size, i;
 	phm_ppt_v1_clock_voltage_dependency_table *sclk_table;
 
-	PP_ASSERT_WITH_CODE((0 != sclk_dep_table->ucNumEntries),
-		"Invalid PowerPlay Table!", return -1);
+	if (sclk_dep_table->ucRevId < 1) {
+		const ATOM_Tonga_SCLK_Dependency_Table *tonga_table =
+			    (ATOM_Tonga_SCLK_Dependency_Table *)sclk_dep_table;
 
-	table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record)
-		* sclk_dep_table->ucNumEntries;
+		PP_ASSERT_WITH_CODE((0 != tonga_table->ucNumEntries),
+			"Invalid PowerPlay Table!", return -1);
 
-	sclk_table = (phm_ppt_v1_clock_voltage_dependency_table *)
-		kzalloc(table_size, GFP_KERNEL);
+		table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record)
+			* tonga_table->ucNumEntries;
 
-	if (NULL == sclk_table)
-		return -ENOMEM;
+		sclk_table = (phm_ppt_v1_clock_voltage_dependency_table *)
+			kzalloc(table_size, GFP_KERNEL);
 
-	memset(sclk_table, 0x00, table_size);
-
-	sclk_table->count = (uint32_t)sclk_dep_table->ucNumEntries;
-
-	for (i = 0; i < sclk_dep_table->ucNumEntries; i++) {
-		sclk_table->entries[i].vddInd =
-			sclk_dep_table->entries[i].ucVddInd;
-		sclk_table->entries[i].vdd_offset =
-			sclk_dep_table->entries[i].usVddcOffset;
-		sclk_table->entries[i].clk =
-			sclk_dep_table->entries[i].ulSclk;
-		sclk_table->entries[i].cks_enable =
-			(((sclk_dep_table->entries[i].ucCKSVOffsetandDisable & 0x80) >> 7) == 0) ? 1 : 0;
-		sclk_table->entries[i].cks_voffset =
-			(sclk_dep_table->entries[i].ucCKSVOffsetandDisable & 0x7F);
-	}
+		if (NULL == sclk_table)
+			return -ENOMEM;
 
+		memset(sclk_table, 0x00, table_size);
+
+		sclk_table->count = (uint32_t)tonga_table->ucNumEntries;
+
+		for (i = 0; i < tonga_table->ucNumEntries; i++) {
+			sclk_table->entries[i].vddInd =
+				tonga_table->entries[i].ucVddInd;
+			sclk_table->entries[i].vdd_offset =
+				tonga_table->entries[i].usVddcOffset;
+			sclk_table->entries[i].clk =
+				tonga_table->entries[i].ulSclk;
+			sclk_table->entries[i].cks_enable =
+				(((tonga_table->entries[i].ucCKSVOffsetandDisable & 0x80) >> 7) == 0) ? 1 : 0;
+			sclk_table->entries[i].cks_voffset =
+				(tonga_table->entries[i].ucCKSVOffsetandDisable & 0x7F);
+		}
+	} else {
+		const ATOM_Polaris_SCLK_Dependency_Table *polaris_table =
+			    (ATOM_Polaris_SCLK_Dependency_Table *)sclk_dep_table;
+
+		PP_ASSERT_WITH_CODE((0 != polaris_table->ucNumEntries),
+			"Invalid PowerPlay Table!", return -1);
+
+		table_size = sizeof(uint32_t) + sizeof(phm_ppt_v1_clock_voltage_dependency_record)
+			* polaris_table->ucNumEntries;
+
+		sclk_table = (phm_ppt_v1_clock_voltage_dependency_table *)
+			kzalloc(table_size, GFP_KERNEL);
+
+		if (NULL == sclk_table)
+			return -ENOMEM;
+
+		memset(sclk_table, 0x00, table_size);
+
+		sclk_table->count = (uint32_t)polaris_table->ucNumEntries;
+
+		for (i = 0; i < polaris_table->ucNumEntries; i++) {
+			sclk_table->entries[i].vddInd =
+				polaris_table->entries[i].ucVddInd;
+			sclk_table->entries[i].vdd_offset =
+				polaris_table->entries[i].usVddcOffset;
+			sclk_table->entries[i].clk =
+				polaris_table->entries[i].ulSclk;
+			sclk_table->entries[i].cks_enable =
+				(((polaris_table->entries[i].ucCKSVOffsetandDisable & 0x80) >> 7) == 0) ? 1 : 0;
+			sclk_table->entries[i].cks_voffset =
+				(polaris_table->entries[i].ucCKSVOffsetandDisable & 0x7F);
+			sclk_table->entries[i].sclk_offset = polaris_table->entries[i].ulSclkOffset;
+		}
+	}
 	*pp_tonga_sclk_dep_table = sclk_table;
 
 	return 0;
@@ -708,8 +745,8 @@ static int init_clock_voltage_dependency(
 	const ATOM_Tonga_MCLK_Dependency_Table *mclk_dep_table =
 		(const ATOM_Tonga_MCLK_Dependency_Table *)(((unsigned long) powerplay_table) +
 		le16_to_cpu(powerplay_table->usMclkDependencyTableOffset));
-	const ATOM_Tonga_SCLK_Dependency_Table *sclk_dep_table =
-		(const ATOM_Tonga_SCLK_Dependency_Table *)(((unsigned long) powerplay_table) +
+	const PPTable_Generic_SubTable_Header *sclk_dep_table =
+		(const PPTable_Generic_SubTable_Header *)(((unsigned long) powerplay_table) +
 		le16_to_cpu(powerplay_table->usSclkDependencyTableOffset));
 	const ATOM_Tonga_Hard_Limit_Table *pHardLimits =
 		(const ATOM_Tonga_Hard_Limit_Table *)(((unsigned long) powerplay_table) +
@@ -1040,48 +1077,41 @@ int tonga_pp_tables_uninitialize(struct pp_hwmgr *hwmgr)
 	struct phm_ppt_v1_information *pp_table_information =
 		(struct phm_ppt_v1_information *)(hwmgr->pptable);
 
-	if (NULL != hwmgr->soft_pp_table) {
-		kfree(hwmgr->soft_pp_table);
-		hwmgr->soft_pp_table = NULL;
-	}
-
-	if (NULL != pp_table_information->vdd_dep_on_sclk)
-		pp_table_information->vdd_dep_on_sclk = NULL;
+	kfree(pp_table_information->vdd_dep_on_sclk);
+	pp_table_information->vdd_dep_on_sclk = NULL;
 
-	if (NULL != pp_table_information->vdd_dep_on_mclk)
-		pp_table_information->vdd_dep_on_mclk = NULL;
+	kfree(pp_table_information->vdd_dep_on_mclk);
+	pp_table_information->vdd_dep_on_mclk = NULL;
 
-	if (NULL != pp_table_information->valid_mclk_values)
-		pp_table_information->valid_mclk_values = NULL;
+	kfree(pp_table_information->valid_mclk_values);
+	pp_table_information->valid_mclk_values = NULL;
 
-	if (NULL != pp_table_information->valid_sclk_values)
-		pp_table_information->valid_sclk_values = NULL;
+	kfree(pp_table_information->valid_sclk_values);
+	pp_table_information->valid_sclk_values = NULL;
 
-	if (NULL != pp_table_information->vddc_lookup_table)
-		pp_table_information->vddc_lookup_table = NULL;
+	kfree(pp_table_information->vddc_lookup_table);
+	pp_table_information->vddc_lookup_table = NULL;
 
-	if (NULL != pp_table_information->vddgfx_lookup_table)
-		pp_table_information->vddgfx_lookup_table = NULL;
+	kfree(pp_table_information->vddgfx_lookup_table);
+	pp_table_information->vddgfx_lookup_table = NULL;
 
-	if (NULL != pp_table_information->mm_dep_table)
-		pp_table_information->mm_dep_table = NULL;
+	kfree(pp_table_information->mm_dep_table);
+	pp_table_information->mm_dep_table = NULL;
 
-	if (NULL != pp_table_information->cac_dtp_table)
-		pp_table_information->cac_dtp_table = NULL;
+	kfree(pp_table_information->cac_dtp_table);
+	pp_table_information->cac_dtp_table = NULL;
 
-	if (NULL != hwmgr->dyn_state.cac_dtp_table)
-		hwmgr->dyn_state.cac_dtp_table = NULL;
+	kfree(hwmgr->dyn_state.cac_dtp_table);
+	hwmgr->dyn_state.cac_dtp_table = NULL;
 
-	if (NULL != pp_table_information->ppm_parameter_table)
-		pp_table_information->ppm_parameter_table = NULL;
+	kfree(pp_table_information->ppm_parameter_table);
+	pp_table_information->ppm_parameter_table = NULL;
 
-	if (NULL != pp_table_information->pcie_table)
-		pp_table_information->pcie_table = NULL;
+	kfree(pp_table_information->pcie_table);
+	pp_table_information->pcie_table = NULL;
 
-	if (NULL != hwmgr->pptable) {
-		kfree(hwmgr->pptable);
-		hwmgr->pptable = NULL;
-	}
+	kfree(hwmgr->pptable);
+	hwmgr->pptable = NULL;
 
 	return result;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
index 50b367d44307..b764c8c05ec8 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
@@ -132,6 +132,7 @@ struct amd_pp_init {
 	uint32_t chip_family;
 	uint32_t chip_id;
 	uint32_t rev_id;
+	bool powercontainment_enabled;
 };
 enum amd_pp_display_config_type{
 	AMD_PP_DisplayConfigType_None = 0,
@@ -342,6 +343,10 @@ struct amd_powerplay_funcs {
 	int (*set_pp_table)(void *handle, const char *buf, size_t size);
 	int (*force_clock_level)(void *handle, enum pp_clock_type type, uint32_t mask);
 	int (*print_clock_levels)(void *handle, enum pp_clock_type type, char *buf);
+	int (*get_sclk_od)(void *handle);
+	int (*set_sclk_od)(void *handle, uint32_t value);
+	int (*get_mclk_od)(void *handle);
+	int (*set_mclk_od)(void *handle, uint32_t value);
 };
 
 struct amd_powerplay {
@@ -355,6 +360,8 @@ int amd_powerplay_init(struct amd_pp_init *pp_init,
 
 int amd_powerplay_fini(void *handle);
 
+int amd_powerplay_reset(void *handle);
+
 int amd_powerplay_display_configuration_change(void *handle,
 		const struct amd_pp_display_configuration *input);
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
index 56f712c7d07a..962cb5385951 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
@@ -340,6 +340,7 @@ extern int phm_powergate_vce(struct pp_hwmgr *hwmgr, bool gate);
 extern int phm_powerdown_uvd(struct pp_hwmgr *hwmgr);
 extern int phm_setup_asic(struct pp_hwmgr *hwmgr);
 extern int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr);
+extern int phm_disable_dynamic_state_management(struct pp_hwmgr *hwmgr);
 extern void phm_init_dynamic_caps(struct pp_hwmgr *hwmgr);
 extern bool phm_is_hw_access_blocked(struct pp_hwmgr *hwmgr);
 extern int phm_block_hw_access(struct pp_hwmgr *hwmgr, bool block);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index 28f571449495..0bbc42a224e5 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -278,6 +278,8 @@ struct pp_hwmgr_func {
 
 	int (*dynamic_state_management_enable)(
 						struct pp_hwmgr *hw_mgr);
+	int (*dynamic_state_management_disable)(
+						struct pp_hwmgr *hw_mgr);
 
 	int (*patch_boot_state)(struct pp_hwmgr *hwmgr,
 				     struct pp_hw_power_state *hw_ps);
@@ -333,11 +335,13 @@ struct pp_hwmgr_func {
 	int (*get_clock_by_type)(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks);
 	int (*get_max_high_clocks)(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks);
 	int (*power_off_asic)(struct pp_hwmgr *hwmgr);
-	int (*get_pp_table)(struct pp_hwmgr *hwmgr, char **table);
-	int (*set_pp_table)(struct pp_hwmgr *hwmgr, const char *buf, size_t size);
 	int (*force_clock_level)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, uint32_t mask);
 	int (*print_clock_levels)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf);
 	int (*enable_per_cu_power_gating)(struct pp_hwmgr *hwmgr, bool enable);
+	int (*get_sclk_od)(struct pp_hwmgr *hwmgr);
+	int (*set_sclk_od)(struct pp_hwmgr *hwmgr, uint32_t value);
+	int (*get_mclk_od)(struct pp_hwmgr *hwmgr);
+	int (*set_mclk_od)(struct pp_hwmgr *hwmgr, uint32_t value);
 };
 
 struct pp_table_func {
@@ -578,6 +582,7 @@ struct pp_hwmgr {
 	struct pp_smumgr *smumgr;
 	const void *soft_pp_table;
 	uint32_t soft_pp_table_size;
+	void *hardcode_pp_table;
 	bool need_pp_table_upload;
 	enum amd_dpm_forced_level dpm_level;
 	bool block_hw_access;
@@ -607,6 +612,7 @@ struct pp_hwmgr {
 	uint32_t num_ps;
 	struct pp_thermal_controller_info thermal_controller;
 	bool fan_ctrl_is_in_default_mode;
+	bool powercontainment_enabled;
 	uint32_t fan_ctrl_default_mode;
 	uint32_t tmin;
 	struct phm_microcode_version_info microcode_version_info;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
index 0c6a413eaa5b..d41d37ab5b7c 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/polaris10_ppsmc.h
@@ -27,6 +27,7 @@
 
 #pragma pack(push, 1)
 
+#define PPSMC_MSG_SetGBDroopSettings          ((uint16_t) 0x305)
 
 #define PPSMC_SWSTATE_FLAG_DC                           0x01
 #define PPSMC_SWSTATE_FLAG_UVD                          0x02
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h b/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
index 3bd5e69b9045..3df5de2cdab0 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/pp_acpi.h
@@ -26,3 +26,4 @@ extern bool acpi_atcs_functions_supported(void *device,
 extern int acpi_pcie_perf_request(void *device,
 						uint8_t perf_req,
 						bool advertise);
+extern bool acpi_atcs_notify_pcie_device_ready(void *device);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74.h b/drivers/gpu/drm/amd/powerplay/inc/smu74.h
index 1a12d85b8e97..fd10a9fa843d 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu74.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu74.h
@@ -34,6 +34,30 @@
 #define SMU__NUM_LCLK_DPM_LEVELS 8
 #define SMU__NUM_PCIE_DPM_LEVELS 8
 
+#define EXP_M1  35
+#define EXP_M2  92821
+#define EXP_B   66629747
+
+#define EXP_M1_1  365
+#define EXP_M2_1  658700
+#define EXP_B_1   305506134
+
+#define EXP_M1_2  189
+#define EXP_M2_2  379692
+#define EXP_B_2   194609469
+
+#define EXP_M1_3  99
+#define EXP_M2_3  217915
+#define EXP_B_3   122255994
+
+#define EXP_M1_4  51
+#define EXP_M2_4  122643
+#define EXP_B_4   74893384
+
+#define EXP_M1_5  423
+#define EXP_M2_5  1103326
+#define EXP_B_5   728122621
+
 enum SID_OPTION {
 	SID_OPTION_HI,
 	SID_OPTION_LO,
@@ -548,20 +572,20 @@ struct SMU74_Firmware_Header {
 	uint32_t CacConfigTable;
 	uint32_t CacStatusTable;
 
-
 	uint32_t mcRegisterTable;
 
-
 	uint32_t mcArbDramTimingTable;
 
-
-
-
 	uint32_t PmFuseTable;
 	uint32_t Globals;
 	uint32_t ClockStretcherTable;
 	uint32_t VftTable;
-	uint32_t Reserved[21];
+	uint32_t Reserved1;
+	uint32_t AvfsTable;
+	uint32_t AvfsCksOffGbvTable;
+	uint32_t AvfsMeanNSigma;
+	uint32_t AvfsSclkOffsetTable;
+	uint32_t Reserved[16];
 	uint32_t Signature;
 };
 
@@ -701,8 +725,6 @@ VR Config info is contained in dpmTable.VRConfig */
 struct SMU_ClockStretcherDataTableEntry {
 	uint8_t minVID;
 	uint8_t maxVID;
-
-
 	uint16_t setting;
 };
 typedef struct SMU_ClockStretcherDataTableEntry SMU_ClockStretcherDataTableEntry;
@@ -769,6 +791,43 @@ struct VFT_TABLE_t {
 typedef struct VFT_TABLE_t VFT_TABLE_t;
 
 
+/* Total margin, root mean square of Fmax + DC + Platform */
+struct AVFS_Margin_t {
+	VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_Margin_t AVFS_Margin_t;
+
+#define BTCGB_VDROOP_TABLE_MAX_ENTRIES 2
+#define AVFSGB_VDROOP_TABLE_MAX_ENTRIES 2
+
+struct GB_VDROOP_TABLE_t {
+	int32_t a0;
+	int32_t a1;
+	int32_t a2;
+	uint32_t spare;
+};
+typedef struct GB_VDROOP_TABLE_t GB_VDROOP_TABLE_t;
+
+struct AVFS_CksOff_Gbv_t {
+	VFT_CELL_t Cell[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_CksOff_Gbv_t AVFS_CksOff_Gbv_t;
+
+struct AVFS_meanNsigma_t {
+	uint32_t Aconstant[3];
+	uint16_t DC_tol_sigma;
+	uint16_t Platform_mean;
+	uint16_t Platform_sigma;
+	uint16_t PSM_Age_CompFactor;
+	uint8_t  Static_Voltage_Offset[NUM_VFT_COLUMNS];
+};
+typedef struct AVFS_meanNsigma_t AVFS_meanNsigma_t;
+
+struct AVFS_Sclk_Offset_t {
+	uint16_t Sclk_Offset[8];
+};
+typedef struct AVFS_Sclk_Offset_t AVFS_Sclk_Offset_t;
+
 #endif
 
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
index 0dfe82336dc7..b85ff5400e57 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu74_discrete.h
@@ -223,6 +223,16 @@ struct SMU74_Discrete_StateInfo {
 
 typedef struct SMU74_Discrete_StateInfo SMU74_Discrete_StateInfo;
 
+struct SMU_QuadraticCoeffs {
+	int32_t m1;
+	uint32_t b;
+
+	int16_t m2;
+	uint8_t m1_shift;
+	uint8_t m2_shift;
+};
+typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs;
+
 struct SMU74_Discrete_DpmTable {
 
 	SMU74_PIDController                  GraphicsPIDController;
@@ -258,7 +268,14 @@ struct SMU74_Discrete_DpmTable {
 	uint8_t                             ThermOutPolarity;
 	uint8_t                             ThermOutMode;
 	uint8_t                             BootPhases;
-	uint32_t                            Reserved[4];
+
+	uint8_t                             VRHotLevel;
+	uint8_t                             Reserved1[3];
+	uint16_t                            FanStartTemperature;
+	uint16_t                            FanStopTemperature;
+	uint16_t                            MaxVoltage;
+	uint16_t                            Reserved2;
+	uint32_t                            Reserved[1];
 
 	SMU74_Discrete_GraphicsLevel        GraphicsLevel[SMU74_MAX_LEVELS_GRAPHICS];
 	SMU74_Discrete_MemoryLevel          MemoryACPILevel;
@@ -347,6 +364,8 @@ struct SMU74_Discrete_DpmTable {
 
 	uint32_t                            CurrSclkPllRange;
 	sclkFcwRange_t                      SclkFcwRangeTable[NUM_SCLK_RANGE];
+	GB_VDROOP_TABLE_t                   BTCGB_VDROOP_TABLE[BTCGB_VDROOP_TABLE_MAX_ENTRIES];
+	SMU_QuadraticCoeffs                 AVFSGB_VDROOP_TABLE[AVFSGB_VDROOP_TABLE_MAX_ENTRIES];
 };
 
 typedef struct SMU74_Discrete_DpmTable SMU74_Discrete_DpmTable;
@@ -550,16 +569,6 @@ struct SMU7_AcpiScoreboard {
 
 typedef struct SMU7_AcpiScoreboard SMU7_AcpiScoreboard;
 
-struct SMU_QuadraticCoeffs {
-	int32_t m1;
-	uint32_t b;
-
-	int16_t m2;
-	uint8_t m1_shift;
-	uint8_t m2_shift;
-};
-typedef struct SMU_QuadraticCoeffs SMU_QuadraticCoeffs;
-
 struct SMU74_Discrete_PmFuses {
 	uint8_t BapmVddCVidHiSidd[8];
 	uint8_t BapmVddCVidLoSidd[8];
@@ -821,6 +830,17 @@ typedef struct SMU7_GfxCuPgScoreboard SMU7_GfxCuPgScoreboard;
 #define DB_PCC_SHIFT 26 
 #define DB_EDC_SHIFT 27
 
+#define BTCGB0_Vdroop_Enable_MASK  0x1
+#define BTCGB1_Vdroop_Enable_MASK  0x2
+#define AVFSGB0_Vdroop_Enable_MASK 0x4
+#define AVFSGB1_Vdroop_Enable_MASK 0x8
+
+#define BTCGB0_Vdroop_Enable_SHIFT  0
+#define BTCGB1_Vdroop_Enable_SHIFT  1
+#define AVFSGB0_Vdroop_Enable_SHIFT 2
+#define AVFSGB1_Vdroop_Enable_SHIFT 3
+
+
 #pragma pack(pop)
 
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
index 673a75c74e18..8e52a2e82db5 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.c
@@ -1006,10 +1006,16 @@ static int fiji_smu_init(struct pp_smumgr *smumgr)
 
 static int fiji_smu_fini(struct pp_smumgr *smumgr)
 {
+	struct fiji_smumgr *priv = (struct fiji_smumgr *)(smumgr->backend);
+
+	smu_free_memory(smumgr->device, (void *)priv->header_buffer.handle);
+
 	if (smumgr->backend) {
 		kfree(smumgr->backend);
 		smumgr->backend = NULL;
 	}
+
+	cgs_rel_firmware(smumgr->device, CGS_UCODE_ID_SMU);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index de618ead9db8..5dba7c509710 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -52,19 +52,18 @@
 static const SMU74_Discrete_GraphicsLevel avfs_graphics_level_polaris10[8] = {
 	/*  Min      pcie   DeepSleep Activity  CgSpll      CgSpll    CcPwr  CcPwr  Sclk         Enabled      Enabled                       Voltage    Power */
 	/* Voltage, DpmLevel, DivId,  Level,  FuncCntl3,  FuncCntl4,  DynRm, DynRm1 Did, Padding,ForActivity, ForThrottle, UpHyst, DownHyst, DownHyst, Throttle */
-	{ 0x3c0fd047, 0x00, 0x03, 0x1e00, 0x00200410, 0x87020000, 0, 0, 0x16, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x30750000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0xa00fd047, 0x01, 0x04, 0x1e00, 0x00800510, 0x87020000, 0, 0, 0x16, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x409c0000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x0410d047, 0x01, 0x00, 0x1e00, 0x00600410, 0x87020000, 0, 0, 0x0e, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x50c30000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x6810d047, 0x01, 0x00, 0x1e00, 0x00800410, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x60ea0000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0xcc10d047, 0x01, 0x00, 0x1e00, 0x00e00410, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0xe8fd0000, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x3011d047, 0x01, 0x00, 0x1e00, 0x00400510, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x70110100, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0x9411d047, 0x01, 0x00, 0x1e00, 0x00a00510, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0xf8240100, 0, 0, 0, 0, 0, 0, 0 } },
-	{ 0xf811d047, 0x01, 0x00, 0x1e00, 0x00000610, 0x87020000, 0, 0, 0x0c, 0, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, { 0x80380100, 0, 0, 0, 0, 0, 0, 0 } }
+	{ 0x100ea446, 0x00, 0x03, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x30750000, 0x3000, 0, 0x2600, 0, 0, 0x0004, 0x8f02, 0xffff, 0x2f00, 0x300e, 0x2700 } },
+	{ 0x400ea446, 0x01, 0x04, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x409c0000, 0x2000, 0, 0x1e00, 1, 1, 0x0004, 0x8300, 0xffff, 0x1f00, 0xcb5e, 0x1a00 } },
+	{ 0x740ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x50c30000, 0x2800, 0, 0x2000, 1, 1, 0x0004, 0x0c02, 0xffff, 0x2700, 0x6433, 0x2100 } },
+	{ 0xa40ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x60ea0000, 0x3000, 0, 0x2600, 1, 1, 0x0004, 0x8f02, 0xffff, 0x2f00, 0x300e, 0x2700 } },
+	{ 0xd80ea446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x70110100, 0x3800, 0, 0x2c00, 1, 1, 0x0004, 0x1203, 0xffff, 0x3600, 0xc9e2, 0x2e00 } },
+	{ 0x3c0fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x80380100, 0x2000, 0, 0x1e00, 2, 1, 0x0004, 0x8300, 0xffff, 0x1f00, 0xcb5e, 0x1a00 } },
+	{ 0x6c0fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0x905f0100, 0x2400, 0, 0x1e00, 2, 1, 0x0004, 0x8901, 0xffff, 0x2300, 0x314c, 0x1d00 } },
+	{ 0xa00fa446, 0x01, 0x00, 0x3200, 0, 0, 0, 0, 0, 0, 0x01, 0x01, 0x0a, 0x00, 0x00, 0x00, { 0xa0860100, 0x2800, 0, 0x2000, 2, 1, 0x0004, 0x0c02, 0xffff, 0x2700, 0x6433, 0x2100 } }
 };
 
 static const SMU74_Discrete_MemoryLevel avfs_memory_level_polaris10 =
-	{0x50140000, 0x50140000, 0x00320000, 0x00, 0x00,
-	 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x0000, 0x00, 0x00};
+	{0x100ea446, 0, 0x30750000, 0x01, 0x01, 0x01, 0x00, 0x00, 0x64, 0x00, 0x00, 0x1f00, 0x00, 0x00};
 
 /**
 * Set the address for reading/writing the SMC SRAM space.
@@ -219,6 +218,18 @@ bool polaris10_is_smc_ram_running(struct pp_smumgr *smumgr)
 	&& (0x20100 <= cgs_read_ind_register(smumgr->device, CGS_IND_REG__SMC, ixSMC_PC_C)));
 }
 
+static bool polaris10_is_hw_avfs_present(struct pp_smumgr *smumgr)
+{
+	uint32_t efuse;
+
+	efuse = cgs_read_ind_register(smumgr->device, CGS_IND_REG__SMC, ixSMU_EFUSE_0 + (49*4));
+	efuse &= 0x00000001;
+	if (efuse)
+		return true;
+
+	return false;
+}
+
 /**
 * Send a message to the SMC, and wait for its response.
 *
@@ -228,21 +239,27 @@ bool polaris10_is_smc_ram_running(struct pp_smumgr *smumgr)
 */
 int polaris10_send_msg_to_smc(struct pp_smumgr *smumgr, uint16_t msg)
 {
+	int ret;
+
 	if (!polaris10_is_smc_ram_running(smumgr))
 		return -1;
 
+
 	SMUM_WAIT_FIELD_UNEQUAL(smumgr, SMC_RESP_0, SMC_RESP, 0);
 
-	if (1 != SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP))
-		printk("Failed to send Previous Message.\n");
+	ret = SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP);
 
+	if (ret != 1)
+		printk("\n failed to send pre message %x ret is %d \n",  msg, ret);
 
 	cgs_write_register(smumgr->device, mmSMC_MESSAGE_0, msg);
 
 	SMUM_WAIT_FIELD_UNEQUAL(smumgr, SMC_RESP_0, SMC_RESP, 0);
 
-	if (1 != SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP))
-		printk("Failed to send Message.\n");
+	ret = SMUM_READ_FIELD(smumgr->device, SMC_RESP_0, SMC_RESP);
+
+	if (ret != 1)
+		printk("\n failed to send message %x ret is %d \n",  msg, ret);
 
 	return 0;
 }
@@ -469,6 +486,7 @@ int polaris10_smu_fini(struct pp_smumgr *smumgr)
 		kfree(smumgr->backend);
 		smumgr->backend = NULL;
 	}
+	cgs_rel_firmware(smumgr->device, CGS_UCODE_ID_SMU);
 	return 0;
 }
 
@@ -952,6 +970,11 @@ static int polaris10_smu_init(struct pp_smumgr *smumgr)
 		(cgs_handle_t)smu_data->smu_buffer.handle);
 		return -1;);
 
+	if (polaris10_is_hw_avfs_present(smumgr))
+		smu_data->avfs.avfs_btc_status = AVFS_BTC_BOOT;
+	else
+		smu_data->avfs.avfs_btc_status = AVFS_BTC_NOTSUPPORTED;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
index c483baf6b4fb..7723473e51a0 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
+#include <drm/amdgpu_drm.h>
 #include "pp_instance.h"
 #include "smumgr.h"
 #include "cgs_common.h"
@@ -52,10 +53,10 @@ int smum_init(struct amd_pp_init *pp_init, struct pp_instance *handle)
 	handle->smu_mgr = smumgr;
 
 	switch (smumgr->chip_family) {
-	case AMD_FAMILY_CZ:
+	case AMDGPU_FAMILY_CZ:
 		cz_smum_init(smumgr);
 		break;
-	case AMD_FAMILY_VI:
+	case AMDGPU_FAMILY_VI:
 		switch (smumgr->chip_id) {
 		case CHIP_TONGA:
 			tonga_smum_init(smumgr);
@@ -81,6 +82,7 @@ int smum_init(struct amd_pp_init *pp_init, struct pp_instance *handle)
 
 int smum_fini(struct pp_smumgr *smumgr)
 {
+	kfree(smumgr->device);
 	kfree(smumgr);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
index 32820b680d88..b22722eabafc 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/tonga_smumgr.c
@@ -328,10 +328,17 @@ int tonga_write_smc_sram_dword(struct pp_smumgr *smumgr,
 
 static int tonga_smu_fini(struct pp_smumgr *smumgr)
 {
+	struct tonga_smumgr *priv = (struct tonga_smumgr *)(smumgr->backend);
+
+	smu_free_memory(smumgr->device, (void *)priv->smu_buffer.handle);
+	smu_free_memory(smumgr->device, (void *)priv->header_buffer.handle);
+
 	if (smumgr->backend != NULL) {
 		kfree(smumgr->backend);
 		smumgr->backend = NULL;
 	}
+
+	cgs_rel_firmware(smumgr->device, CGS_UCODE_ID_SMU);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
index c89dc777768f..b961a1c6caf3 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h
@@ -26,7 +26,7 @@ TRACE_EVENT(amd_sched_job,
 	    TP_fast_assign(
 			   __entry->entity = sched_job->s_entity;
 			   __entry->sched_job = sched_job;
-			   __entry->fence = &sched_job->s_fence->base;
+			   __entry->fence = &sched_job->s_fence->finished;
 			   __entry->name = sched_job->sched->name;
 			   __entry->job_count = kfifo_len(
 				   &sched_job->s_entity->job_queue) / sizeof(sched_job);
@@ -46,7 +46,7 @@ TRACE_EVENT(amd_sched_process_job,
 		    ),
 
 	    TP_fast_assign(
-		    __entry->fence = &fence->base;
+		    __entry->fence = &fence->finished;
 		    ),
 	    TP_printk("fence=%p signaled", __entry->fence)
 );
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index c16248cee779..70ff09d10885 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -32,6 +32,7 @@
 
 static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);
 static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
+static void amd_sched_process_job(struct fence *f, struct fence_cb *cb);
 
 struct kmem_cache *sched_fence_slab;
 atomic_t sched_fence_slab_ref = ATOMIC_INIT(0);
@@ -140,7 +141,7 @@ int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
 		return r;
 
 	atomic_set(&entity->fence_seq, 0);
-	entity->fence_context = fence_context_alloc(1);
+	entity->fence_context = fence_context_alloc(2);
 
 	return 0;
 }
@@ -251,17 +252,21 @@ static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
 
 	s_fence = to_amd_sched_fence(fence);
 	if (s_fence && s_fence->sched == sched) {
-		/* Fence is from the same scheduler */
-		if (test_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &fence->flags)) {
-			/* Ignore it when it is already scheduled */
-			fence_put(entity->dependency);
-			return false;
-		}
 
-		/* Wait for fence to be scheduled */
-		entity->cb.func = amd_sched_entity_clear_dep;
-		list_add_tail(&entity->cb.node, &s_fence->scheduled_cb);
-		return true;
+		/*
+		 * Fence is from the same scheduler, only need to wait for
+		 * it to be scheduled
+		 */
+		fence = fence_get(&s_fence->scheduled);
+		fence_put(entity->dependency);
+		entity->dependency = fence;
+		if (!fence_add_callback(fence, &entity->cb,
+					amd_sched_entity_clear_dep))
+			return true;
+
+		/* Ignore it when it is already scheduled */
+		fence_put(fence);
+		return false;
 	}
 
 	if (!fence_add_callback(entity->dependency, &entity->cb,
@@ -319,46 +324,123 @@ static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
 	return added;
 }
 
-static void amd_sched_free_job(struct fence *f, struct fence_cb *cb) {
-	struct amd_sched_job *job = container_of(cb, struct amd_sched_job, cb_free_job);
-	schedule_work(&job->work_free_job);
-}
-
-/* job_finish is called after hw fence signaled, and
- * the job had already been deleted from ring_mirror_list
- */
-void amd_sched_job_finish(struct amd_sched_job *s_job)
+/*
+ * Work handler queued by amd_sched_job_finish_cb(): unlinks the job from
+ * ring_mirror_list, cancels its timeout work and schedules the next job's
+ * (if timeouts are in use), then releases the job through ops->free_job().
+ */
+static void amd_sched_job_finish(struct work_struct *work)
 {
-	struct amd_sched_job *next;
+	struct amd_sched_job *s_job = container_of(work, struct amd_sched_job,
+						   finish_work);
 	struct amd_gpu_scheduler *sched = s_job->sched;
 
+	/* remove job from ring_mirror_list */
+	spin_lock(&sched->job_list_lock);
+	list_del_init(&s_job->node);
 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
-		if (cancel_delayed_work(&s_job->work_tdr))
-			amd_sched_job_put(s_job);
+		struct amd_sched_job *next;
+
+		spin_unlock(&sched->job_list_lock);
+		cancel_delayed_work_sync(&s_job->work_tdr);
+		spin_lock(&sched->job_list_lock);
 
 		/* queue TDR for next job */
 		next = list_first_entry_or_null(&sched->ring_mirror_list,
 						struct amd_sched_job, node);
 
-		if (next) {
-			INIT_DELAYED_WORK(&next->work_tdr, s_job->timeout_callback);
-			amd_sched_job_get(next);
+		if (next)
 			schedule_delayed_work(&next->work_tdr, sched->timeout);
-		}
 	}
+	spin_unlock(&sched->job_list_lock);
+	sched->ops->free_job(s_job);
 }
 
-void amd_sched_job_begin(struct amd_sched_job *s_job)
+/* Fence callback: defers amd_sched_job_finish() to the job's finish_work. */
+static void amd_sched_job_finish_cb(struct fence *f, struct fence_cb *cb)
+{
+	struct amd_sched_job *job = container_of(cb, struct amd_sched_job,
+						 finish_cb);
+	schedule_work(&job->finish_work);
+}
+
+static void amd_sched_job_begin(struct amd_sched_job *s_job)
 {
 	struct amd_gpu_scheduler *sched = s_job->sched;
 
+	spin_lock(&sched->job_list_lock);
+	list_add_tail(&s_job->node, &sched->ring_mirror_list);
 	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
-		list_first_entry_or_null(&sched->ring_mirror_list, struct amd_sched_job, node) == s_job)
-	{
-		INIT_DELAYED_WORK(&s_job->work_tdr, s_job->timeout_callback);
-		amd_sched_job_get(s_job);
+	    list_first_entry_or_null(&sched->ring_mirror_list,
+				     struct amd_sched_job, node) == s_job)
+		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+	spin_unlock(&sched->job_list_lock);
+}
+
+/* Delayed-work handler for work_tdr: passes the job to ops->timedout_job(). */
+static void amd_sched_job_timedout(struct work_struct *work)
+{
+	struct amd_sched_job *job = container_of(work, struct amd_sched_job,
+						 work_tdr.work);
+
+	job->sched->ops->timedout_job(job);
+}
+
+/*
+ * For each job on ring_mirror_list, try to remove the scheduler's callback
+ * from its hardware fence; where that succeeds, drop the reference held in
+ * s_fence->parent and clear the pointer.
+ */
+void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
+{
+	struct amd_sched_job *s_job;
+
+	spin_lock(&sched->job_list_lock);
+	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
+		struct fence *parent = s_job->s_fence->parent;
+
+		/* parent is only set after run_job() has returned a fence */
+		if (parent && fence_remove_callback(parent, &s_job->s_fence->cb)) {
+			fence_put(parent);
+			s_job->s_fence->parent = NULL;
+		}
+	}
+	spin_unlock(&sched->job_list_lock);
+}
+
+/*
+ * Schedule the timeout work of the first job on ring_mirror_list, then
+ * resubmit every job on the list through ops->run_job().
+ */
+void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
+{
+	struct amd_sched_job *s_job;
+	int r;
+
+	spin_lock(&sched->job_list_lock);
+	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
+					 struct amd_sched_job, node);
+	if (s_job)
 		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
+
+	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
+		struct amd_sched_fence *s_fence = s_job->s_fence;
+		struct fence *fence = sched->ops->run_job(s_job);
+		if (fence) {
+			s_fence->parent = fence_get(fence);
+			r = fence_add_callback(fence, &s_fence->cb,
+					       amd_sched_process_job);
+			if (r == -ENOENT)
+				amd_sched_process_job(fence, &s_fence->cb);
+			else if (r)
+				DRM_ERROR("fence add callback failed (%d)\n",
+					  r);
+			fence_put(fence);
+		} else {
+			DRM_ERROR("Failed to run job!\n");
+			amd_sched_process_job(NULL, &s_fence->cb);
+		}
 	}
+	spin_unlock(&sched->job_list_lock);
 }
 
 /**
@@ -372,36 +439,29 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
 {
 	struct amd_sched_entity *entity = sched_job->s_entity;
 
-	sched_job->use_sched = 1;
-	fence_add_callback(&sched_job->s_fence->base,
-					&sched_job->cb_free_job, amd_sched_free_job);
 	trace_amd_sched_job(sched_job);
+	fence_add_callback(&sched_job->s_fence->finished, &sched_job->finish_cb,
+			   amd_sched_job_finish_cb);
 	wait_event(entity->sched->job_scheduled,
 		   amd_sched_entity_in(sched_job));
 }
 
 /* init a sched_job with basic field */
 int amd_sched_job_init(struct amd_sched_job *job,
-						struct amd_gpu_scheduler *sched,
-						struct amd_sched_entity *entity,
-						void (*timeout_cb)(struct work_struct *work),
-						void (*free_cb)(struct kref *refcount),
-						void *owner, struct fence **fence)
+		       struct amd_gpu_scheduler *sched,
+		       struct amd_sched_entity *entity,
+		       void *owner)
 {
-	INIT_LIST_HEAD(&job->node);
-	kref_init(&job->refcount);
 	job->sched = sched;
 	job->s_entity = entity;
 	job->s_fence = amd_sched_fence_create(entity, owner);
 	if (!job->s_fence)
 		return -ENOMEM;
 
-	job->s_fence->s_job = job;
-	job->timeout_callback = timeout_cb;
-	job->free_callback = free_cb;
+	INIT_WORK(&job->finish_work, amd_sched_job_finish);
+	INIT_LIST_HEAD(&job->node);
+	INIT_DELAYED_WORK(&job->work_tdr, amd_sched_job_timedout);
 
-	if (fence)
-		*fence = &job->s_fence->base;
 	return 0;
 }
 
@@ -450,23 +510,25 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
 	struct amd_sched_fence *s_fence =
 		container_of(cb, struct amd_sched_fence, cb);
 	struct amd_gpu_scheduler *sched = s_fence->sched;
-	unsigned long flags;
 
 	atomic_dec(&sched->hw_rq_count);
-
-	/* remove job from ring_mirror_list */
-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	list_del_init(&s_fence->s_job->node);
-	sched->ops->finish_job(s_fence->s_job);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
-
-	amd_sched_fence_signal(s_fence);
+	amd_sched_fence_finished(s_fence);
 
 	trace_amd_sched_process_job(s_fence);
-	fence_put(&s_fence->base);
+	fence_put(&s_fence->finished);
 	wake_up_interruptible(&sched->wake_up_worker);
 }
 
+static bool amd_sched_blocked(struct amd_gpu_scheduler *sched)
+{
+	if (kthread_should_park()) {
+		kthread_parkme();
+		return true;
+	}
+
+	return false;
+}
+
 static int amd_sched_main(void *param)
 {
 	struct sched_param sparam = {.sched_priority = 1};
@@ -476,14 +538,15 @@ static int amd_sched_main(void *param)
 	sched_setscheduler(current, SCHED_FIFO, &sparam);
 
 	while (!kthread_should_stop()) {
-		struct amd_sched_entity *entity;
+		struct amd_sched_entity *entity = NULL;
 		struct amd_sched_fence *s_fence;
 		struct amd_sched_job *sched_job;
 		struct fence *fence;
 
 		wait_event_interruptible(sched->wake_up_worker,
-			(entity = amd_sched_select_entity(sched)) ||
-			kthread_should_stop());
+					 (!amd_sched_blocked(sched) &&
+					  (entity = amd_sched_select_entity(sched))) ||
+					 kthread_should_stop());
 
 		if (!entity)
 			continue;
@@ -495,16 +558,19 @@ static int amd_sched_main(void *param)
 		s_fence = sched_job->s_fence;
 
 		atomic_inc(&sched->hw_rq_count);
-		amd_sched_job_pre_schedule(sched, sched_job);
+		amd_sched_job_begin(sched_job);
+
 		fence = sched->ops->run_job(sched_job);
 		amd_sched_fence_scheduled(s_fence);
 		if (fence) {
+			s_fence->parent = fence_get(fence);
 			r = fence_add_callback(fence, &s_fence->cb,
 					       amd_sched_process_job);
 			if (r == -ENOENT)
 				amd_sched_process_job(fence, &s_fence->cb);
 			else if (r)
-				DRM_ERROR("fence add callback failed (%d)\n", r);
+				DRM_ERROR("fence add callback failed (%d)\n",
+					  r);
 			fence_put(fence);
 		} else {
 			DRM_ERROR("Failed to run job!\n");
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
index 070095a9433c..7cbbbfb502ef 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
@@ -27,8 +27,6 @@
 #include <linux/kfifo.h>
 #include <linux/fence.h>
 
-#define AMD_SCHED_FENCE_SCHEDULED_BIT	FENCE_FLAG_USER_BITS
-
 struct amd_gpu_scheduler;
 struct amd_sched_rq;
 
@@ -68,36 +66,34 @@ struct amd_sched_rq {
 };
 
 struct amd_sched_fence {
-	struct fence                    base;
+	struct fence                    scheduled;
+	struct fence                    finished;
 	struct fence_cb                 cb;
-	struct list_head		scheduled_cb;
+	struct fence                    *parent;
 	struct amd_gpu_scheduler	*sched;
 	spinlock_t			lock;
 	void                            *owner;
-	struct amd_sched_job	*s_job;
 };
 
 struct amd_sched_job {
-	struct kref refcount;
 	struct amd_gpu_scheduler        *sched;
 	struct amd_sched_entity         *s_entity;
 	struct amd_sched_fence          *s_fence;
-	bool	use_sched;	/* true if the job goes to scheduler */
-	struct fence_cb                cb_free_job;
-	struct work_struct             work_free_job;
-	struct list_head			   node;
-	struct delayed_work work_tdr;
-	void (*timeout_callback) (struct work_struct *work);
-	void (*free_callback)(struct kref *refcount);
+	struct fence_cb			finish_cb;
+	struct work_struct		finish_work;
+	struct list_head		node;
+	struct delayed_work		work_tdr;
 };
 
-extern const struct fence_ops amd_sched_fence_ops;
+extern const struct fence_ops amd_sched_fence_ops_scheduled;
+extern const struct fence_ops amd_sched_fence_ops_finished;
 static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
 {
-	struct amd_sched_fence *__f = container_of(f, struct amd_sched_fence, base);
+	if (f->ops == &amd_sched_fence_ops_scheduled)
+		return container_of(f, struct amd_sched_fence, scheduled);
 
-	if (__f->base.ops == &amd_sched_fence_ops)
-		return __f;
+	if (f->ops == &amd_sched_fence_ops_finished)
+		return container_of(f, struct amd_sched_fence, finished);
 
 	return NULL;
 }
@@ -109,8 +105,8 @@ static inline struct amd_sched_fence *to_amd_sched_fence(struct fence *f)
 struct amd_sched_backend_ops {
 	struct fence *(*dependency)(struct amd_sched_job *sched_job);
 	struct fence *(*run_job)(struct amd_sched_job *sched_job);
-	void (*begin_job)(struct amd_sched_job *sched_job);
-	void (*finish_job)(struct amd_sched_job *sched_job);
+	void (*timedout_job)(struct amd_sched_job *sched_job);
+	void (*free_job)(struct amd_sched_job *sched_job);
 };
 
 enum amd_sched_priority {
@@ -152,25 +148,11 @@ void amd_sched_entity_push_job(struct amd_sched_job *sched_job);
 struct amd_sched_fence *amd_sched_fence_create(
 	struct amd_sched_entity *s_entity, void *owner);
 void amd_sched_fence_scheduled(struct amd_sched_fence *fence);
-void amd_sched_fence_signal(struct amd_sched_fence *fence);
+void amd_sched_fence_finished(struct amd_sched_fence *fence);
 int amd_sched_job_init(struct amd_sched_job *job,
-					struct amd_gpu_scheduler *sched,
-					struct amd_sched_entity *entity,
-					void (*timeout_cb)(struct work_struct *work),
-					void (*free_cb)(struct kref* refcount),
-					void *owner, struct fence **fence);
-void amd_sched_job_pre_schedule(struct amd_gpu_scheduler *sched ,
-								struct amd_sched_job *s_job);
-void amd_sched_job_finish(struct amd_sched_job *s_job);
-void amd_sched_job_begin(struct amd_sched_job *s_job);
-static inline void amd_sched_job_get(struct amd_sched_job *job) {
-	if (job)
-		kref_get(&job->refcount);
-}
-
-static inline void amd_sched_job_put(struct amd_sched_job *job) {
-	if (job)
-		kref_put(&job->refcount, job->free_callback);
-}
-
+		       struct amd_gpu_scheduler *sched,
+		       struct amd_sched_entity *entity,
+		       void *owner);
+void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched);
+void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
 #endif
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c
index 2a732c490375..6b63beaf7574 100644
--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
@@ -27,7 +27,8 @@
 #include <drm/drmP.h>
 #include "gpu_scheduler.h"
 
-struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity, void *owner)
+struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *entity,
+					       void *owner)
 {
 	struct amd_sched_fence *fence = NULL;
 	unsigned seq;
@@ -36,46 +37,37 @@ struct amd_sched_fence *amd_sched_fence_create(struct amd_sched_entity *s_entity
 	if (fence == NULL)
 		return NULL;
 
-	INIT_LIST_HEAD(&fence->scheduled_cb);
 	fence->owner = owner;
-	fence->sched = s_entity->sched;
+	fence->sched = entity->sched;
 	spin_lock_init(&fence->lock);
 
-	seq = atomic_inc_return(&s_entity->fence_seq);
-	fence_init(&fence->base, &amd_sched_fence_ops, &fence->lock,
-		   s_entity->fence_context, seq);
+	seq = atomic_inc_return(&entity->fence_seq);
+	fence_init(&fence->scheduled, &amd_sched_fence_ops_scheduled,
+		   &fence->lock, entity->fence_context, seq);
+	fence_init(&fence->finished, &amd_sched_fence_ops_finished,
+		   &fence->lock, entity->fence_context + 1, seq);
 
 	return fence;
 }
 
-void amd_sched_fence_signal(struct amd_sched_fence *fence)
+void amd_sched_fence_scheduled(struct amd_sched_fence *fence)
 {
-	int ret = fence_signal(&fence->base);
+	int ret = fence_signal(&fence->scheduled);
+
 	if (!ret)
-		FENCE_TRACE(&fence->base, "signaled from irq context\n");
+		FENCE_TRACE(&fence->scheduled, "signaled from irq context\n");
 	else
-		FENCE_TRACE(&fence->base, "was already signaled\n");
-}
-
-void amd_sched_job_pre_schedule(struct amd_gpu_scheduler *sched ,
-				struct amd_sched_job *s_job)
-{
-	unsigned long flags;
-	spin_lock_irqsave(&sched->job_list_lock, flags);
-	list_add_tail(&s_job->node, &sched->ring_mirror_list);
-	sched->ops->begin_job(s_job);
-	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+		FENCE_TRACE(&fence->scheduled, "was already signaled\n");
 }
 
-void amd_sched_fence_scheduled(struct amd_sched_fence *s_fence)
+void amd_sched_fence_finished(struct amd_sched_fence *fence)
 {
-	struct fence_cb *cur, *tmp;
+	int ret = fence_signal(&fence->finished);
 
-	set_bit(AMD_SCHED_FENCE_SCHEDULED_BIT, &s_fence->base.flags);
-	list_for_each_entry_safe(cur, tmp, &s_fence->scheduled_cb, node) {
-		list_del_init(&cur->node);
-		cur->func(&s_fence->base, cur);
-	}
+	if (!ret)
+		FENCE_TRACE(&fence->finished, "signaled from irq context\n");
+	else
+		FENCE_TRACE(&fence->finished, "was already signaled\n");
 }
 
 static const char *amd_sched_fence_get_driver_name(struct fence *fence)
@@ -105,6 +97,8 @@ static void amd_sched_fence_free(struct rcu_head *rcu)
 {
 	struct fence *f = container_of(rcu, struct fence, rcu);
 	struct amd_sched_fence *fence = to_amd_sched_fence(f);
+
+	fence_put(fence->parent);
 	kmem_cache_free(sched_fence_slab, fence);
 }
 
@@ -116,16 +110,41 @@ static void amd_sched_fence_free(struct rcu_head *rcu)
  * This function is called when the reference count becomes zero.
  * It just RCU schedules freeing up the fence.
  */
-static void amd_sched_fence_release(struct fence *f)
+static void amd_sched_fence_release_scheduled(struct fence *f)
 {
-	call_rcu(&f->rcu, amd_sched_fence_free);
+	struct amd_sched_fence *fence = to_amd_sched_fence(f);
+
+	call_rcu(&fence->finished.rcu, amd_sched_fence_free);
 }
 
-const struct fence_ops amd_sched_fence_ops = {
+/**
+ * amd_sched_fence_release_finished - drop extra reference
+ *
+ * @f: fence
+ *
+ * Release of the finished fence drops a reference on the scheduled fence.
+ */
+static void amd_sched_fence_release_finished(struct fence *f)
+{
+	struct amd_sched_fence *fence = to_amd_sched_fence(f);
+
+	fence_put(&fence->scheduled);
+}
+
+const struct fence_ops amd_sched_fence_ops_scheduled = {
+	.get_driver_name = amd_sched_fence_get_driver_name,
+	.get_timeline_name = amd_sched_fence_get_timeline_name,
+	.enable_signaling = amd_sched_fence_enable_signaling,
+	.signaled = NULL,
+	.wait = fence_default_wait,
+	.release = amd_sched_fence_release_scheduled,
+};
+
+const struct fence_ops amd_sched_fence_ops_finished = {
 	.get_driver_name = amd_sched_fence_get_driver_name,
 	.get_timeline_name = amd_sched_fence_get_timeline_name,
 	.enable_signaling = amd_sched_fence_enable_signaling,
 	.signaled = NULL,
 	.wait = fence_default_wait,
-	.release = amd_sched_fence_release,
+	.release = amd_sched_fence_release_finished,
 };
diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c
index 59f2f93b6f84..b29a41218fc9 100644
--- a/drivers/gpu/drm/ast/ast_ttm.c
+++ b/drivers/gpu/drm/ast/ast_ttm.c
@@ -186,17 +186,6 @@ static void ast_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *
 {
 }
 
-static int ast_bo_move(struct ttm_buffer_object *bo,
-		       bool evict, bool interruptible,
-		       bool no_wait_gpu,
-		       struct ttm_mem_reg *new_mem)
-{
-	int r;
-	r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
-	return r;
-}
-
-
 static void ast_ttm_backend_destroy(struct ttm_tt *tt)
 {
 	ttm_tt_fini(tt);
@@ -241,7 +230,7 @@ struct ttm_bo_driver ast_bo_driver = {
 	.ttm_tt_unpopulate = ast_ttm_tt_unpopulate,
 	.init_mem_type = ast_bo_init_mem_type,
 	.evict_flags = ast_bo_evict_flags,
-	.move = ast_bo_move,
+	.move = NULL,
 	.verify_access = ast_bo_verify_access,
 	.io_mem_reserve = &ast_ttm_io_mem_reserve,
 	.io_mem_free = &ast_ttm_io_mem_free,
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
index 473a475f27b1..6119b5085501 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
@@ -254,9 +254,10 @@ int atmel_hlcdc_create_outputs(struct drm_device *dev)
 		if (!ret)
 			ret = atmel_hlcdc_check_endpoint(dev, &ep);
 
-		of_node_put(ep_np);
-		if (ret)
+		if (ret) {
+			of_node_put(ep_np);
 			return ret;
+		}
 	}
 
 	for_each_endpoint_of_node(dev->dev->of_node, ep_np) {
@@ -264,9 +265,10 @@ int atmel_hlcdc_create_outputs(struct drm_device *dev)
 		if (!ret)
 			ret = atmel_hlcdc_attach_endpoint(dev, &ep);
 
-		of_node_put(ep_np);
-		if (ret)
+		if (ret) {
+			of_node_put(ep_np);
 			return ret;
+		}
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
index aef3ca8a81fa..016c191221f3 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
@@ -339,6 +339,8 @@ atmel_hlcdc_plane_update_pos_and_size(struct atmel_hlcdc_plane *plane,
 
 		atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff,
 					     factor_reg);
+	} else {
+		atmel_hlcdc_layer_update_cfg(&plane->layer, 13, 0xffffffff, 0);
 	}
 }
 
diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c
index 6cf912c45e48..b109fdcaa679 100644
--- a/drivers/gpu/drm/bochs/bochs_mm.c
+++ b/drivers/gpu/drm/bochs/bochs_mm.c
@@ -165,15 +165,6 @@ static void bochs_ttm_io_mem_free(struct ttm_bo_device *bdev,
 {
 }
 
-static int bochs_bo_move(struct ttm_buffer_object *bo,
-			 bool evict, bool interruptible,
-			 bool no_wait_gpu,
-			 struct ttm_mem_reg *new_mem)
-{
-	return ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
-}
-
-
 static void bochs_ttm_backend_destroy(struct ttm_tt *tt)
 {
 	ttm_tt_fini(tt);
@@ -208,7 +199,7 @@ struct ttm_bo_driver bochs_bo_driver = {
 	.ttm_tt_unpopulate = ttm_pool_unpopulate,
 	.init_mem_type = bochs_bo_init_mem_type,
 	.evict_flags = bochs_bo_evict_flags,
-	.move = bochs_bo_move,
+	.move = NULL,
 	.verify_access = bochs_bo_verify_access,
 	.io_mem_reserve = &bochs_ttm_io_mem_reserve,
 	.io_mem_free = &bochs_ttm_io_mem_free,
diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c
index 6768b7b1af32..1cc9ee607128 100644
--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
+++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
@@ -186,17 +186,6 @@ static void cirrus_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
 {
 }
 
-static int cirrus_bo_move(struct ttm_buffer_object *bo,
-		       bool evict, bool interruptible,
-		       bool no_wait_gpu,
-		       struct ttm_mem_reg *new_mem)
-{
-	int r;
-	r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
-	return r;
-}
-
-
 static void cirrus_ttm_backend_destroy(struct ttm_tt *tt)
 {
 	ttm_tt_fini(tt);
@@ -241,7 +230,7 @@ struct ttm_bo_driver cirrus_bo_driver = {
 	.ttm_tt_unpopulate = cirrus_ttm_tt_unpopulate,
 	.init_mem_type = cirrus_bo_init_mem_type,
 	.evict_flags = cirrus_bo_evict_flags,
-	.move = cirrus_bo_move,
+	.move = NULL,
 	.verify_access = cirrus_bo_verify_access,
 	.io_mem_reserve = &cirrus_ttm_io_mem_reserve,
 	.io_mem_free = &cirrus_ttm_io_mem_free,
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index dac0875e669c..9359be4a0ca9 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1299,14 +1299,39 @@ EXPORT_SYMBOL(drm_atomic_add_affected_planes);
  */
 void drm_atomic_legacy_backoff(struct drm_atomic_state *state)
 {
+	struct drm_device *dev = state->dev;
+	unsigned crtc_mask = 0;
+	struct drm_crtc *crtc;
 	int ret;
+	bool global = false;
+
+	drm_for_each_crtc(crtc, dev) {
+		if (crtc->acquire_ctx != state->acquire_ctx)
+			continue;
+
+		crtc_mask |= drm_crtc_mask(crtc);
+		crtc->acquire_ctx = NULL;
+	}
+
+	if (WARN_ON(dev->mode_config.acquire_ctx == state->acquire_ctx)) {
+		global = true;
+
+		dev->mode_config.acquire_ctx = NULL;
+	}
 
 retry:
 	drm_modeset_backoff(state->acquire_ctx);
 
-	ret = drm_modeset_lock_all_ctx(state->dev, state->acquire_ctx);
+	ret = drm_modeset_lock_all_ctx(dev, state->acquire_ctx);
 	if (ret)
 		goto retry;
+
+	drm_for_each_crtc(crtc, dev)
+		if (drm_crtc_mask(crtc) & crtc_mask)
+			crtc->acquire_ctx = state->acquire_ctx;
+
+	if (global)
+		dev->mode_config.acquire_ctx = state->acquire_ctx;
 }
 EXPORT_SYMBOL(drm_atomic_legacy_backoff);
 
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index d61591274ff6..604d3ef72ffa 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -531,11 +531,11 @@ drm_crtc_helper_disable(struct drm_crtc *crtc)
 int drm_crtc_helper_set_config(struct drm_mode_set *set)
 {
 	struct drm_device *dev;
-	struct drm_crtc *new_crtc;
-	struct drm_encoder *save_encoders, *new_encoder, *encoder;
+	struct drm_crtc **save_encoder_crtcs, *new_crtc;
+	struct drm_encoder **save_connector_encoders, *new_encoder, *encoder;
 	bool mode_changed = false; /* if true do a full mode set */
 	bool fb_changed = false; /* if true and !mode_changed just do a flip */
-	struct drm_connector *save_connectors, *connector;
+	struct drm_connector *connector;
 	int count = 0, ro, fail = 0;
 	const struct drm_crtc_helper_funcs *crtc_funcs;
 	struct drm_mode_set save_set;
@@ -577,15 +577,15 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 	 * Allocate space for the backup of all (non-pointer) encoder and
 	 * connector data.
 	 */
-	save_encoders = kzalloc(dev->mode_config.num_encoder *
-				sizeof(struct drm_encoder), GFP_KERNEL);
-	if (!save_encoders)
+	save_encoder_crtcs = kzalloc(dev->mode_config.num_encoder *
+				sizeof(struct drm_crtc *), GFP_KERNEL);
+	if (!save_encoder_crtcs)
 		return -ENOMEM;
 
-	save_connectors = kzalloc(dev->mode_config.num_connector *
-				sizeof(struct drm_connector), GFP_KERNEL);
-	if (!save_connectors) {
-		kfree(save_encoders);
+	save_connector_encoders = kzalloc(dev->mode_config.num_connector *
+				sizeof(struct drm_encoder *), GFP_KERNEL);
+	if (!save_connector_encoders) {
+		kfree(save_encoder_crtcs);
 		return -ENOMEM;
 	}
 
@@ -596,12 +596,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 	 */
 	count = 0;
 	drm_for_each_encoder(encoder, dev) {
-		save_encoders[count++] = *encoder;
+		save_encoder_crtcs[count++] = encoder->crtc;
 	}
 
 	count = 0;
 	drm_for_each_connector(connector, dev) {
-		save_connectors[count++] = *connector;
+		save_connector_encoders[count++] = connector->encoder;
 	}
 
 	save_set.crtc = set->crtc;
@@ -634,8 +634,12 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 		mode_changed = true;
 	}
 
-	/* take a reference on all connectors in set */
+	/* take a reference on all unbound connectors in set, reuse the
+	 * already taken reference for bound connectors
+	 */
 	for (ro = 0; ro < set->num_connectors; ro++) {
+		if (set->connectors[ro]->encoder)
+			continue;
 		drm_connector_reference(set->connectors[ro]);
 	}
 
@@ -757,30 +761,28 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)
 		}
 	}
 
-	/* after fail drop reference on all connectors in save set */
-	count = 0;
-	drm_for_each_connector(connector, dev) {
-		drm_connector_unreference(&save_connectors[count++]);
-	}
-
-	kfree(save_connectors);
-	kfree(save_encoders);
+	kfree(save_connector_encoders);
+	kfree(save_encoder_crtcs);
 	return 0;
 
 fail:
 	/* Restore all previous data. */
 	count = 0;
 	drm_for_each_encoder(encoder, dev) {
-		*encoder = save_encoders[count++];
+		encoder->crtc = save_encoder_crtcs[count++];
 	}
 
 	count = 0;
 	drm_for_each_connector(connector, dev) {
-		*connector = save_connectors[count++];
+		connector->encoder = save_connector_encoders[count++];
 	}
 
-	/* after fail drop reference on all connectors in set */
+	/* after fail drop reference on all unbound connectors in set, let
+	 * bound connectors keep their reference
+	 */
 	for (ro = 0; ro < set->num_connectors; ro++) {
+		if (set->connectors[ro]->encoder)
+			continue;
 		drm_connector_unreference(set->connectors[ro]);
 	}
 
@@ -790,8 +792,8 @@ fail:
 				      save_set.y, save_set.fb))
 		DRM_ERROR("failed to restore config after modeset failure\n");
 
-	kfree(save_connectors);
-	kfree(save_encoders);
+	kfree(save_connector_encoders);
+	kfree(save_encoder_crtcs);
 	return ret;
 }
 EXPORT_SYMBOL(drm_crtc_helper_set_config);
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index a13edf5de2d6..6537908050d7 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -2927,11 +2927,9 @@ static void drm_dp_destroy_connector_work(struct work_struct *work)
 		drm_dp_port_teardown_pdt(port, port->pdt);
 
 		if (!port->input && port->vcpi.vcpi > 0) {
-			if (mgr->mst_state) {
-				drm_dp_mst_reset_vcpi_slots(mgr, port);
-				drm_dp_update_payload_part1(mgr);
-				drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
-			}
+			drm_dp_mst_reset_vcpi_slots(mgr, port);
+			drm_dp_update_payload_part1(mgr);
+			drm_dp_mst_put_payload_id(mgr, port->vcpi.vcpi);
 		}
 
 		kref_put(&port->kref, drm_dp_free_mst_port);
diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c
index c0b0c718994a..1fd6eac1400c 100644
--- a/drivers/gpu/drm/drm_fb_cma_helper.c
+++ b/drivers/gpu/drm/drm_fb_cma_helper.c
@@ -596,3 +596,18 @@ void drm_fbdev_cma_hotplug_event(struct drm_fbdev_cma *fbdev_cma)
 		drm_fb_helper_hotplug_event(&fbdev_cma->fb_helper);
 }
 EXPORT_SYMBOL_GPL(drm_fbdev_cma_hotplug_event);
+
+/**
+ * drm_fbdev_cma_set_suspend - wrapper around drm_fb_helper_set_suspend
+ * @fbdev_cma: The drm_fbdev_cma struct, may be NULL
+ * @state: desired state, zero to resume, non-zero to suspend
+ *
+ * Calls drm_fb_helper_set_suspend, which is a wrapper around
+ * fb_set_suspend implemented by fbdev core.
+ */
+void drm_fbdev_cma_set_suspend(struct drm_fbdev_cma *fbdev_cma, int state)
+{
+	if (fbdev_cma)
+		drm_fb_helper_set_suspend(&fbdev_cma->fb_helper, state);
+}
+EXPORT_SYMBOL(drm_fbdev_cma_set_suspend);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 340d390306d8..ffd1b32caa8d 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -91,10 +91,8 @@ static void load_gpu(struct drm_device *dev)
 			int ret;
 
 			ret = etnaviv_gpu_init(g);
-			if (ret) {
-				dev_err(g->dev, "hw init failed: %d\n", ret);
+			if (ret)
 				priv->gpu[i] = NULL;
-			}
 		}
 	}
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index ff6aa5dfb2d7..87ef34150d46 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -487,6 +487,47 @@ static int etnaviv_hw_reset(struct etnaviv_gpu *gpu)
 	return 0;
 }
 
+static void etnaviv_gpu_enable_mlcg(struct etnaviv_gpu *gpu)
+{
+	u32 pmc, ppc;
+
+	/* enable clock gating */
+	ppc = gpu_read(gpu, VIVS_PM_POWER_CONTROLS);
+	ppc |= VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING;
+
+	/* Disable stall module clock gating for 4.3.0.1 and 4.3.0.2 revs */
+	if (gpu->identity.revision == 0x4301 ||
+	    gpu->identity.revision == 0x4302)
+		ppc |= VIVS_PM_POWER_CONTROLS_DISABLE_STALL_MODULE_CLOCK_GATING;
+
+	gpu_write(gpu, VIVS_PM_POWER_CONTROLS, ppc);
+
+	pmc = gpu_read(gpu, VIVS_PM_MODULE_CONTROLS);
+
+	/* Disable PA clock gating for GC400+ except for GC420 */
+	if (gpu->identity.model >= chipModel_GC400 &&
+	    gpu->identity.model != chipModel_GC420)
+		pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PA;
+
+	/*
+	 * Disable PE clock gating on revs < 5.0.0.0 when HZ is
+	 * present without a bug fix.
+	 */
+	if (gpu->identity.revision < 0x5000 &&
+	    gpu->identity.minor_features0 & chipMinorFeatures0_HZ &&
+	    !(gpu->identity.minor_features1 &
+	      chipMinorFeatures1_DISABLE_PE_GATING))
+		pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PE;
+
+	if (gpu->identity.revision < 0x5422)
+		pmc |= BIT(15); /* Unknown bit */
+
+	pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_HZ;
+	pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_EZ;
+
+	gpu_write(gpu, VIVS_PM_MODULE_CONTROLS, pmc);
+}
+
 static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 {
 	u16 prefetch;
@@ -506,6 +547,9 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 		gpu_write(gpu, VIVS_MC_DEBUG_MEMORY, mc_memory_debug);
 	}
 
+	/* enable module-level clock gating */
+	etnaviv_gpu_enable_mlcg(gpu);
+
 	/*
 	 * Update GPU AXI cache atttribute to "cacheable, no allocate".
 	 * This is necessary to prevent the iMX6 SoC locking up.
@@ -553,8 +597,10 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	bool mmuv2;
 
 	ret = pm_runtime_get_sync(gpu->dev);
-	if (ret < 0)
+	if (ret < 0) {
+		dev_err(gpu->dev, "Failed to enable GPU power domain\n");
 		return ret;
+	}
 
 	etnaviv_hw_identify(gpu);
 
@@ -591,8 +637,10 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	}
 
 	ret = etnaviv_hw_reset(gpu);
-	if (ret)
+	if (ret) {
+		dev_err(gpu->dev, "GPU reset failed\n");
 		goto fail;
+	}
 
 	/* Setup IOMMU.. eventually we will (I think) do this once per context
 	 * and have separate page tables per context.  For now, to keep things
@@ -610,12 +658,14 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	}
 
 	if (!iommu) {
+		dev_err(gpu->dev, "Failed to allocate GPU IOMMU domain\n");
 		ret = -ENOMEM;
 		goto fail;
 	}
 
 	gpu->mmu = etnaviv_iommu_new(gpu, iommu, version);
 	if (!gpu->mmu) {
+		dev_err(gpu->dev, "Failed to instantiate GPU IOMMU\n");
 		iommu_domain_free(iommu);
 		ret = -ENOMEM;
 		goto fail;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
index 522cfd447892..16353ee81651 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
@@ -225,6 +225,7 @@ struct iommu_domain *etnaviv_iommu_domain_alloc(struct etnaviv_gpu *gpu)
 
 	etnaviv_domain->domain.type = __IOMMU_DOMAIN_PAGING;
 	etnaviv_domain->domain.ops = &etnaviv_iommu_ops.ops;
+	etnaviv_domain->domain.pgsize_bitmap = SZ_4K;
 	etnaviv_domain->domain.geometry.aperture_start = GPU_MEM_START;
 	etnaviv_domain->domain.geometry.aperture_end = GPU_MEM_START + PT_ENTRIES * SZ_4K - 1;
 
diff --git a/drivers/gpu/drm/etnaviv/state_hi.xml.h b/drivers/gpu/drm/etnaviv/state_hi.xml.h
index 6a7de5f1454a..807a3d9e0dd5 100644
--- a/drivers/gpu/drm/etnaviv/state_hi.xml.h
+++ b/drivers/gpu/drm/etnaviv/state_hi.xml.h
@@ -218,6 +218,13 @@ Copyright (C) 2015
 #define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_FE	0x00000001
 #define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_DE	0x00000002
 #define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PE	0x00000004
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_SH	0x00000008
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_PA	0x00000010
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_SE	0x00000020
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA	0x00000040
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_TX	0x00000080
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_EZ	0x00010000
+#define VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_HZ	0x00020000
 
 #define VIVS_PM_MODULE_STATUS					0x00000108
 #define VIVS_PM_MODULE_STATUS_MODULE_CLOCK_GATED_FE		0x00000001
diff --git a/drivers/gpu/drm/exynos/exynos7_drm_decon.c b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
index f6223f907c15..7f9901b7777b 100644
--- a/drivers/gpu/drm/exynos/exynos7_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos7_drm_decon.c
@@ -31,7 +31,6 @@
 #include "exynos_drm_plane.h"
 #include "exynos_drm_drv.h"
 #include "exynos_drm_fb.h"
-#include "exynos_drm_fbdev.h"
 #include "exynos_drm_iommu.h"
 
 /*
diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c
index 468498e3fec1..4c1fb3f8b5a6 100644
--- a/drivers/gpu/drm/exynos/exynos_dp.c
+++ b/drivers/gpu/drm/exynos/exynos_dp.c
@@ -34,7 +34,7 @@
 
 struct exynos_dp_device {
 	struct drm_encoder         encoder;
-	struct drm_connector       connector;
+	struct drm_connector       *connector;
 	struct drm_bridge          *ptn_bridge;
 	struct drm_device          *drm_dev;
 	struct device              *dev;
@@ -70,7 +70,7 @@ static int exynos_dp_poweroff(struct analogix_dp_plat_data *plat_data)
 static int exynos_dp_get_modes(struct analogix_dp_plat_data *plat_data)
 {
 	struct exynos_dp_device *dp = to_dp(plat_data);
-	struct drm_connector *connector = &dp->connector;
+	struct drm_connector *connector = dp->connector;
 	struct drm_display_mode *mode;
 	int num_modes = 0;
 
@@ -103,6 +103,7 @@ static int exynos_dp_bridge_attach(struct analogix_dp_plat_data *plat_data,
 	int ret;
 
 	drm_connector_register(connector);
+	dp->connector = connector;
 
 	/* Pre-empt DP connector creation if there's a bridge */
 	if (dp->ptn_bridge) {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_core.c b/drivers/gpu/drm/exynos/exynos_drm_core.c
index 011211e4167d..edbd98ff293e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_core.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_core.c
@@ -15,7 +15,6 @@
 #include <drm/drmP.h>
 #include "exynos_drm_drv.h"
 #include "exynos_drm_crtc.h"
-#include "exynos_drm_fbdev.h"
 
 static LIST_HEAD(exynos_drm_subdrv_list);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 3efe1aa89416..d47216488985 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -30,7 +30,6 @@
 
 #include "exynos_drm_drv.h"
 #include "exynos_drm_fb.h"
-#include "exynos_drm_fbdev.h"
 #include "exynos_drm_crtc.h"
 #include "exynos_drm_plane.h"
 #include "exynos_drm_iommu.h"
@@ -120,7 +119,6 @@ static struct fimd_driver_data s3c64xx_fimd_driver_data = {
 	.timing_base = 0x0,
 	.has_clksel = 1,
 	.has_limited_fmt = 1,
-	.has_hw_trigger = 1,
 };
 
 static struct fimd_driver_data exynos3_fimd_driver_data = {
@@ -171,14 +169,11 @@ static struct fimd_driver_data exynos5420_fimd_driver_data = {
 	.lcdblk_vt_shift = 24,
 	.lcdblk_bypass_shift = 15,
 	.lcdblk_mic_bypass_shift = 11,
-	.trg_type = I80_HW_TRG,
 	.has_shadowcon = 1,
 	.has_vidoutcon = 1,
 	.has_vtsel = 1,
 	.has_mic_bypass = 1,
 	.has_dp_clk = 1,
-	.has_hw_trigger = 1,
-	.has_trigger_per_te = 1,
 };
 
 struct fimd_context {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 493552368295..8564c3da0d22 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -48,13 +48,13 @@
 
 /* registers for base address */
 #define G2D_SRC_BASE_ADDR		0x0304
-#define G2D_SRC_STRIDE_REG		0x0308
+#define G2D_SRC_STRIDE			0x0308
 #define G2D_SRC_COLOR_MODE		0x030C
 #define G2D_SRC_LEFT_TOP		0x0310
 #define G2D_SRC_RIGHT_BOTTOM		0x0314
 #define G2D_SRC_PLANE2_BASE_ADDR	0x0318
 #define G2D_DST_BASE_ADDR		0x0404
-#define G2D_DST_STRIDE_REG		0x0408
+#define G2D_DST_STRIDE			0x0408
 #define G2D_DST_COLOR_MODE		0x040C
 #define G2D_DST_LEFT_TOP		0x0410
 #define G2D_DST_RIGHT_BOTTOM		0x0414
@@ -563,7 +563,7 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset)
 
 	switch (reg_offset) {
 	case G2D_SRC_BASE_ADDR:
-	case G2D_SRC_STRIDE_REG:
+	case G2D_SRC_STRIDE:
 	case G2D_SRC_COLOR_MODE:
 	case G2D_SRC_LEFT_TOP:
 	case G2D_SRC_RIGHT_BOTTOM:
@@ -573,7 +573,7 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset)
 		reg_type = REG_TYPE_SRC_PLANE2;
 		break;
 	case G2D_DST_BASE_ADDR:
-	case G2D_DST_STRIDE_REG:
+	case G2D_DST_STRIDE:
 	case G2D_DST_COLOR_MODE:
 	case G2D_DST_LEFT_TOP:
 	case G2D_DST_RIGHT_BOTTOM:
@@ -968,8 +968,8 @@ static int g2d_check_reg_offset(struct device *dev,
 			} else
 				buf_info->types[reg_type] = BUF_TYPE_GEM;
 			break;
-		case G2D_SRC_STRIDE_REG:
-		case G2D_DST_STRIDE_REG:
+		case G2D_SRC_STRIDE:
+		case G2D_DST_STRIDE:
 			if (for_addr)
 				goto err;
 
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
index 706de3278f1c..3371635cd4d7 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_crtc.c
@@ -44,6 +44,8 @@ static void fsl_dcu_drm_disable_crtc(struct drm_crtc *crtc)
 	struct drm_device *dev = crtc->dev;
 	struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
 
+	drm_crtc_vblank_off(crtc);
+
 	regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
 			   DCU_MODE_DCU_MODE_MASK,
 			   DCU_MODE_DCU_MODE(DCU_MODE_OFF));
@@ -61,6 +63,8 @@ static void fsl_dcu_drm_crtc_enable(struct drm_crtc *crtc)
 			   DCU_MODE_DCU_MODE(DCU_MODE_NORMAL));
 	regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
 		     DCU_UPDATE_MODE_READREG);
+
+	drm_crtc_vblank_on(crtc);
 }
 
 static void fsl_dcu_drm_crtc_mode_set_nofb(struct drm_crtc *crtc)
@@ -137,9 +141,10 @@ int fsl_dcu_drm_crtc_create(struct fsl_dcu_drm_device *fsl_dev)
 {
 	struct drm_plane *primary;
 	struct drm_crtc *crtc = &fsl_dev->crtc;
-	unsigned int i, j, reg_num;
 	int ret;
 
+	fsl_dcu_drm_init_planes(fsl_dev->drm);
+
 	primary = fsl_dcu_drm_primary_create_plane(fsl_dev->drm);
 	if (!primary)
 		return -ENOMEM;
@@ -153,19 +158,5 @@ int fsl_dcu_drm_crtc_create(struct fsl_dcu_drm_device *fsl_dev)
 
 	drm_crtc_helper_add(crtc, &fsl_dcu_drm_crtc_helper_funcs);
 
-	if (!strcmp(fsl_dev->soc->name, "ls1021a"))
-		reg_num = LS1021A_LAYER_REG_NUM;
-	else
-		reg_num = VF610_LAYER_REG_NUM;
-	for (i = 0; i < fsl_dev->soc->total_layer; i++) {
-		for (j = 1; j <= reg_num; j++)
-			regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(i, j), 0);
-	}
-	regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
-			   DCU_MODE_DCU_MODE_MASK,
-			   DCU_MODE_DCU_MODE(DCU_MODE_OFF));
-	regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
-		     DCU_UPDATE_MODE_READREG);
-
 	return 0;
 }
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
index 33727d5d826a..7882387f9bff 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
@@ -11,6 +11,7 @@
 
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
+#include <linux/console.h>
 #include <linux/io.h>
 #include <linux/mfd/syscon.h>
 #include <linux/mm.h>
@@ -22,6 +23,7 @@
 #include <linux/regmap.h>
 
 #include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
@@ -42,7 +44,6 @@ static const struct regmap_config fsl_dcu_regmap_config = {
 	.reg_bits = 32,
 	.reg_stride = 4,
 	.val_bits = 32,
-	.cache_type = REGCACHE_RBTREE,
 
 	.volatile_reg = fsl_dcu_drm_is_volatile_reg,
 };
@@ -228,11 +229,26 @@ static int fsl_dcu_drm_pm_suspend(struct device *dev)
 	if (!fsl_dev)
 		return 0;
 
+	disable_irq(fsl_dev->irq);
 	drm_kms_helper_poll_disable(fsl_dev->drm);
-	regcache_cache_only(fsl_dev->regmap, true);
-	regcache_mark_dirty(fsl_dev->regmap);
-	clk_disable(fsl_dev->clk);
-	clk_unprepare(fsl_dev->clk);
+
+	console_lock();
+	drm_fbdev_cma_set_suspend(fsl_dev->fbdev, 1);
+	console_unlock();
+
+	fsl_dev->state = drm_atomic_helper_suspend(fsl_dev->drm);
+	if (IS_ERR(fsl_dev->state)) {
+		console_lock();
+		drm_fbdev_cma_set_suspend(fsl_dev->fbdev, 0);
+		console_unlock();
+
+		drm_kms_helper_poll_enable(fsl_dev->drm);
+		enable_irq(fsl_dev->irq);
+		return PTR_ERR(fsl_dev->state);
+	}
+
+	clk_disable_unprepare(fsl_dev->pix_clk);
+	clk_disable_unprepare(fsl_dev->clk);
 
 	return 0;
 }
@@ -245,21 +261,27 @@ static int fsl_dcu_drm_pm_resume(struct device *dev)
 	if (!fsl_dev)
 		return 0;
 
-	ret = clk_enable(fsl_dev->clk);
+	ret = clk_prepare_enable(fsl_dev->clk);
 	if (ret < 0) {
 		dev_err(dev, "failed to enable dcu clk\n");
-		clk_unprepare(fsl_dev->clk);
 		return ret;
 	}
-	ret = clk_prepare(fsl_dev->clk);
+
+	ret = clk_prepare_enable(fsl_dev->pix_clk);
 	if (ret < 0) {
-		dev_err(dev, "failed to prepare dcu clk\n");
+		dev_err(dev, "failed to enable pix clk\n");
 		return ret;
 	}
 
+	fsl_dcu_drm_init_planes(fsl_dev->drm);
+	drm_atomic_helper_resume(fsl_dev->drm, fsl_dev->state);
+
+	console_lock();
+	drm_fbdev_cma_set_suspend(fsl_dev->fbdev, 0);
+	console_unlock();
+
 	drm_kms_helper_poll_enable(fsl_dev->drm);
-	regcache_cache_only(fsl_dev->regmap, false);
-	regcache_sync(fsl_dev->regmap);
+	enable_irq(fsl_dev->irq);
 
 	return 0;
 }
@@ -273,12 +295,14 @@ static const struct fsl_dcu_soc_data fsl_dcu_ls1021a_data = {
 	.name = "ls1021a",
 	.total_layer = 16,
 	.max_layer = 4,
+	.layer_regs = LS1021A_LAYER_REG_NUM,
 };
 
 static const struct fsl_dcu_soc_data fsl_dcu_vf610_data = {
 	.name = "vf610",
 	.total_layer = 64,
 	.max_layer = 6,
+	.layer_regs = VF610_LAYER_REG_NUM,
 };
 
 static const struct of_device_id fsl_dcu_of_match[] = {
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h
index c275f900ff23..3b371fe7491e 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.h
@@ -175,6 +175,7 @@ struct fsl_dcu_soc_data {
 	unsigned int total_layer;
 	/*max layer number DCU supported*/
 	unsigned int max_layer;
+	unsigned int layer_regs;
 };
 
 struct fsl_dcu_drm_device {
@@ -193,6 +194,7 @@ struct fsl_dcu_drm_device {
 	struct drm_encoder encoder;
 	struct fsl_dcu_drm_connector connector;
 	const struct fsl_dcu_soc_data *soc;
+	struct drm_atomic_state *state;
 };
 
 void fsl_dcu_fbdev_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c
index 274558b3b32b..e50467a0deb0 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.c
@@ -217,6 +217,22 @@ static const u32 fsl_dcu_drm_plane_formats[] = {
 	DRM_FORMAT_YUV422,
 };
 
+void fsl_dcu_drm_init_planes(struct drm_device *dev)
+{
+	struct fsl_dcu_drm_device *fsl_dev = dev->dev_private;
+	int i, j;
+
+	for (i = 0; i < fsl_dev->soc->total_layer; i++) {
+		for (j = 1; j <= fsl_dev->soc->layer_regs; j++)
+			regmap_write(fsl_dev->regmap, DCU_CTRLDESCLN(i, j), 0);
+	}
+	regmap_update_bits(fsl_dev->regmap, DCU_DCU_MODE,
+			   DCU_MODE_DCU_MODE_MASK,
+			   DCU_MODE_DCU_MODE(DCU_MODE_OFF));
+	regmap_write(fsl_dev->regmap, DCU_UPDATE_MODE,
+		     DCU_UPDATE_MODE_READREG);
+}
+
 struct drm_plane *fsl_dcu_drm_primary_create_plane(struct drm_device *dev)
 {
 	struct drm_plane *primary;
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.h b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.h
index d657f088d859..8ee45f813ee8 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.h
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_plane.h
@@ -12,6 +12,7 @@
 #ifndef __FSL_DCU_DRM_PLANE_H__
 #define __FSL_DCU_DRM_PLANE_H__
 
+void fsl_dcu_drm_init_planes(struct drm_device *dev);
 struct drm_plane *fsl_dcu_drm_primary_create_plane(struct drm_device *dev);
 
 #endif /* __FSL_DCU_DRM_PLANE_H__ */
diff --git a/drivers/gpu/drm/hisilicon/kirin/Kconfig b/drivers/gpu/drm/hisilicon/kirin/Kconfig
index ea0df6115f7e..499f64405dac 100644
--- a/drivers/gpu/drm/hisilicon/kirin/Kconfig
+++ b/drivers/gpu/drm/hisilicon/kirin/Kconfig
@@ -4,6 +4,7 @@ config DRM_HISI_KIRIN
 	select DRM_KMS_HELPER
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_CMA_HELPER
+	select HISI_KIRIN_DW_DSI
 	help
 	  Choose this option if you have a hisilicon Kirin chipsets(hi6220).
 	  If M is selected the module will be called kirin-drm.
diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
index 805f4326fafe..c3707d47cd89 100644
--- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
+++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_ade.c
@@ -967,21 +967,21 @@ static int ade_dts_parse(struct platform_device *pdev, struct ade_hw_ctx *ctx)
 	}
 
 	ctx->ade_core_clk = devm_clk_get(dev, "clk_ade_core");
-	if (!ctx->ade_core_clk) {
+	if (IS_ERR(ctx->ade_core_clk)) {
 		DRM_ERROR("failed to parse clk ADE_CORE\n");
-		return -ENODEV;
+		return PTR_ERR(ctx->ade_core_clk);
 	}
 
 	ctx->media_noc_clk = devm_clk_get(dev, "clk_codec_jpeg");
-	if (!ctx->media_noc_clk) {
+	if (IS_ERR(ctx->media_noc_clk)) {
 		DRM_ERROR("failed to parse clk CODEC_JPEG\n");
-	    return -ENODEV;
+		return PTR_ERR(ctx->media_noc_clk);
 	}
 
 	ctx->ade_pix_clk = devm_clk_get(dev, "clk_ade_pix");
-	if (!ctx->ade_pix_clk) {
+	if (IS_ERR(ctx->ade_pix_clk)) {
 		DRM_ERROR("failed to parse clk ADE_PIX\n");
-	    return -ENODEV;
+		return PTR_ERR(ctx->ade_pix_clk);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 0b84f8e5df50..ffa43eca14d3 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -4622,9 +4622,6 @@ intel_dp_hpd_pulse(struct intel_digital_port *intel_dig_port, bool long_hpd)
 	intel_display_power_get(dev_priv, power_domain);
 
 	if (long_hpd) {
-		/* indicate that we need to restart link training */
-		intel_dp->train_set_valid = false;
-
 		intel_dp_long_pulse(intel_dp->attached_connector);
 		if (intel_dp->is_mst)
 			ret = IRQ_HANDLED;
diff --git a/drivers/gpu/drm/i915/intel_dp_link_training.c b/drivers/gpu/drm/i915/intel_dp_link_training.c
index 0b8eefc2acc5..60fb39cd220b 100644
--- a/drivers/gpu/drm/i915/intel_dp_link_training.c
+++ b/drivers/gpu/drm/i915/intel_dp_link_training.c
@@ -85,8 +85,7 @@ static bool
 intel_dp_reset_link_train(struct intel_dp *intel_dp,
 			uint8_t dp_train_pat)
 {
-	if (!intel_dp->train_set_valid)
-		memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set));
+	memset(intel_dp->train_set, 0, sizeof(intel_dp->train_set));
 	intel_dp_set_signal_levels(intel_dp);
 	return intel_dp_set_link_train(intel_dp, dp_train_pat);
 }
@@ -161,23 +160,6 @@ intel_dp_link_training_clock_recovery(struct intel_dp *intel_dp)
 			break;
 		}
 
-		/*
-		 * if we used previously trained voltage and pre-emphasis values
-		 * and we don't get clock recovery, reset link training values
-		 */
-		if (intel_dp->train_set_valid) {
-			DRM_DEBUG_KMS("clock recovery not ok, reset");
-			/* clear the flag as we are not reusing train set */
-			intel_dp->train_set_valid = false;
-			if (!intel_dp_reset_link_train(intel_dp,
-						       DP_TRAINING_PATTERN_1 |
-						       DP_LINK_SCRAMBLING_DISABLE)) {
-				DRM_ERROR("failed to enable link training\n");
-				return;
-			}
-			continue;
-		}
-
 		/* Check to see if we've tried the max voltage */
 		for (i = 0; i < intel_dp->lane_count; i++)
 			if ((intel_dp->train_set[i] & DP_TRAIN_MAX_SWING_REACHED) == 0)
@@ -284,7 +266,6 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
 		/* Make sure clock is still ok */
 		if (!drm_dp_clock_recovery_ok(link_status,
 					      intel_dp->lane_count)) {
-			intel_dp->train_set_valid = false;
 			intel_dp_link_training_clock_recovery(intel_dp);
 			intel_dp_set_link_train(intel_dp,
 						training_pattern |
@@ -301,7 +282,6 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
 
 		/* Try 5 times, then try clock recovery if that fails */
 		if (tries > 5) {
-			intel_dp->train_set_valid = false;
 			intel_dp_link_training_clock_recovery(intel_dp);
 			intel_dp_set_link_train(intel_dp,
 						training_pattern |
@@ -322,10 +302,8 @@ intel_dp_link_training_channel_equalization(struct intel_dp *intel_dp)
 
 	intel_dp_set_idle_link_train(intel_dp);
 
-	if (channel_eq) {
-		intel_dp->train_set_valid = true;
+	if (channel_eq)
 		DRM_DEBUG_KMS("Channel EQ done. DP Training successful\n");
-	}
 }
 
 void intel_dp_stop_link_train(struct intel_dp *intel_dp)
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 7d0e071fe355..089a42577ea3 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -886,8 +886,6 @@ struct intel_dp {
 	/* This is called before a link training is starterd */
 	void (*prepare_link_retrain)(struct intel_dp *intel_dp);
 
-	bool train_set_valid;
-
 	/* Displayport compliance testing */
 	unsigned long compliance_test_type;
 	unsigned long compliance_test_data;
diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
index ecabd59ffbaf..a19944b6dc25 100644
--- a/drivers/gpu/drm/i915/intel_fbc.c
+++ b/drivers/gpu/drm/i915/intel_fbc.c
@@ -818,8 +818,7 @@ static bool intel_fbc_can_choose(struct intel_crtc *crtc)
 {
 	struct drm_i915_private *dev_priv = crtc->base.dev->dev_private;
 	struct intel_fbc *fbc = &dev_priv->fbc;
-	bool enable_by_default = IS_HASWELL(dev_priv) ||
-				 IS_BROADWELL(dev_priv);
+	bool enable_by_default = IS_BROADWELL(dev_priv);
 
 	if (intel_vgpu_active(dev_priv)) {
 		fbc->no_fbc_reason = "VGPU is active";
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c
index ba812ef2c9d1..334562d06731 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c
@@ -1535,7 +1535,7 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi,
  * HDMI audio codec callbacks
  */
 
-static int mtk_hdmi_audio_hw_params(struct device *dev,
+static int mtk_hdmi_audio_hw_params(struct device *dev, void *data,
 				    struct hdmi_codec_daifmt *daifmt,
 				    struct hdmi_codec_params *params)
 {
@@ -1604,7 +1604,7 @@ static int mtk_hdmi_audio_hw_params(struct device *dev,
 	return 0;
 }
 
-static int mtk_hdmi_audio_startup(struct device *dev)
+static int mtk_hdmi_audio_startup(struct device *dev, void *data)
 {
 	struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
 
@@ -1615,7 +1615,7 @@ static int mtk_hdmi_audio_startup(struct device *dev)
 	return 0;
 }
 
-static void mtk_hdmi_audio_shutdown(struct device *dev)
+static void mtk_hdmi_audio_shutdown(struct device *dev, void *data)
 {
 	struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
 
@@ -1624,7 +1624,7 @@ static void mtk_hdmi_audio_shutdown(struct device *dev)
 	mtk_hdmi_audio_disable(hdmi);
 }
 
-int mtk_hdmi_audio_digital_mute(struct device *dev, bool enable)
+int mtk_hdmi_audio_digital_mute(struct device *dev, void *data, bool enable)
 {
 	struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
 
@@ -1638,7 +1638,7 @@ int mtk_hdmi_audio_digital_mute(struct device *dev, bool enable)
 	return 0;
 }
 
-static int mtk_hdmi_audio_get_eld(struct device *dev, uint8_t *buf, size_t len)
+static int mtk_hdmi_audio_get_eld(struct device *dev, void *data, uint8_t *buf, size_t len)
 {
 	struct mtk_hdmi *hdmi = dev_get_drvdata(dev);
 
diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c
index 9d5083d0f1ee..68268e55d595 100644
--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
+++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
@@ -186,17 +186,6 @@ static void mgag200_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_r
 {
 }
 
-static int mgag200_bo_move(struct ttm_buffer_object *bo,
-		       bool evict, bool interruptible,
-		       bool no_wait_gpu,
-		       struct ttm_mem_reg *new_mem)
-{
-	int r;
-	r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
-	return r;
-}
-
-
 static void mgag200_ttm_backend_destroy(struct ttm_tt *tt)
 {
 	ttm_tt_fini(tt);
@@ -241,7 +230,7 @@ struct ttm_bo_driver mgag200_bo_driver = {
 	.ttm_tt_unpopulate = mgag200_ttm_tt_unpopulate,
 	.init_mem_type = mgag200_bo_init_mem_type,
 	.evict_flags = mgag200_bo_evict_flags,
-	.move = mgag200_bo_move,
+	.move = NULL,
 	.verify_access = mgag200_bo_verify_access,
 	.io_mem_reserve = &mgag200_ttm_io_mem_reserve,
 	.io_mem_free = &mgag200_ttm_io_mem_free,
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index fbe304ee6c80..2aec27dbb5bb 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -408,7 +408,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	}
 
 	adreno_gpu->memptrs = msm_gem_vaddr(adreno_gpu->memptrs_bo);
-	if (!adreno_gpu->memptrs) {
+	if (IS_ERR(adreno_gpu->memptrs)) {
 		dev_err(drm->dev, "could not vmap memptrs\n");
-		return -ENOMEM;
+		return PTR_ERR(adreno_gpu->memptrs);
 	}
diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c
index 1a061e3e8b9e..a9223bea871b 100644
--- a/drivers/gpu/drm/msm/msm_fbdev.c
+++ b/drivers/gpu/drm/msm/msm_fbdev.c
@@ -159,6 +159,10 @@ static int msm_fbdev_create(struct drm_fb_helper *helper,
 	dev->mode_config.fb_base = paddr;
 
 	fbi->screen_base = msm_gem_vaddr_locked(fbdev->bo);
+	if (IS_ERR(fbi->screen_base)) {
+		ret = PTR_ERR(fbi->screen_base);
+		goto fail_unlock;
+	}
 	fbi->screen_size = fbdev->bo->size;
 	fbi->fix.smem_start = paddr;
 	fbi->fix.smem_len = fbdev->bo->size;
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 7daf4054dd2b..69836f5685b1 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -398,6 +398,8 @@ void *msm_gem_vaddr_locked(struct drm_gem_object *obj)
 			return ERR_CAST(pages);
 		msm_obj->vaddr = vmap(pages, obj->size >> PAGE_SHIFT,
 				VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+		if (msm_obj->vaddr == NULL)
+			return ERR_PTR(-ENOMEM);
 	}
 	return msm_obj->vaddr;
 }
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index b89ca5174863..eb4bb8b2f3a5 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -40,12 +40,14 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
 
 	submit->dev = dev;
 	submit->gpu = gpu;
+	submit->fence = NULL;
 	submit->pid = get_pid(task_pid(current));
 
 	/* initially, until copy_from_user() and bo lookup succeeds: */
 	submit->nr_bos = 0;
 	submit->nr_cmds = 0;
 
+	INIT_LIST_HEAD(&submit->node);
 	INIT_LIST_HEAD(&submit->bo_list);
 	ww_acquire_init(&submit->ticket, &reservation_ww_class);
 
@@ -75,6 +77,11 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
 		void __user *userptr =
 			u64_to_user_ptr(args->bos + (i * sizeof(submit_bo)));
 
+		/* make sure we don't have garbage flags, in case we hit
+		 * error path before flags is initialized:
+		 */
+		submit->bos[i].flags = 0;
+
 		ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
 		if (ret) {
 			ret = -EFAULT;
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index b48f73ac6389..0857710c2ff2 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -312,6 +312,9 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit)
 		struct msm_gem_object *obj = submit->bos[idx].obj;
 		const char *buf = msm_gem_vaddr_locked(&obj->base);
 
+		if (IS_ERR(buf))
+			continue;
+
 		buf += iova - submit->bos[idx].iova;
 
 		rd_write_section(rd, RD_GPUADDR,
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index 1f14b908b221..42f5359cf988 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -40,6 +40,10 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size)
 	}
 
 	ring->start = msm_gem_vaddr_locked(ring->bo);
+	if (IS_ERR(ring->start)) {
+		ret = PTR_ERR(ring->start);
+		goto fail;
+	}
 	ring->end   = ring->start + (size / 4);
 	ring->cur   = ring->start;
 
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
index 331620a52afa..287a7d6fa480 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
@@ -29,6 +29,7 @@ struct nv_device_info_v0 {
 #define NV_DEVICE_INFO_V0_FERMI                                            0x07
 #define NV_DEVICE_INFO_V0_KEPLER                                           0x08
 #define NV_DEVICE_INFO_V0_MAXWELL                                          0x09
+#define NV_DEVICE_INFO_V0_PASCAL                                           0x0a
 	__u8  family;
 	__u8  pad06[2];
 	__u64 ram_size;
diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h
index 982aad8fa645..e6e9537537cf 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/class.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/class.h
@@ -39,6 +39,7 @@
 #define KEPLER_CHANNEL_GPFIFO_A                       /* cla06f.h */ 0x0000a06f
 #define KEPLER_CHANNEL_GPFIFO_B                       /* cla06f.h */ 0x0000a16f
 #define MAXWELL_CHANNEL_GPFIFO_A                      /* cla06f.h */ 0x0000b06f
+#define PASCAL_CHANNEL_GPFIFO_A                       /* cla06f.h */ 0x0000c06f
 
 #define NV50_DISP                                     /* cl5070.h */ 0x00005070
 #define G82_DISP                                      /* cl5070.h */ 0x00008270
@@ -50,6 +51,8 @@
 #define GK110_DISP                                    /* cl5070.h */ 0x00009270
 #define GM107_DISP                                    /* cl5070.h */ 0x00009470
 #define GM200_DISP                                    /* cl5070.h */ 0x00009570
+#define GP100_DISP                                    /* cl5070.h */ 0x00009770
+#define GP104_DISP                                    /* cl5070.h */ 0x00009870
 
 #define NV31_MPEG                                                    0x00003174
 #define G82_MPEG                                                     0x00008274
@@ -86,6 +89,8 @@
 #define GK110_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000927d
 #define GM107_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000947d
 #define GM200_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000957d
+#define GP100_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000977d
+#define GP104_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000987d
 
 #define NV50_DISP_OVERLAY_CHANNEL_DMA                 /* cl507e.h */ 0x0000507e
 #define G82_DISP_OVERLAY_CHANNEL_DMA                  /* cl507e.h */ 0x0000827e
@@ -105,6 +110,8 @@
 #define MAXWELL_A                                     /* cl9097.h */ 0x0000b097
 #define MAXWELL_B                                     /* cl9097.h */ 0x0000b197
 
+#define PASCAL_A                                      /* cl9097.h */ 0x0000c097
+
 #define NV74_BSP                                                     0x000074b0
 
 #define GT212_MSVLD                                                  0x000085b1
@@ -128,6 +135,8 @@
 #define FERMI_DMA                                                    0x000090b5
 #define KEPLER_DMA_COPY_A                                            0x0000a0b5
 #define MAXWELL_DMA_COPY_A                                           0x0000b0b5
+#define PASCAL_DMA_COPY_A                                            0x0000c0b5
+#define PASCAL_DMA_COPY_B                                            0x0000c1b5
 
 #define FERMI_DECOMPRESS                                             0x000090b8
 
@@ -137,6 +146,7 @@
 #define KEPLER_COMPUTE_B                                             0x0000a1c0
 #define MAXWELL_COMPUTE_A                                            0x0000b0c0
 #define MAXWELL_COMPUTE_B                                            0x0000b1c0
+#define PASCAL_COMPUTE_A                                             0x0000c0c0
 
 #define NV74_CIPHER                                                  0x000074c1
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
index c612dc1f1eb4..7ea8aa7ca408 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
@@ -16,9 +16,9 @@ enum nvkm_devidx {
 	NVKM_SUBDEV_MC,
 	NVKM_SUBDEV_BUS,
 	NVKM_SUBDEV_TIMER,
+	NVKM_SUBDEV_INSTMEM,
 	NVKM_SUBDEV_FB,
 	NVKM_SUBDEV_LTC,
-	NVKM_SUBDEV_INSTMEM,
 	NVKM_SUBDEV_MMU,
 	NVKM_SUBDEV_BAR,
 	NVKM_SUBDEV_PMU,
@@ -33,7 +33,10 @@ enum nvkm_devidx {
 	NVKM_ENGINE_CE0,
 	NVKM_ENGINE_CE1,
 	NVKM_ENGINE_CE2,
-	NVKM_ENGINE_CE_LAST = NVKM_ENGINE_CE2,
+	NVKM_ENGINE_CE3,
+	NVKM_ENGINE_CE4,
+	NVKM_ENGINE_CE5,
+	NVKM_ENGINE_CE_LAST = NVKM_ENGINE_CE5,
 
 	NVKM_ENGINE_CIPHER,
 	NVKM_ENGINE_DISP,
@@ -50,7 +53,8 @@ enum nvkm_devidx {
 
 	NVKM_ENGINE_NVENC0,
 	NVKM_ENGINE_NVENC1,
-	NVKM_ENGINE_NVENC_LAST = NVKM_ENGINE_NVENC1,
+	NVKM_ENGINE_NVENC2,
+	NVKM_ENGINE_NVENC_LAST = NVKM_ENGINE_NVENC2,
 
 	NVKM_ENGINE_NVDEC,
 	NVKM_ENGINE_PM,
@@ -102,6 +106,7 @@ struct nvkm_device {
 		NV_C0    = 0xc0,
 		NV_E0    = 0xe0,
 		GM100    = 0x110,
+		GP100    = 0x130,
 	} card_type;
 	u32 chipset;
 	u8  chiprev;
@@ -136,7 +141,7 @@ struct nvkm_device {
 	struct nvkm_volt *volt;
 
 	struct nvkm_engine *bsp;
-	struct nvkm_engine *ce[3];
+	struct nvkm_engine *ce[6];
 	struct nvkm_engine *cipher;
 	struct nvkm_disp *disp;
 	struct nvkm_dma *dma;
@@ -149,7 +154,7 @@ struct nvkm_device {
 	struct nvkm_engine *mspdec;
 	struct nvkm_engine *msppp;
 	struct nvkm_engine *msvld;
-	struct nvkm_engine *nvenc[2];
+	struct nvkm_engine *nvenc[3];
 	struct nvkm_engine *nvdec;
 	struct nvkm_pm *pm;
 	struct nvkm_engine *sec;
@@ -170,7 +175,6 @@ struct nvkm_device_func {
 	void (*fini)(struct nvkm_device *, bool suspend);
 	resource_size_t (*resource_addr)(struct nvkm_device *, unsigned bar);
 	resource_size_t (*resource_size)(struct nvkm_device *, unsigned bar);
-	bool cpu_coherent;
 };
 
 struct nvkm_device_quirk {
@@ -206,7 +210,7 @@ struct nvkm_device_chip {
 	int (*volt    )(struct nvkm_device *, int idx, struct nvkm_volt **);
 
 	int (*bsp     )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*ce[3]   )(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*ce[6]   )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*cipher  )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*disp    )(struct nvkm_device *, int idx, struct nvkm_disp **);
 	int (*dma     )(struct nvkm_device *, int idx, struct nvkm_dma **);
@@ -219,7 +223,7 @@ struct nvkm_device_chip {
 	int (*mspdec  )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*msppp   )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*msvld   )(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*nvenc[2])(struct nvkm_device *, int idx, struct nvkm_engine **);
+	int (*nvenc[3])(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*nvdec   )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*pm      )(struct nvkm_device *, int idx, struct nvkm_pm **);
 	int (*sec     )(struct nvkm_device *, int idx, struct nvkm_engine **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
index b5370cb56e3c..e5c9b6268dcc 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
@@ -28,6 +28,7 @@ struct nvkm_device_tegra {
 	} iommu;
 
 	int gpu_speedo;
+	int gpu_speedo_id;
 };
 
 struct nvkm_device_tegra_func {
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
index 594d719ba41e..d3d26a1e215d 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
@@ -7,4 +7,6 @@ int gf100_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gk104_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gm107_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gm200_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
+int gp100_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
+int gp104_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
index d4fdce27b297..e82049667ce4 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
@@ -32,4 +32,6 @@ int gk104_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gk110_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gm107_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gm200_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
+int gp100_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
+int gp104_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
index 15ddfcf5e8db..ed92fec5292c 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
@@ -66,4 +66,5 @@ int gk20a_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gm107_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gm200_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gm20b_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
+int gp100_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
index 6515f5810a26..89cf99307828 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
@@ -42,4 +42,5 @@ int gk20a_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gm107_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gm200_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 int gm20b_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
+int gp100_gr_new(struct nvkm_device *, int, struct nvkm_gr **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h
index e39a1fea930b..a72f3290528a 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios.h
@@ -7,6 +7,9 @@ struct nvkm_bios {
 	u32 size;
 	u8 *data;
 
+	u32 image0_size;
+	u32 imaged_addr;
+
 	u32 bmp_offset;
 	u32 bit_offset;
 
@@ -22,10 +25,9 @@ struct nvkm_bios {
 u8  nvbios_checksum(const u8 *data, int size);
 u16 nvbios_findstr(const u8 *data, int size, const char *str, int len);
 int nvbios_memcmp(struct nvkm_bios *, u32 addr, const char *, u32 len);
-
-#define nvbios_rd08(b,o) (b)->data[(o)]
-#define nvbios_rd16(b,o) get_unaligned_le16(&(b)->data[(o)])
-#define nvbios_rd32(b,o) get_unaligned_le32(&(b)->data[(o)])
+u8  nvbios_rd08(struct nvkm_bios *, u32 addr);
+u16 nvbios_rd16(struct nvkm_bios *, u32 addr);
+u32 nvbios_rd32(struct nvkm_bios *, u32 addr);
 
 int nvkm_bios_new(struct nvkm_device *, int, struct nvkm_bios **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h
index db10c11f0595..c5a6ebd5a478 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/disp.h
@@ -25,7 +25,8 @@ u16 nvbios_outp_match(struct nvkm_bios *, u16 type, u16 mask,
 		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_outp *);
 
 struct nvbios_ocfg {
-	u16 match;
+	u8  proto;
+	u8  flags;
 	u16 clkcmp[2];
 };
 
@@ -33,7 +34,7 @@ u16 nvbios_ocfg_entry(struct nvkm_bios *, u16 outp, u8 idx,
 		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
 u16 nvbios_ocfg_parse(struct nvkm_bios *, u16 outp, u8 idx,
 		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *);
-u16 nvbios_ocfg_match(struct nvkm_bios *, u16 outp, u16 type,
+u16 nvbios_ocfg_match(struct nvkm_bios *, u16 outp, u8 proto, u8 flags,
 		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *);
 u16 nvbios_oclk_match(struct nvkm_bios *, u16 cmp, u32 khz);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
index 0a734fd06acf..3a410275fa71 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
@@ -56,6 +56,8 @@ struct nvkm_fb {
 		int regions;
 	} tile;
 
+	u8 page;
+
 	struct nvkm_memory *mmu_rd;
 	struct nvkm_memory *mmu_wr;
 };
@@ -91,6 +93,8 @@ int gk104_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gk20a_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gm107_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gm200_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
+int gp100_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
+int gp104_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 
 #include <subdev/bios.h>
 #include <subdev/bios/ramcfg.h>
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
index c6b90b6543b3..cd755baf9cab 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
@@ -38,4 +38,5 @@ int gk104_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gk20a_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gm107_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 int gm200_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
+int gp100_ltc_new(struct nvkm_device *, int, struct nvkm_ltc **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
index 2e80682b2da1..27d25b18d85c 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
@@ -7,11 +7,14 @@ struct nvkm_mc {
 	struct nvkm_subdev subdev;
 };
 
-void nvkm_mc_intr(struct nvkm_mc *, bool *handled);
-void nvkm_mc_intr_unarm(struct nvkm_mc *);
-void nvkm_mc_intr_rearm(struct nvkm_mc *);
-void nvkm_mc_reset(struct nvkm_mc *, enum nvkm_devidx);
-void nvkm_mc_unk260(struct nvkm_mc *, u32 data);
+void nvkm_mc_enable(struct nvkm_device *, enum nvkm_devidx);
+void nvkm_mc_disable(struct nvkm_device *, enum nvkm_devidx);
+void nvkm_mc_reset(struct nvkm_device *, enum nvkm_devidx);
+void nvkm_mc_intr(struct nvkm_device *, bool *handled);
+void nvkm_mc_intr_unarm(struct nvkm_device *);
+void nvkm_mc_intr_rearm(struct nvkm_device *);
+void nvkm_mc_intr_mask(struct nvkm_device *, enum nvkm_devidx, bool enable);
+void nvkm_mc_unk260(struct nvkm_device *, u32 data);
 
 int nv04_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int nv11_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
@@ -24,4 +27,5 @@ int gt215_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int gf100_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int gk104_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int gk20a_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
+int gp100_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
index ddb913889d7e..e6523e2cea9f 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
@@ -47,6 +47,7 @@ int g94_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gf100_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gf106_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gk104_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
+int gp100_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 
 /* pcie functions */
 int nvkm_pcie_set_link(struct nvkm_pci *, enum nvkm_pcie_speed, u8 width);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
index c6edd95a5b69..b04c38c07761 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
@@ -43,9 +43,8 @@ struct nvkm_secboot {
 	const struct nvkm_secboot_func *func;
 	struct nvkm_subdev subdev;
 
+	enum nvkm_devidx devidx;
 	u32 base;
-	u32 irq_mask;
-	u32 enable_mask;
 };
 #define nvkm_secboot(p) container_of((p), struct nvkm_secboot, subdev)
 
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
index 8fb575a92c48..71ebbfd4484f 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
@@ -8,10 +8,11 @@ struct nvkm_top {
 	struct list_head device;
 };
 
-u32 nvkm_top_reset(struct nvkm_top *, enum nvkm_devidx);
-u32 nvkm_top_intr(struct nvkm_top *, u32 intr, u64 *subdevs);
-enum nvkm_devidx nvkm_top_fault(struct nvkm_top *, int fault);
-enum nvkm_devidx nvkm_top_engine(struct nvkm_top *, int, int *runl, int *engn);
+u32 nvkm_top_reset(struct nvkm_device *, enum nvkm_devidx);
+u32 nvkm_top_intr(struct nvkm_device *, u32 intr, u64 *subdevs);
+u32 nvkm_top_intr_mask(struct nvkm_device *, enum nvkm_devidx);
+enum nvkm_devidx nvkm_top_fault(struct nvkm_device *, int fault);
+enum nvkm_devidx nvkm_top_engine(struct nvkm_device *, int, int *runl, int *engn);
 
 int gk104_top_new(struct nvkm_device *, int, struct nvkm_top **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
index feff55cff05b..b765f4ffcde6 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
@@ -12,6 +12,9 @@ struct nvkm_volt {
 		u32 uv;
 		u8 vid;
 	} vid[256];
+
+	u32 max_uv;
+	u32 min_uv;
 };
 
 int nvkm_volt_get(struct nvkm_volt *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index eb7de487a2b3..7bd4683216d0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -100,6 +100,7 @@ nouveau_abi16_swclass(struct nouveau_drm *drm)
 	case NV_DEVICE_INFO_V0_FERMI:
 	case NV_DEVICE_INFO_V0_KEPLER:
 	case NV_DEVICE_INFO_V0_MAXWELL:
+	case NV_DEVICE_INFO_V0_PASCAL:
 		return NVIF_CLASS_SW_GF100;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 5e3f3e826476..528bdeffb339 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -209,8 +209,7 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
 	nvbo->tile_flags = tile_flags;
 	nvbo->bo.bdev = &drm->ttm.bdev;
 
-	if (!nvxx_device(&drm->device)->func->cpu_coherent)
-		nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
+	nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
 
 	nvbo->page_shift = 12;
 	if (drm->client.vm) {
@@ -424,13 +423,7 @@ nouveau_bo_map(struct nouveau_bo *nvbo)
 	if (ret)
 		return ret;
 
-	/*
-	 * TTM buffers allocated using the DMA API already have a mapping, let's
-	 * use it instead.
-	 */
-	if (!nvbo->force_coherent)
-		ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages,
-				  &nvbo->kmap);
+	ret = ttm_bo_kmap(&nvbo->bo, 0, nvbo->bo.mem.num_pages, &nvbo->kmap);
 
 	ttm_bo_unreserve(&nvbo->bo);
 	return ret;
@@ -442,12 +435,7 @@ nouveau_bo_unmap(struct nouveau_bo *nvbo)
 	if (!nvbo)
 		return;
 
-	/*
-	 * TTM buffers allocated using the DMA API already had a coherent
-	 * mapping which we used, no need to unmap.
-	 */
-	if (!nvbo->force_coherent)
-		ttm_bo_kunmap(&nvbo->kmap);
+	ttm_bo_kunmap(&nvbo->kmap);
 }
 
 void
@@ -506,35 +494,13 @@ nouveau_bo_validate(struct nouveau_bo *nvbo, bool interruptible,
 	return 0;
 }
 
-static inline void *
-_nouveau_bo_mem_index(struct nouveau_bo *nvbo, unsigned index, void *mem, u8 sz)
-{
-	struct ttm_dma_tt *dma_tt;
-	u8 *m = mem;
-
-	index *= sz;
-
-	if (m) {
-		/* kmap'd address, return the corresponding offset */
-		m += index;
-	} else {
-		/* DMA-API mapping, lookup the right address */
-		dma_tt = (struct ttm_dma_tt *)nvbo->bo.ttm;
-		m = dma_tt->cpu_address[index / PAGE_SIZE];
-		m += index % PAGE_SIZE;
-	}
-
-	return m;
-}
-#define nouveau_bo_mem_index(o, i, m) _nouveau_bo_mem_index(o, i, m, sizeof(*m))
-
 void
 nouveau_bo_wr16(struct nouveau_bo *nvbo, unsigned index, u16 val)
 {
 	bool is_iomem;
 	u16 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
 
-	mem = nouveau_bo_mem_index(nvbo, index, mem);
+	mem += index;
 
 	if (is_iomem)
 		iowrite16_native(val, (void __force __iomem *)mem);
@@ -548,7 +514,7 @@ nouveau_bo_rd32(struct nouveau_bo *nvbo, unsigned index)
 	bool is_iomem;
 	u32 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
 
-	mem = nouveau_bo_mem_index(nvbo, index, mem);
+	mem += index;
 
 	if (is_iomem)
 		return ioread32_native((void __force __iomem *)mem);
@@ -562,7 +528,7 @@ nouveau_bo_wr32(struct nouveau_bo *nvbo, unsigned index, u32 val)
 	bool is_iomem;
 	u32 *mem = ttm_kmap_obj_virtual(&nvbo->kmap, &is_iomem);
 
-	mem = nouveau_bo_mem_index(nvbo, index, mem);
+	mem += index;
 
 	if (is_iomem)
 		iowrite32_native(val, (void __force __iomem *)mem);
@@ -1082,7 +1048,6 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
 				ret = ttm_bo_move_accel_cleanup(bo,
 								&fence->base,
 								evict,
-								no_wait_gpu,
 								new_mem);
 				nouveau_fence_unref(&fence);
 			}
@@ -1104,6 +1069,10 @@ nouveau_bo_move_init(struct nouveau_drm *drm)
 			    struct ttm_mem_reg *, struct ttm_mem_reg *);
 		int (*init)(struct nouveau_channel *, u32 handle);
 	} _methods[] = {
+		{  "COPY", 4, 0xc1b5, nve0_bo_move_copy, nve0_bo_move_init },
+		{  "GRCE", 0, 0xc1b5, nve0_bo_move_copy, nvc0_bo_move_init },
+		{  "COPY", 4, 0xc0b5, nve0_bo_move_copy, nve0_bo_move_init },
+		{  "GRCE", 0, 0xc0b5, nve0_bo_move_copy, nvc0_bo_move_init },
 		{  "COPY", 4, 0xb0b5, nve0_bo_move_copy, nve0_bo_move_init },
 		{  "GRCE", 0, 0xb0b5, nve0_bo_move_copy, nvc0_bo_move_init },
 		{  "COPY", 4, 0xa0b5, nve0_bo_move_copy, nve0_bo_move_init },
@@ -1289,6 +1258,10 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 	struct nouveau_drm_tile *new_tile = NULL;
 	int ret = 0;
 
+	ret = ttm_bo_wait(bo, intr, no_wait_gpu);
+	if (ret)
+		return ret;
+
 	if (nvbo->pin_refcnt)
 		NV_WARN(drm, "Moving pinned object %p!\n", nvbo);
 
@@ -1324,7 +1297,7 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 	/* Fallback to software copy. */
 	ret = ttm_bo_wait(bo, intr, no_wait_gpu);
 	if (ret == 0)
-		ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
+		ret = ttm_bo_move_memcpy(bo, evict, intr, no_wait_gpu, new_mem);
 
 out:
 	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
@@ -1488,14 +1461,6 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm)
 	dev = drm->dev;
 	pdev = device->dev;
 
-	/*
-	 * Objects matching this condition have been marked as force_coherent,
-	 * so use the DMA API for them.
-	 */
-	if (!nvxx_device(&drm->device)->func->cpu_coherent &&
-	    ttm->caching_state == tt_uncached)
-		return ttm_dma_populate(ttm_dma, dev->dev);
-
 #if IS_ENABLED(CONFIG_AGP)
 	if (drm->agp.bridge) {
 		return ttm_agp_tt_populate(ttm);
@@ -1553,16 +1518,6 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm)
 	dev = drm->dev;
 	pdev = device->dev;
 
-	/*
-	 * Objects matching this condition have been marked as force_coherent,
-	 * so use the DMA API for them.
-	 */
-	if (!nvxx_device(&drm->device)->func->cpu_coherent &&
-	    ttm->caching_state == tt_uncached) {
-		ttm_dma_unpopulate(ttm_dma, dev->dev);
-		return;
-	}
-
 #if IS_ENABLED(CONFIG_AGP)
 	if (drm->agp.bridge) {
 		ttm_agp_tt_unpopulate(ttm);
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index b1d2527c5625..f9b3c811187e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -191,7 +191,8 @@ static int
 nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device,
 		    u32 engine, struct nouveau_channel **pchan)
 {
-	static const u16 oclasses[] = { MAXWELL_CHANNEL_GPFIFO_A,
+	static const u16 oclasses[] = { PASCAL_CHANNEL_GPFIFO_A,
+					MAXWELL_CHANNEL_GPFIFO_A,
 					KEPLER_CHANNEL_GPFIFO_B,
 					KEPLER_CHANNEL_GPFIFO_A,
 					FERMI_CHANNEL_GPFIFO,
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 6e97862d31a1..afbf557b23d4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -495,6 +495,8 @@ nouveau_display_create(struct drm_device *dev)
 
 	if (nouveau_modeset != 2 && drm->vbios.dcb.entries) {
 		static const u16 oclass[] = {
+			GP104_DISP,
+			GP100_DISP,
 			GM200_DISP,
 			GM107_DISP,
 			GK110_DISP,
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 1161f8b452f2..66c1280c0f1f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -198,6 +198,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 		case KEPLER_CHANNEL_GPFIFO_A:
 		case KEPLER_CHANNEL_GPFIFO_B:
 		case MAXWELL_CHANNEL_GPFIFO_A:
+		case PASCAL_CHANNEL_GPFIFO_A:
 			ret = nvc0_fence_create(drm);
 			break;
 		default:
@@ -316,7 +317,16 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
 	if (vga_switcheroo_client_probe_defer(pdev))
 		return -EPROBE_DEFER;
 
-	/* remove conflicting drivers (vesafb, efifb etc) */
+	/* We need to check that the chipset is supported before booting
+	 * fbdev off the hardware, as there's no way to put it back.
+	 */
+	ret = nvkm_device_pci_new(pdev, NULL, "error", true, false, 0, &device);
+	if (ret)
+		return ret;
+
+	nvkm_device_del(&device);
+
+	/* Remove conflicting drivers (vesafb, efifb etc). */
 	aper = alloc_apertures(3);
 	if (!aper)
 		return -ENOMEM;
@@ -430,6 +440,11 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 	nouveau_vga_init(drm);
 
 	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+		if (!nvxx_device(&drm->device)->mmu) {
+			ret = -ENOSYS;
+			goto fail_device;
+		}
+
 		ret = nvkm_vm_new(nvxx_device(&drm->device), 0, (1ULL << 40),
 				  0x1000, NULL, &drm->client.vm);
 		if (ret)
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 57aaf98a26f9..d1f248fd3506 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -552,6 +552,8 @@ nouveau_fbcon_init(struct drm_device *dev)
 	if (ret)
 		goto fini;
 
+	if (fbcon->helper.fbdev)
+		fbcon->helper.fbdev->pixmap.buf_align = 4;
 	return 0;
 
 fini:
diff --git a/drivers/gpu/drm/nouveau/nouveau_hwmon.c b/drivers/gpu/drm/nouveau/nouveau_hwmon.c
index 1ff4166af26e..71f764bf4cc6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_hwmon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_hwmon.c
@@ -535,6 +535,40 @@ static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO,
 			  nouveau_hwmon_get_in0_input, NULL, 0);
 
 static ssize_t
+nouveau_hwmon_get_in0_min(struct device *d,
+			    struct device_attribute *a, char *buf)
+{
+	struct drm_device *dev = dev_get_drvdata(d);
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nvkm_volt *volt = nvxx_volt(&drm->device);
+
+	if (!volt || !volt->min_uv) /* no volt object, or min_uv is zero */
+		return -ENODEV;
+	/* Prints min_uv / 1000; presumably uV -> mV, TODO confirm units. */
+	return sprintf(buf, "%i\n", volt->min_uv / 1000);
+}
+
+static SENSOR_DEVICE_ATTR(in0_min, S_IRUGO,
+			  nouveau_hwmon_get_in0_min, NULL, 0); /* mode S_IRUGO, NULL after the show hook */
+/* Show hook for in0_max: writes volt->max_uv / 1000 to buf, else -ENODEV. */
+static ssize_t
+nouveau_hwmon_get_in0_max(struct device *d,
+			    struct device_attribute *a, char *buf)
+{
+	struct drm_device *dev = dev_get_drvdata(d);
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nvkm_volt *volt = nvxx_volt(&drm->device);
+
+	if (!volt || !volt->max_uv) /* no volt object, or max_uv is zero */
+		return -ENODEV;
+	/* Prints max_uv / 1000; presumably uV -> mV, TODO confirm units. */
+	return sprintf(buf, "%i\n", volt->max_uv / 1000);
+}
+
+static SENSOR_DEVICE_ATTR(in0_max, S_IRUGO,
+			  nouveau_hwmon_get_in0_max, NULL, 0); /* mode S_IRUGO, NULL after the show hook */
+
+static ssize_t
 nouveau_hwmon_get_in0_label(struct device *d,
 			    struct device_attribute *a, char *buf)
 {
@@ -594,6 +628,8 @@ static struct attribute *hwmon_pwm_fan_attributes[] = {
 
 static struct attribute *hwmon_in0_attributes[] = {
 	&sensor_dev_attr_in0_input.dev_attr.attr,
+	&sensor_dev_attr_in0_min.dev_attr.attr,
+	&sensor_dev_attr_in0_max.dev_attr.attr,
 	&sensor_dev_attr_in0_label.dev_attr.attr,
 	NULL
 };
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index bcee91497eb9..1825dbc33192 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -164,6 +164,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 	case NV_DEVICE_INFO_V0_FERMI:
 	case NV_DEVICE_INFO_V0_KEPLER:
 	case NV_DEVICE_INFO_V0_MAXWELL:
+	case NV_DEVICE_INFO_V0_PASCAL:
 		node->memtype = (nvbo->tile_flags & 0xff00) >> 8;
 		break;
 	default:
diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c
index 0f3e4bb411cc..7d9248b8c664 100644
--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c
@@ -82,7 +82,6 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	uint32_t fg;
 	uint32_t bg;
 	uint32_t dsize;
-	uint32_t width;
 	uint32_t *data = (uint32_t *)image->data;
 	int ret;
 
@@ -93,9 +92,6 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	if (ret)
 		return ret;
 
-	width = ALIGN(image->width, 8);
-	dsize = ALIGN(width * image->height, 32) >> 5;
-
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
 		fg = ((uint32_t *) info->pseudo_palette)[image->fg_color];
@@ -111,10 +107,11 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 			 ((image->dx + image->width) & 0xffff));
 	OUT_RING(chan, bg);
 	OUT_RING(chan, fg);
-	OUT_RING(chan, (image->height << 16) | width);
+	OUT_RING(chan, (image->height << 16) | image->width);
 	OUT_RING(chan, (image->height << 16) | image->width);
 	OUT_RING(chan, (image->dy << 16) | (image->dx & 0xffff));
 
+	dsize = ALIGN(image->width * image->height, 32) >> 5;
 	while (dsize) {
 		int iter_len = dsize > 128 ? 128 : dsize;
 
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 7a7788212df7..7d0edcbcfca7 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -297,6 +297,8 @@ nv50_core_create(struct nvif_device *device, struct nvif_object *disp,
 		.pushbuf = 0xb0007d00,
 	};
 	static const s32 oclass[] = {
+		GP104_DISP_CORE_CHANNEL_DMA,
+		GP100_DISP_CORE_CHANNEL_DMA,
 		GM200_DISP_CORE_CHANNEL_DMA,
 		GM107_DISP_CORE_CHANNEL_DMA,
 		GK110_DISP_CORE_CHANNEL_DMA,
diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c
index 33d9ee0fac40..1aeb698e9707 100644
--- a/drivers/gpu/drm/nouveau/nv50_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c
@@ -95,7 +95,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	struct nouveau_fbdev *nfbdev = info->par;
 	struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
 	struct nouveau_channel *chan = drm->channel;
-	uint32_t width, dwords, *data = (uint32_t *)image->data;
+	uint32_t dwords, *data = (uint32_t *)image->data;
 	uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel));
 	uint32_t *palette = info->pseudo_palette;
 	int ret;
@@ -107,9 +107,6 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	if (ret)
 		return ret;
 
-	width = ALIGN(image->width, 32);
-	dwords = (width * image->height) >> 5;
-
 	BEGIN_NV04(chan, NvSub2D, 0x0814, 2);
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
@@ -128,6 +125,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	OUT_RING(chan, 0);
 	OUT_RING(chan, image->dy);
 
+	dwords = ALIGN(image->width * image->height, 32) >> 5;
 	while (dwords) {
 		int push = dwords > 2047 ? 2047 : dwords;
 
diff --git a/drivers/gpu/drm/nouveau/nvc0_fbcon.c b/drivers/gpu/drm/nouveau/nvc0_fbcon.c
index a0913359ac05..839f4c8c1805 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fbcon.c
@@ -95,7 +95,7 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	struct nouveau_fbdev *nfbdev = info->par;
 	struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
 	struct nouveau_channel *chan = drm->channel;
-	uint32_t width, dwords, *data = (uint32_t *)image->data;
+	uint32_t dwords, *data = (uint32_t *)image->data;
 	uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel));
 	uint32_t *palette = info->pseudo_palette;
 	int ret;
@@ -107,9 +107,6 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	if (ret)
 		return ret;
 
-	width = ALIGN(image->width, 32);
-	dwords = (width * image->height) >> 5;
-
 	BEGIN_NVC0(chan, NvSub2D, 0x0814, 2);
 	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
 	    info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
@@ -128,6 +125,7 @@ nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	OUT_RING  (chan, 0);
 	OUT_RING  (chan, image->dy);
 
+	dwords = ALIGN(image->width * image->height, 32) >> 5;
 	while (dwords) {
 		int push = dwords > 2047 ? 2047 : dwords;
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
index b18557858f19..19044aba265e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
@@ -57,6 +57,9 @@ nvkm_subdev_name[NVKM_SUBDEV_NR] = {
 	[NVKM_ENGINE_CE0     ] = "ce0",
 	[NVKM_ENGINE_CE1     ] = "ce1",
 	[NVKM_ENGINE_CE2     ] = "ce2",
+	[NVKM_ENGINE_CE3     ] = "ce3",
+	[NVKM_ENGINE_CE4     ] = "ce4",
+	[NVKM_ENGINE_CE5     ] = "ce5",
 	[NVKM_ENGINE_CIPHER  ] = "cipher",
 	[NVKM_ENGINE_DISP    ] = "disp",
 	[NVKM_ENGINE_DMAOBJ  ] = "dma",
@@ -71,6 +74,7 @@ nvkm_subdev_name[NVKM_SUBDEV_NR] = {
 	[NVKM_ENGINE_MSVLD   ] = "msvld",
 	[NVKM_ENGINE_NVENC0  ] = "nvenc0",
 	[NVKM_ENGINE_NVENC1  ] = "nvenc1",
+	[NVKM_ENGINE_NVENC2  ] = "nvenc2",
 	[NVKM_ENGINE_NVDEC   ] = "nvdec",
 	[NVKM_ENGINE_PM      ] = "pm",
 	[NVKM_ENGINE_SEC     ] = "sec",
@@ -105,7 +109,7 @@ nvkm_subdev_fini(struct nvkm_subdev *subdev, bool suspend)
 		}
 	}
 
-	nvkm_mc_reset(device->mc, subdev->index);
+	nvkm_mc_reset(device, subdev->index);
 
 	time = ktime_to_us(ktime_get()) - time;
 	nvkm_trace(subdev, "%s completed in %lldus\n", action, time);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
index 9c19d59b47df..a4458a8eb30a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
@@ -3,3 +3,5 @@ nvkm-y += nvkm/engine/ce/gf100.o
 nvkm-y += nvkm/engine/ce/gk104.o
 nvkm-y += nvkm/engine/ce/gm107.o
 nvkm-y += nvkm/engine/ce/gm200.o
+nvkm-y += nvkm/engine/ce/gp100.o
+nvkm-y += nvkm/engine/ce/gp104.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp100.c
new file mode 100644
index 000000000000..c7710456bc30
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp100.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "priv.h"
+#include <core/enum.h>
+
+#include <nvif/class.h>
+/* Names for LAUNCHERR codes; gp100_ce_intr_launcherr() looks up stat & 0xf. */
+static const struct nvkm_enum
+gp100_ce_launcherr_report[] = {
+	{ 0x0, "NO_ERR" },
+	{ 0x1, "2D_LAYER_EXCEEDS_DEPTH" },
+	{ 0x2, "INVALID_ALIGNMENT" },
+	{ 0x3, "MEM2MEM_RECT_OUT_OF_BOUNDS" },
+	{ 0x4, "SRC_LINE_EXCEEDS_PITCH" },
+	{ 0x5, "SRC_LINE_EXCEEDS_NEG_PITCH" },
+	{ 0x6, "DST_LINE_EXCEEDS_PITCH" },
+	{ 0x7, "DST_LINE_EXCEEDS_NEG_PITCH" },
+	{ 0x8, "BAD_SRC_PIXEL_COMP_REF" },
+	{ 0x9, "INVALID_VALUE" },
+	{ 0xa, "UNUSED_FIELD" },
+	{ 0xb, "INVALID_OPERATION" },
+	{ 0xc, "NO_RESOURCES" },
+	{ 0xd, "INVALID_CONFIG" },
+	{} /* empty last entry; presumably the list terminator -- confirm */
+};
+/* Log the status word at 0x104418 + base; name is "" if the lookup is NULL. */
+static void
+gp100_ce_intr_launcherr(struct nvkm_engine *ce, const u32 base)
+{
+	struct nvkm_subdev *subdev = &ce->subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0x104418 + base);
+	const struct nvkm_enum *en =
+		nvkm_enum_find(gp100_ce_launcherr_report, stat & 0x0000000f);
+	nvkm_warn(subdev, "LAUNCHERR %08x [%s]\n", stat, en ? en->name : "");
+}
+/* For one CE: log each bit set in (0x104410 & 0x10440c) and write it back. */
+void
+gp100_ce_intr(struct nvkm_engine *ce)
+{
+	const u32 base = (ce->subdev.index - NVKM_ENGINE_CE0) * 0x80; /* 0x80 per CE index */
+	struct nvkm_subdev *subdev = &ce->subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 mask = nvkm_rd32(device, 0x10440c + base);
+	u32 intr = nvkm_rd32(device, 0x104410 + base) & mask; /* keep only bits also set in mask */
+	if (intr & 0x00000001) { /* XXX: the meaning of this bit is a guess */
+		nvkm_warn(subdev, "BLOCKPIPE\n");
+		nvkm_wr32(device, 0x104410 + base, 0x00000001); /* presumably acks the bit -- confirm */
+		intr &= ~0x00000001;
+	}
+	if (intr & 0x00000002) { /* XXX: the meaning of this bit is a guess */
+		nvkm_warn(subdev, "NONBLOCKPIPE\n");
+		nvkm_wr32(device, 0x104410 + base, 0x00000002);
+		intr &= ~0x00000002;
+	}
+	if (intr & 0x00000004) { /* launch error: decoded by the helper */
+		gp100_ce_intr_launcherr(ce, base);
+		nvkm_wr32(device, 0x104410 + base, 0x00000004);
+		intr &= ~0x00000004;
+	}
+	if (intr) { /* bits not handled above: log raw value, write it back */
+		nvkm_warn(subdev, "intr %08x\n", intr);
+		nvkm_wr32(device, 0x104410 + base, intr);
+	}
+}
+/* Engine table: gp100_ce_intr as .intr, PASCAL_DMA_COPY_A as only sclass. */
+static const struct nvkm_engine_func
+gp100_ce = {
+	.intr = gp100_ce_intr,
+	.sclass = {
+		{ -1, -1, PASCAL_DMA_COPY_A },
+		{}
+	}
+};
+/* Constructor: forwards to nvkm_engine_new_() with the gp100_ce table. */
+int
+gp100_ce_new(struct nvkm_device *device, int index,
+	     struct nvkm_engine **pengine)
+{
+	return nvkm_engine_new_(&gp100_ce, device, index, true, pengine);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp104.c
new file mode 100644
index 000000000000..20e019788a53
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp104.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "priv.h"
+#include <core/enum.h>
+
+#include <nvif/class.h>
+/* Engine table: gp100_ce_intr as .intr; PASCAL_DMA_COPY_B, then _A. */
+static const struct nvkm_engine_func
+gp104_ce = {
+	.intr = gp100_ce_intr,
+	.sclass = {
+		{ -1, -1, PASCAL_DMA_COPY_B },
+		{ -1, -1, PASCAL_DMA_COPY_A },
+		{}
+	}
+};
+/* Constructor: forwards to nvkm_engine_new_() with the gp104_ce table. */
+int
+gp104_ce_new(struct nvkm_device *device, int index,
+	     struct nvkm_engine **pengine)
+{
+	return nvkm_engine_new_(&gp104_ce, device, index, true, pengine);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h
index e2fa8b161943..2dce405976ad 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h
@@ -4,4 +4,5 @@
 
 void gt215_ce_intr(struct nvkm_falcon *, struct nvkm_fifo_chan *);
 void gk104_ce_intr(struct nvkm_engine *);
+void gp100_ce_intr(struct nvkm_engine *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 4572debcb0c9..7218a067a6c5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2148,6 +2148,67 @@ nv12b_chipset = {
 	.sw = gf100_sw_new,
 };
 
+static const struct nvkm_device_chip
+nv130_chipset = { /* constructor table selected for chipset 0x130 */
+	.name = "GP100",
+	.bar = gf100_bar_new,
+	.bios = nvkm_bios_new,
+	.bus = gf100_bus_new,
+	.devinit = gm200_devinit_new,
+	.fb = gp100_fb_new,
+	.fuse = gm107_fuse_new,
+	.gpio = gk104_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.ltc = gp100_ltc_new,
+	.mc = gp100_mc_new,
+	.mmu = gf100_mmu_new,
+	.secboot = gm200_secboot_new,
+	.pci = gp100_pci_new,
+	.timer = gk20a_timer_new,
+	.top = gk104_top_new,
+	.ce[0] = gp100_ce_new,
+	.ce[1] = gp100_ce_new,
+	.ce[2] = gp100_ce_new,
+	.ce[3] = gp100_ce_new,
+	.ce[4] = gp100_ce_new,
+	.ce[5] = gp100_ce_new,
+	.dma = gf119_dma_new,
+	.disp = gp100_disp_new,
+	.fifo = gp100_fifo_new,
+	.gr = gp100_gr_new,
+	.sw = gf100_sw_new,
+};
+/* Chipset 0x134 table; sets no .secboot/.gr/.sw, unlike nv130_chipset. */
+static const struct nvkm_device_chip
+nv134_chipset = {
+	.name = "GP104",
+	.bar = gf100_bar_new,
+	.bios = nvkm_bios_new,
+	.bus = gf100_bus_new,
+	.devinit = gm200_devinit_new,
+	.fb = gp104_fb_new,
+	.fuse = gm107_fuse_new,
+	.gpio = gk104_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.ltc = gp100_ltc_new,
+	.mc = gp100_mc_new,
+	.mmu = gf100_mmu_new,
+	.pci = gp100_pci_new,
+	.timer = gk20a_timer_new,
+	.top = gk104_top_new,
+	.ce[0] = gp104_ce_new,
+	.ce[1] = gp104_ce_new,
+	.ce[2] = gp104_ce_new,
+	.ce[3] = gp104_ce_new,
+	.disp = gp104_disp_new,
+	.dma = gf119_dma_new,
+	.fifo = gp100_fifo_new,
+};
+
 static int
 nvkm_device_event_ctor(struct nvkm_object *object, void *data, u32 size,
 		       struct nvkm_notify *notify)
@@ -2221,6 +2282,9 @@ nvkm_device_engine(struct nvkm_device *device, int index)
 	_(CE0    , device->ce[0]   ,  device->ce[0]);
 	_(CE1    , device->ce[1]   ,  device->ce[1]);
 	_(CE2    , device->ce[2]   ,  device->ce[2]);
+	_(CE3    , device->ce[3]   ,  device->ce[3]);
+	_(CE4    , device->ce[4]   ,  device->ce[4]);
+	_(CE5    , device->ce[5]   ,  device->ce[5]);
 	_(CIPHER , device->cipher  ,  device->cipher);
 	_(DISP   , device->disp    , &device->disp->engine);
 	_(DMAOBJ , device->dma     , &device->dma->engine);
@@ -2235,6 +2299,7 @@ nvkm_device_engine(struct nvkm_device *device, int index)
 	_(MSVLD  , device->msvld   ,  device->msvld);
 	_(NVENC0 , device->nvenc[0],  device->nvenc[0]);
 	_(NVENC1 , device->nvenc[1],  device->nvenc[1]);
+	_(NVENC2 , device->nvenc[2],  device->nvenc[2]);
 	_(NVDEC  , device->nvdec   ,  device->nvdec);
 	_(PM     , device->pm      , &device->pm->engine);
 	_(SEC    , device->sec     ,  device->sec);
@@ -2492,6 +2557,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 			case 0x100: device->card_type = NV_E0; break;
 			case 0x110:
 			case 0x120: device->card_type = GM100; break;
+			case 0x130: device->card_type = GP100; break;
 			default:
 				break;
 			}
@@ -2576,6 +2642,8 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		case 0x124: device->chip = &nv124_chipset; break;
 		case 0x126: device->chip = &nv126_chipset; break;
 		case 0x12b: device->chip = &nv12b_chipset; break;
+		case 0x130: device->chip = &nv130_chipset; break;
+		case 0x134: device->chip = &nv134_chipset; break;
 		default:
 			nvdev_error(device, "unknown chipset (%08x)\n", boot0);
 			goto done;
@@ -2659,6 +2727,9 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		_(NVKM_ENGINE_CE0     ,    ce[0]);
 		_(NVKM_ENGINE_CE1     ,    ce[1]);
 		_(NVKM_ENGINE_CE2     ,    ce[2]);
+		_(NVKM_ENGINE_CE3     ,    ce[3]);
+		_(NVKM_ENGINE_CE4     ,    ce[4]);
+		_(NVKM_ENGINE_CE5     ,    ce[5]);
 		_(NVKM_ENGINE_CIPHER  ,   cipher);
 		_(NVKM_ENGINE_DISP    ,     disp);
 		_(NVKM_ENGINE_DMAOBJ  ,      dma);
@@ -2673,6 +2744,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		_(NVKM_ENGINE_MSVLD   ,    msvld);
 		_(NVKM_ENGINE_NVENC0  , nvenc[0]);
 		_(NVKM_ENGINE_NVENC1  , nvenc[1]);
+		_(NVKM_ENGINE_NVENC2  , nvenc[2]);
 		_(NVKM_ENGINE_NVDEC   ,    nvdec);
 		_(NVKM_ENGINE_PM      ,       pm);
 		_(NVKM_ENGINE_SEC     ,      sec);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index 18fab3973ce5..b1b693219db3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -1614,7 +1614,6 @@ nvkm_device_pci_func = {
 	.fini = nvkm_device_pci_fini,
 	.resource_addr = nvkm_device_pci_resource_addr,
 	.resource_size = nvkm_device_pci_resource_size,
-	.cpu_coherent = !IS_ENABLED(CONFIG_ARM) && !IS_ENABLED(CONFIG_ARM64),
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index ec12efb4689a..939682f18788 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
@@ -191,13 +191,11 @@ static irqreturn_t
 nvkm_device_tegra_intr(int irq, void *arg)
 {
 	struct nvkm_device_tegra *tdev = arg;
-	struct nvkm_mc *mc = tdev->device.mc;
+	struct nvkm_device *device = &tdev->device;
 	bool handled = false;
-	if (likely(mc)) {
-		nvkm_mc_intr_unarm(mc);
-		nvkm_mc_intr(mc, &handled);
-		nvkm_mc_intr_rearm(mc);
-	}
+	nvkm_mc_intr_unarm(device);
+	nvkm_mc_intr(device, &handled);
+	nvkm_mc_intr_rearm(device);
 	return handled ? IRQ_HANDLED : IRQ_NONE;
 }
 
@@ -247,7 +245,6 @@ nvkm_device_tegra_func = {
 	.fini = nvkm_device_tegra_fini,
 	.resource_addr = nvkm_device_tegra_resource_addr,
 	.resource_size = nvkm_device_tegra_resource_size,
-	.cpu_coherent = false,
 };
 
 int
@@ -313,6 +310,7 @@ nvkm_device_tegra_new(const struct nvkm_device_tegra_func *func,
 		goto remove;
 
 	tdev->gpu_speedo = tegra_sku_info.gpu_speedo_value;
+	tdev->gpu_speedo_id = tegra_sku_info.gpu_speedo_id;
 	ret = nvkm_device_ctor(&nvkm_device_tegra_func, NULL, &pdev->dev,
 			       NVKM_DEVICE_TEGRA, pdev->id, NULL,
 			       cfg, dbg, detect, mmio, subdev_mask,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
index 137066426ed7..79a8f71cf788 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
@@ -102,6 +102,7 @@ nvkm_udevice_info(struct nvkm_udevice *udev, void *data, u32 size)
 	case NV_C0: args->v0.family = NV_DEVICE_INFO_V0_FERMI; break;
 	case NV_E0: args->v0.family = NV_DEVICE_INFO_V0_KEPLER; break;
 	case GM100: args->v0.family = NV_DEVICE_INFO_V0_MAXWELL; break;
+	case GP100: args->v0.family = NV_DEVICE_INFO_V0_PASCAL; break;
 	default:
 		args->v0.family = 0;
 		break;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
index a74c5dd27dc0..77a52b54a31e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
@@ -10,6 +10,8 @@ nvkm-y += nvkm/engine/disp/gk104.o
 nvkm-y += nvkm/engine/disp/gk110.o
 nvkm-y += nvkm/engine/disp/gm107.o
 nvkm-y += nvkm/engine/disp/gm200.o
+nvkm-y += nvkm/engine/disp/gp100.o
+nvkm-y += nvkm/engine/disp/gp104.o
 
 nvkm-y += nvkm/engine/disp/outp.o
 nvkm-y += nvkm/engine/disp/outpdp.o
@@ -18,6 +20,7 @@ nvkm-y += nvkm/engine/disp/piornv50.o
 nvkm-y += nvkm/engine/disp/sornv50.o
 nvkm-y += nvkm/engine/disp/sorg94.o
 nvkm-y += nvkm/engine/disp/sorgf119.o
+nvkm-y += nvkm/engine/disp/sorgm107.o
 nvkm-y += nvkm/engine/disp/sorgm200.o
 nvkm-y += nvkm/engine/disp/dport.o
 
@@ -44,12 +47,15 @@ nvkm-y += nvkm/engine/disp/rootgk104.o
 nvkm-y += nvkm/engine/disp/rootgk110.o
 nvkm-y += nvkm/engine/disp/rootgm107.o
 nvkm-y += nvkm/engine/disp/rootgm200.o
+nvkm-y += nvkm/engine/disp/rootgp100.o
+nvkm-y += nvkm/engine/disp/rootgp104.o
 
 nvkm-y += nvkm/engine/disp/channv50.o
 nvkm-y += nvkm/engine/disp/changf119.o
 
 nvkm-y += nvkm/engine/disp/dmacnv50.o
 nvkm-y += nvkm/engine/disp/dmacgf119.o
+nvkm-y += nvkm/engine/disp/dmacgp104.o
 
 nvkm-y += nvkm/engine/disp/basenv50.o
 nvkm-y += nvkm/engine/disp/baseg84.o
@@ -58,6 +64,7 @@ nvkm-y += nvkm/engine/disp/basegt215.o
 nvkm-y += nvkm/engine/disp/basegf119.o
 nvkm-y += nvkm/engine/disp/basegk104.o
 nvkm-y += nvkm/engine/disp/basegk110.o
+nvkm-y += nvkm/engine/disp/basegp104.o
 
 nvkm-y += nvkm/engine/disp/corenv50.o
 nvkm-y += nvkm/engine/disp/coreg84.o
@@ -69,6 +76,8 @@ nvkm-y += nvkm/engine/disp/coregk104.o
 nvkm-y += nvkm/engine/disp/coregk110.o
 nvkm-y += nvkm/engine/disp/coregm107.o
 nvkm-y += nvkm/engine/disp/coregm200.o
+nvkm-y += nvkm/engine/disp/coregp100.o
+nvkm-y += nvkm/engine/disp/coregp104.o
 
 nvkm-y += nvkm/engine/disp/ovlynv50.o
 nvkm-y += nvkm/engine/disp/ovlyg84.o
@@ -76,6 +85,7 @@ nvkm-y += nvkm/engine/disp/ovlygt200.o
 nvkm-y += nvkm/engine/disp/ovlygt215.o
 nvkm-y += nvkm/engine/disp/ovlygf119.o
 nvkm-y += nvkm/engine/disp/ovlygk104.o
+nvkm-y += nvkm/engine/disp/ovlygp104.o
 
 nvkm-y += nvkm/engine/disp/piocnv50.o
 nvkm-y += nvkm/engine/disp/piocgf119.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp104.c
new file mode 100644
index 000000000000..51688e37c54e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp104.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "dmacnv50.h"
+#include "rootnv50.h"
+
+#include <nvif/class.h>
+/* Base channel class: GK110 class id paired with the gp104 dmac functions. */
+const struct nv50_disp_dmac_oclass
+gp104_disp_base_oclass = {
+	.base.oclass = GK110_DISP_BASE_CHANNEL_DMA,
+	.base.minver = 0,
+	.base.maxver = 0,
+	.ctor = nv50_disp_base_new,
+	.func = &gp104_disp_dmac_func,
+	.mthd = &gf119_disp_base_chan_mthd,
+	.chid = 1,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h
index aee374884c96..f5f683d9fd20 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h
@@ -85,6 +85,7 @@ extern const struct nv50_disp_mthd_list gf119_disp_core_mthd_pior;
 extern const struct nv50_disp_chan_mthd gf119_disp_base_chan_mthd;
 
 extern const struct nv50_disp_chan_mthd gk104_disp_core_chan_mthd;
+extern const struct nv50_disp_chan_mthd gk104_disp_ovly_chan_mthd;
 
 struct nv50_disp_pioc_oclass {
 	int (*ctor)(const struct nv50_disp_chan_func *,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c
index 6b1dc703dac7..21fbf89b6319 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregf119.c
@@ -171,7 +171,7 @@ gf119_disp_core_chan_mthd = {
 	}
 };
 
-static void
+void
 gf119_disp_core_fini(struct nv50_disp_dmac *chan)
 {
 	struct nv50_disp *disp = chan->base.root->disp;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp100.c
new file mode 100644
index 000000000000..d5dff6619d4d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp100.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "dmacnv50.h"
+#include "rootnv50.h"
+
+#include <nvif/class.h>
+
+/* GP100 core channel: GF119 channel functions, GK104 method list, chid 0. */
+const struct nv50_disp_dmac_oclass gp100_disp_core_oclass = {
+	.chid = 0,
+	.ctor = nv50_disp_core_new,
+	.func = &gf119_disp_core_func,
+	.mthd = &gk104_disp_core_chan_mthd,
+	.base.oclass = GP100_DISP_CORE_CHANNEL_DMA,
+	.base.minver = 0,
+	.base.maxver = 0,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp104.c
new file mode 100644
index 000000000000..6922f4007b61
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp104.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "dmacnv50.h"
+#include "rootnv50.h"
+
+#include <subdev/timer.h>
+
+#include <nvif/class.h>
+
+/* Start the GP104 core channel; 0 on success, -EBUSY if the wait fails. */
+static int gp104_disp_core_init(struct nv50_disp_dmac *chan)
+{
+	struct nvkm_subdev *subdev =
+		&chan->base.root->disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const u32 ctrl = 0x610490;
+
+	/* enable error reporting */
+	nvkm_mask(device, 0x6100a0, 0x00000001, 0x00000001);
+
+	/* initialise channel for dma command submission */
+	nvkm_wr32(device, 0x611494, chan->push);
+	nvkm_wr32(device, 0x611498, 0x00010000);
+	nvkm_wr32(device, 0x61149c, 0x00000001);
+	nvkm_mask(device, ctrl, 0x00000010, 0x00000010);
+	nvkm_wr32(device, 0x640000, 0x00000000);
+	nvkm_wr32(device, ctrl, 0x01000013);
+
+	/* wait for it to go inactive: bit 31 of the control word must clear */
+	if (nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, ctrl) & 0x80000000))
+			break;
+	) >= 0)
+		return 0;
+
+	nvkm_error(subdev, "core init: %08x\n",
+		   nvkm_rd32(device, ctrl));
+	return -EBUSY;
+}
+
+/* GP104 core channel ops: GP104-specific init, GF119 fini and bind. */
+const struct nv50_disp_dmac_func gp104_disp_core_func = {
+	.bind = gf119_disp_dmac_bind,
+	.fini = gf119_disp_core_fini,
+	.init = gp104_disp_core_init,
+};
+
+/* GP104 core channel: GP104 channel functions, GK104 method list, chid 0. */
+const struct nv50_disp_dmac_oclass gp104_disp_core_oclass = {
+	.chid = 0,
+	.ctor = nv50_disp_core_new,
+	.func = &gp104_disp_core_func,
+	.mthd = &gk104_disp_core_chan_mthd,
+	.base.oclass = GP104_DISP_CORE_CHANNEL_DMA,
+	.base.minver = 0,
+	.base.maxver = 0,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c
index 876b14549a58..a57f7cef307a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c
@@ -36,7 +36,7 @@ gf119_disp_dmac_bind(struct nv50_disp_dmac *chan,
 				 chan->base.chid << 27 | 0x00000001);
 }
 
-static void
+void
 gf119_disp_dmac_fini(struct nv50_disp_dmac *chan)
 {
 	struct nv50_disp *disp = chan->base.root->disp;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp104.c
new file mode 100644
index 000000000000..ad24c2c57696
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp104.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "dmacnv50.h"
+#include "rootnv50.h"
+
+#include <subdev/timer.h>
+
+/* Start DMA channel chan->base.chid; 0 on success, -EBUSY if the wait fails. */
+static int gp104_disp_dmac_init(struct nv50_disp_dmac *chan)
+{
+	struct nvkm_subdev *subdev =
+		&chan->base.root->disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	int chid = chan->base.chid;
+	const int regs = chid * 0x0010;	/* 0x10 bytes of registers per channel */
+	const int ctrl = 0x610490 + regs;
+
+	/* enable error reporting */
+	nvkm_mask(device, 0x6100a0, 0x00000001 << chid, 0x00000001 << chid);
+
+	/* initialise channel for dma command submission */
+	nvkm_wr32(device, 0x611494 + regs, chan->push);
+	nvkm_wr32(device, 0x611498 + regs, 0x00010000);
+	nvkm_wr32(device, 0x61149c + regs, 0x00000001);
+	nvkm_mask(device, ctrl, 0x00000010, 0x00000010);
+	nvkm_wr32(device, 0x640000 + (chid * 0x1000), 0x00000000);
+	nvkm_wr32(device, ctrl, 0x00000013);
+
+	/* wait for it to go inactive: bit 31 of the control word must clear */
+	if (nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, ctrl) & 0x80000000))
+			break;
+	) >= 0)
+		return 0;
+
+	nvkm_error(subdev, "ch %d init: %08x\n", chid, nvkm_rd32(device, ctrl));
+	return -EBUSY;
+}
+
+/* GP104 DMA channel ops: GP104-specific init, GF119 fini and bind. */
+const struct nv50_disp_dmac_func gp104_disp_dmac_func = {
+	.bind = gf119_disp_dmac_bind,
+	.fini = gf119_disp_dmac_fini,
+	.init = gp104_disp_dmac_init,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
index fc84eb8b5c45..43ac05857853 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
@@ -25,8 +25,12 @@ int nv50_disp_dmac_bind(struct nv50_disp_dmac *, struct nvkm_object *, u32);
 extern const struct nv50_disp_dmac_func nv50_disp_core_func;
 
 extern const struct nv50_disp_dmac_func gf119_disp_dmac_func;
+void gf119_disp_dmac_fini(struct nv50_disp_dmac *);
 int gf119_disp_dmac_bind(struct nv50_disp_dmac *, struct nvkm_object *, u32);
 extern const struct nv50_disp_dmac_func gf119_disp_core_func;
+void gf119_disp_core_fini(struct nv50_disp_dmac *);
+
+extern const struct nv50_disp_dmac_func gp104_disp_dmac_func;
 
 struct nv50_disp_dmac_oclass {
 	int (*ctor)(const struct nv50_disp_dmac_func *,
@@ -88,4 +92,10 @@ extern const struct nv50_disp_dmac_oclass gk110_disp_base_oclass;
 extern const struct nv50_disp_dmac_oclass gm107_disp_core_oclass;
 
 extern const struct nv50_disp_dmac_oclass gm200_disp_core_oclass;
+
+extern const struct nv50_disp_dmac_oclass gp100_disp_core_oclass;
+
+extern const struct nv50_disp_dmac_oclass gp104_disp_core_oclass;
+extern const struct nv50_disp_dmac_oclass gp104_disp_base_oclass;
+extern const struct nv50_disp_dmac_oclass gp104_disp_ovly_oclass;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
index f0314664349c..29e84b241cca 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
@@ -79,8 +79,7 @@ exec_lookup(struct nv50_disp *disp, int head, int or, u32 ctrl,
 	list_for_each_entry(outp, &disp->base.outp, head) {
 		if ((outp->info.hasht & 0xff) == type &&
 		    (outp->info.hashm & mask) == mask) {
-			*data = nvbios_outp_match(bios, outp->info.hasht,
-							outp->info.hashm,
+			*data = nvbios_outp_match(bios, outp->info.hasht, mask,
 						  ver, hdr, cnt, len, info);
 			if (!*data)
 				return NULL;
@@ -155,25 +154,21 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf)
 	if (!outp)
 		return NULL;
 
+	*conf = (ctrl & 0x00000f00) >> 8;
 	switch (outp->info.type) {
 	case DCB_OUTPUT_TMDS:
-		*conf = (ctrl & 0x00000f00) >> 8;
 		if (*conf == 5)
 			*conf |= 0x0100;
 		break;
 	case DCB_OUTPUT_LVDS:
-		*conf = disp->sor.lvdsconf;
+		*conf |= disp->sor.lvdsconf;
 		break;
-	case DCB_OUTPUT_DP:
-		*conf = (ctrl & 0x00000f00) >> 8;
-		break;
-	case DCB_OUTPUT_ANALOG:
 	default:
-		*conf = 0x00ff;
 		break;
 	}
 
-	data = nvbios_ocfg_match(bios, data, *conf, &ver, &hdr, &cnt, &len, &info2);
+	data = nvbios_ocfg_match(bios, data, *conf & 0xff, *conf >> 8,
+				 &ver, &hdr, &cnt, &len, &info2);
 	if (data && id < 0xff) {
 		data = nvbios_oclk_match(bios, info2.clkcmp[id], pclk);
 		if (data) {
@@ -418,7 +413,7 @@ gf119_disp_intr_supervisor(struct work_struct *work)
 	nvkm_wr32(device, 0x6101d0, 0x80000000);
 }
 
-static void
+void
 gf119_disp_intr_error(struct nv50_disp *disp, int chid)
 {
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
@@ -466,7 +461,7 @@ gf119_disp_intr(struct nv50_disp *disp)
 		u32 stat = nvkm_rd32(device, 0x61009c);
 		int chid = ffs(stat) - 1;
 		if (chid >= 0)
-			gf119_disp_intr_error(disp, chid);
+			disp->func->intr_error(disp, chid);
 		intr &= ~0x00000002;
 	}
 
@@ -510,6 +505,7 @@ gf119_disp_new_(const struct nv50_disp_func *func, struct nvkm_device *device,
 static const struct nv50_disp_func
 gf119_disp = {
 	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
 	.root = &gf119_disp_root_oclass,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c
index a86384b8e388..37f145cf30d7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c
@@ -27,6 +27,7 @@
 static const struct nv50_disp_func
 gk104_disp = {
 	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
 	.root = &gk104_disp_root_oclass,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c
index 0d574c7e594a..e14ac946608c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk110.c
@@ -27,6 +27,7 @@
 static const struct nv50_disp_func
 gk110_disp = {
 	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
 	.root = &gk110_disp_root_oclass,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
index b6944142d616..2f2437cc5891 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
@@ -27,6 +27,7 @@
 static const struct nv50_disp_func
 gm107_disp = {
 	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
 	.root = &gm107_disp_root_oclass,
@@ -36,7 +37,7 @@ gm107_disp = {
 	.outp.internal.crt = nv50_dac_output_new,
 	.outp.internal.tmds = nv50_sor_output_new,
 	.outp.internal.lvds = nv50_sor_output_new,
-	.outp.internal.dp = gf119_sor_dp_new,
+	.outp.internal.dp = gm107_sor_dp_new,
 	.dac.nr = 3,
 	.dac.power = nv50_dac_power,
 	.dac.sense = nv50_dac_sense,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
index 67eec8620719..9f368d4ee61e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
@@ -27,6 +27,7 @@
 static const struct nv50_disp_func
 gm200_disp = {
 	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
 	.root = &gm200_disp_root_oclass,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c
new file mode 100644
index 000000000000..4f81bf31435e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "nv50.h"
+#include "rootnv50.h"
+
+/* GP100 display: GF119 interrupt and head hooks, 3 DACs and 4 SORs. */
+static const struct nv50_disp_func gp100_disp = {
+	.root = &gp100_disp_root_oclass,
+	.uevent = &gf119_disp_chan_uevent,
+	.intr = gf119_disp_intr,
+	.intr_error = gf119_disp_intr_error,
+	.super = gf119_disp_intr_supervisor,
+	.head.vblank_init = gf119_disp_vblank_init,
+	.head.vblank_fini = gf119_disp_vblank_fini,
+	.head.scanoutpos = gf119_disp_root_scanoutpos,
+	.dac.nr = 3,
+	.dac.power = nv50_dac_power,
+	.dac.sense = nv50_dac_sense,
+	.sor.nr = 4,
+	.sor.power = nv50_sor_power,
+	.sor.hda_eld = gf119_hda_eld,
+	.sor.hdmi = gk104_hdmi_ctrl,
+	.sor.magic = gm200_sor_magic,
+	.outp.internal.crt = nv50_dac_output_new,
+	.outp.internal.tmds = nv50_sor_output_new,
+	.outp.internal.lvds = nv50_sor_output_new,
+	.outp.internal.dp = gm200_sor_dp_new,
+};
+
+/* Create the GP100 display engine through gf119_disp_new_() with gp100_disp. */
+int gp100_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
+{
+	return gf119_disp_new_(&gp100_disp, device, index, pdisp);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp104.c
new file mode 100644
index 000000000000..3bf3380336e4
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp104.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "nv50.h"
+#include "rootnv50.h"
+
+/*
+ * Handle an error reported for display channel @chid: log the three error
+ * words read from 0x6111f0 + chid * 12, call nv50_disp_chan_mthd() at error
+ * level when the method field is 0x0080, then write the channel's bit to
+ * 0x61009c and 0x90000000 to the first error word.
+ */
+static void gp104_disp_intr_error(struct nv50_disp *disp, int chid)
+{
+	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const int base = 0x6111f0 + (chid * 12);
+	u32 mthd = nvkm_rd32(device, base + 0);
+	u32 data = nvkm_rd32(device, base + 4);
+	u32 unkn = nvkm_rd32(device, base + 8);
+
+	nvkm_error(subdev, "chid %d mthd %04x data %08x %08x %08x\n",
+		   chid, (mthd & 0x0000ffc), data, mthd, unkn);
+
+	if (chid < ARRAY_SIZE(disp->chan) && (mthd & 0xffc) == 0x0080)
+		nv50_disp_chan_mthd(disp->chan[chid], NV_DBG_ERROR);
+
+	/* 1U, not 1: chid can reach 31 and a signed shift that far is undefined */
+	nvkm_wr32(device, 0x61009c, 1U << chid);
+	nvkm_wr32(device, base, 0x90000000);
+}
+
+/* GP104 display: GF119 hooks with a GP104 error handler, 3 DACs and 4 SORs. */
+static const struct nv50_disp_func gp104_disp = {
+	.root = &gp104_disp_root_oclass,
+	.uevent = &gf119_disp_chan_uevent,
+	.intr = gf119_disp_intr,
+	.intr_error = gp104_disp_intr_error,
+	.super = gf119_disp_intr_supervisor,
+	.head.vblank_init = gf119_disp_vblank_init,
+	.head.vblank_fini = gf119_disp_vblank_fini,
+	.head.scanoutpos = gf119_disp_root_scanoutpos,
+	.dac.nr = 3,
+	.dac.power = nv50_dac_power,
+	.dac.sense = nv50_dac_sense,
+	.sor.nr = 4,
+	.sor.power = nv50_sor_power,
+	.sor.hda_eld = gf119_hda_eld,
+	.sor.hdmi = gk104_hdmi_ctrl,
+	.sor.magic = gm200_sor_magic,
+	.outp.internal.crt = nv50_dac_output_new,
+	.outp.internal.tmds = nv50_sor_output_new,
+	.outp.internal.lvds = nv50_sor_output_new,
+	.outp.internal.dp = gm200_sor_dp_new,
+};
+
+/* Create the GP104 display engine through gf119_disp_new_() with gp104_disp. */
+int gp104_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
+{
+	return gf119_disp_new_(&gp104_disp, device, index, pdisp);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
index 4226d2153b9c..fbb8c7dc18fd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
@@ -32,6 +32,7 @@
 #include <subdev/bios/init.h>
 #include <subdev/bios/pll.h>
 #include <subdev/devinit.h>
+#include <subdev/timer.h>
 
 static const struct nvkm_disp_oclass *
 nv50_disp_root_(struct nvkm_disp *base)
@@ -269,8 +270,7 @@ exec_lookup(struct nv50_disp *disp, int head, int or, u32 ctrl,
 	list_for_each_entry(outp, &disp->base.outp, head) {
 		if ((outp->info.hasht & 0xff) == type &&
 		    (outp->info.hashm & mask) == mask) {
-			*data = nvbios_outp_match(bios, outp->info.hasht,
-							outp->info.hashm,
+			*data = nvbios_outp_match(bios, outp->info.hasht, mask,
 						  ver, hdr, cnt, len, info);
 			if (!*data)
 				return NULL;
@@ -387,22 +387,17 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf)
 	if (!outp)
 		return NULL;
 
+	*conf = (ctrl & 0x00000f00) >> 8;
 	if (outp->info.location == 0) {
 		switch (outp->info.type) {
 		case DCB_OUTPUT_TMDS:
-			*conf = (ctrl & 0x00000f00) >> 8;
 			if (*conf == 5)
 				*conf |= 0x0100;
 			break;
 		case DCB_OUTPUT_LVDS:
-			*conf = disp->sor.lvdsconf;
+			*conf |= disp->sor.lvdsconf;
 			break;
-		case DCB_OUTPUT_DP:
-			*conf = (ctrl & 0x00000f00) >> 8;
-			break;
-		case DCB_OUTPUT_ANALOG:
 		default:
-			*conf = 0x00ff;
 			break;
 		}
 	} else {
@@ -410,7 +405,8 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf)
 		pclk = pclk / 2;
 	}
 
-	data = nvbios_ocfg_match(bios, data, *conf, &ver, &hdr, &cnt, &len, &info2);
+	data = nvbios_ocfg_match(bios, data, *conf & 0xff, *conf >> 8,
+				 &ver, &hdr, &cnt, &len, &info2);
 	if (data && id < 0xff) {
 		data = nvbios_oclk_match(bios, info2.clkcmp[id], pclk);
 		if (data) {
@@ -430,6 +426,134 @@ exec_clkcmp(struct nv50_disp *disp, int head, int id, u32 pclk, u32 *conf)
 	return outp;
 }
 
+/*
+ * Report whether the "dptmds" workaround (war) applies to this device.
+ *
+ * Only chipsets 0x94, 0x96, 0x98, 0xaa and 0xac are affected; the other
+ * nv50_disp_dptmds_war_*() helpers and nv50_disp_update_sppll1() do
+ * nothing on any other chipset.
+ */
+static bool nv50_disp_dptmds_war(struct nvkm_device *device)
+{
+	return device->chipset == 0x94 ||
+	       device->chipset == 0x96 ||
+	       device->chipset == 0x98 ||
+	       device->chipset == 0xaa ||
+	       device->chipset == 0xac;
+}
+
+/*
+ * True when @outp needs the workaround: affected chipset, TMDS output, and
+ * bits 17:16 of its 0x614300 register reading back as 0 or 3.
+ */
+static bool
+nv50_disp_dptmds_war_needed(struct nv50_disp *disp, struct dcb_output *outp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	const u32 soff = __ffs(outp->or) * 0x800;
+	u32 clksor;
+
+	if (!nv50_disp_dptmds_war(device) || outp->type != DCB_OUTPUT_TMDS)
+		return false;
+
+	clksor = nvkm_rd32(device, 0x614300 + soff) & 0x00030000;
+	return clksor == 0x00000000 || clksor == 0x00030000;
+}
+
+/* Workaround step run at the end of nv50_disp_intr_unk20_2(); no-op unless needed. */
+static void
+nv50_disp_dptmds_war_2(struct nv50_disp *disp, struct dcb_output *outp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	const u32 or_off = __ffs(outp->or) * 0x800;
+
+	if (!nv50_disp_dptmds_war_needed(disp, outp))
+		return;
+
+	nvkm_mask(device, 0x00e840, 0x80000000, 0x80000000);
+	nvkm_mask(device, 0x614300 + or_off, 0x03000000, 0x03000000);
+	nvkm_mask(device, 0x61c10c + or_off, 0x00000001, 0x00000001);
+
+	nvkm_mask(device, 0x61c00c + or_off, 0x0f000000, 0x00000000);
+	nvkm_mask(device, 0x61c008 + or_off, 0xff000000, 0x14000000);
+	nvkm_usec(device, 400, NVKM_DELAY);
+	nvkm_mask(device, 0x61c008 + or_off, 0xff000000, 0x00000000);
+	nvkm_mask(device, 0x61c00c + or_off, 0x0f000000, 0x01000000);
+
+	if (nvkm_rd32(device, 0x61c004 + or_off) & 0x00000001) {
+		u32 pu_pc = nvkm_rd32(device, 0x61c030 + or_off) & 0x0000000f;
+		nvkm_wr32(device, 0x61c040 + or_off + pu_pc * 4, 0x1f008000);
+	}
+}
+
+/* Workaround step run at the end of nv50_disp_intr_unk40_0(); no-op unless needed. */
+static void
+nv50_disp_dptmds_war_3(struct nv50_disp *disp, struct dcb_output *outp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	const u32 or_off = __ffs(outp->or) * 0x800;
+	bool powered;
+
+	if (!nv50_disp_dptmds_war_needed(disp, outp))
+		return;
+
+	powered = nvkm_rd32(device, 0x61c004 + or_off) & 0x00000001;
+	if (powered) {
+		u32 seqctl = nvkm_rd32(device, 0x61c030 + or_off);
+		u32 pd_reg = 0x61c040 + or_off + ((seqctl & 0x00000f00) >> 8) * 4;
+		u32 pu_reg = 0x61c040 + or_off + (seqctl & 0x0000000f) * 4;
+
+		nvkm_wr32(device, pd_reg, 0x1f008000);
+		/* wait for bit 28 of 0x61c030 to clear, before and after the mask */
+		nvkm_msec(device, 2000,
+			if (!(nvkm_rd32(device, 0x61c030 + or_off) & 0x10000000))
+				break;
+		);
+		nvkm_mask(device, 0x61c004 + or_off, 0x80000001, 0x80000000);
+		nvkm_msec(device, 2000,
+			if (!(nvkm_rd32(device, 0x61c030 + or_off) & 0x10000000))
+				break;
+		);
+
+		nvkm_wr32(device, pd_reg, 0x00002000);
+		nvkm_wr32(device, pu_reg, 0x1f000000);
+	}
+
+	nvkm_mask(device, 0x61c10c + or_off, 0x00000001, 0x00000000);
+	nvkm_mask(device, 0x614300 + or_off, 0x03000000, 0x00000000);
+
+	if (powered)
+		nvkm_mask(device, 0x61c004 + or_off, 0x80000001, 0x80000001);
+}
+
+/*
+ * Clear bit 31 of 0x00e840 unless at least one SOR is flagged as a user.
+ *
+ * A SOR counts as a user while the field under mask 0x03000000 of its
+ * 0x614300 register is 2 or 3, i.e. while bit 25 is set.  Chipsets not
+ * covered by nv50_disp_dptmds_war() are left untouched.
+ */
+static void nv50_disp_update_sppll1(struct nv50_disp *disp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	bool used = false;
+	int sor;
+
+	if (!nv50_disp_dptmds_war(device))
+		return;
+
+	/* all SORs are read, even once a user has been found */
+	for (sor = 0; sor < disp->func->sor.nr; sor++) {
+		u32 clksor = nvkm_rd32(device, 0x614300 + (sor * 0x800));
+
+		if (clksor & 0x02000000)
+			used = true;
+	}
+
+	if (!used)
+		nvkm_mask(device, 0x00e840, 0x80000000, 0x00000000);
+}
+
 static void
 nv50_disp_intr_unk10_0(struct nv50_disp *disp, int head)
 {
@@ -683,6 +807,8 @@ nv50_disp_intr_unk20_2(struct nv50_disp *disp, int head)
 
 	nvkm_mask(device, hreg, 0x0000000f, hval);
 	nvkm_mask(device, oreg, mask, oval);
+
+	nv50_disp_dptmds_war_2(disp, &outp->info);
 }
 
 /* If programming a TMDS output on a SOR that can also be configured for
@@ -724,6 +850,7 @@ nv50_disp_intr_unk40_0(struct nv50_disp *disp, int head)
 
 	if (outp->info.location == 0 && outp->info.type == DCB_OUTPUT_TMDS)
 		nv50_disp_intr_unk40_0_tmds(disp, &outp->info);
+	nv50_disp_dptmds_war_3(disp, &outp->info);
 }
 
 void
@@ -771,6 +898,7 @@ nv50_disp_intr_supervisor(struct work_struct *work)
 				continue;
 			nv50_disp_intr_unk40_0(disp, head);
 		}
+		nv50_disp_update_sppll1(disp);
 	}
 
 	nvkm_wr32(device, 0x610030, 0x80000000);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
index aecebd8717e5..1e1de6bfe85a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
@@ -68,6 +68,7 @@ struct nv50_disp_func_outp {
 
 struct nv50_disp_func {
 	void (*intr)(struct nv50_disp *);
+	void (*intr_error)(struct nv50_disp *, int chid);
 
 	const struct nvkm_event_func *uevent;
 	void (*super)(struct work_struct *);
@@ -114,4 +115,5 @@ void gf119_disp_vblank_init(struct nv50_disp *, int);
 void gf119_disp_vblank_fini(struct nv50_disp *, int);
 void gf119_disp_intr(struct nv50_disp *);
 void gf119_disp_intr_supervisor(struct work_struct *);
+void gf119_disp_intr_error(struct nv50_disp *, int);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
index e9067ba4e179..4e983f6d7032 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
@@ -62,7 +62,12 @@ int g94_sor_dp_lnk_pwr(struct nvkm_output_dp *, int);
 int gf119_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
 		     struct nvkm_output **);
 int gf119_sor_dp_lnk_ctl(struct nvkm_output_dp *, int, int, bool);
+int gf119_sor_dp_drv_ctl(struct nvkm_output_dp *, int, int, int, int);
 
-int  gm200_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
-		      struct nvkm_output **);
+int gm107_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
+		     struct nvkm_output **);
+int gm107_sor_dp_pattern(struct nvkm_output_dp *, int);
+
+int gm200_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
+		     struct nvkm_output **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c
index 2e2dc0641ef2..2f0220b39f34 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygk104.c
@@ -80,7 +80,7 @@ gk104_disp_ovly_mthd_base = {
 	}
 };
 
-static const struct nv50_disp_chan_mthd
+const struct nv50_disp_chan_mthd
 gk104_disp_ovly_chan_mthd = {
 	.name = "Overlay",
 	.addr = 0x001000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp104.c
new file mode 100644
index 000000000000..97e2dd2d908e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp104.c
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "dmacnv50.h"
+#include "rootnv50.h"
+
+#include <nvif/class.h>
+
+/* GP104 overlay channel: GK104 class id and methods, GP104 DMA functions, chid 5. */
+const struct nv50_disp_dmac_oclass gp104_disp_ovly_oclass = {
+	.chid = 5,
+	.ctor = nv50_disp_ovly_new,
+	.func = &gp104_disp_dmac_func,
+	.mthd = &gk104_disp_ovly_chan_mthd,
+	.base.oclass = GK104_DISP_OVERLAY_CONTROL_DMA,
+	.base.minver = 0,
+	.base.maxver = 0,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c
new file mode 100644
index 000000000000..ac8fdd728ec6
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp100.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "rootnv50.h"
+#include "dmacnv50.h"
+
+#include <nvif/class.h>
+
+/* GP100 root object: GF119 init/fini plus the DMA and PIO channel classes. */
+static const struct nv50_disp_root_func gp100_disp_root = {
+	.dmac = {
+		&gp100_disp_core_oclass,
+		&gk110_disp_base_oclass,
+		&gk104_disp_ovly_oclass,
+	},
+	.pioc = {
+		&gk104_disp_oimm_oclass,
+		&gk104_disp_curs_oclass,
+	},
+	.init = gf119_disp_root_init,
+	.fini = gf119_disp_root_fini,
+};
+
+/* Root object constructor: nv50_disp_root_new_() bound to gp100_disp_root. */
+static int gp100_disp_root_new(struct nvkm_disp *disp,
+			       const struct nvkm_oclass *oclass, void *data,
+			       u32 size, struct nvkm_object **pobject)
+{
+	return nv50_disp_root_new_(&gp100_disp_root, disp, oclass, data, size, pobject);
+}
+
+/* GP100_DISP root class, built by gp100_disp_root_new(); min/max version are -1. */
+const struct nvkm_disp_oclass gp100_disp_root_oclass = {
+	.ctor = gp100_disp_root_new,
+	.base.oclass = GP100_DISP,
+	.base.minver = -1,
+	.base.maxver = -1,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp104.c
new file mode 100644
index 000000000000..8443e04dc626
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp104.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "rootnv50.h"
+#include "dmacnv50.h"
+
+#include <nvif/class.h>
+
+static const struct nv50_disp_root_func
+gp104_disp_root = {	/* hook table handed to nv50_disp_root_new_() below */
+	.init = gf119_disp_root_init,	/* init/fini are the gf119 routines */
+	.fini = gf119_disp_root_fini,
+	.dmac = {	/* all three entries here are gp104-named classes */
+		&gp104_disp_core_oclass,
+		&gp104_disp_base_oclass,
+		&gp104_disp_ovly_oclass,
+	},
+	.pioc = {	/* both entries are the gk104-named classes */
+		&gk104_disp_oimm_oclass,
+		&gk104_disp_curs_oclass,
+	},
+};
+
+static int
+gp104_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass,
+		    void *data, u32 size, struct nvkm_object **pobject)
+{
+	const struct nv50_disp_root_func *func = &gp104_disp_root; /* GP104 */
+	return nv50_disp_root_new_(func, disp, oclass, data, size, pobject);
+}
+
+const struct nvkm_disp_oclass
+gp104_disp_root_oclass = {	/* non-static: declared extern in rootnv50.h */
+	.base.oclass = GP104_DISP,
+	.base.minver = -1,	/* NOTE(review): -1/-1 presumably = unversioned */
+	.base.maxver = -1,
+	.ctor = gp104_disp_root_new,	/* forwards to nv50_disp_root_new_() */
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
index cb449ed8d92c..ad00f1724b72 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
@@ -40,4 +40,6 @@ extern const struct nvkm_disp_oclass gk104_disp_root_oclass;
 extern const struct nvkm_disp_oclass gk110_disp_root_oclass;
 extern const struct nvkm_disp_oclass gm107_disp_root_oclass;
 extern const struct nvkm_disp_oclass gm200_disp_root_oclass;
+extern const struct nvkm_disp_oclass gp100_disp_root_oclass;
+extern const struct nvkm_disp_oclass gp104_disp_root_oclass;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
index b4b41b135643..22706c0a54b5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
@@ -40,8 +40,9 @@ static int
 gf119_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
 {
 	struct nvkm_device *device = outp->base.disp->engine.subdev.device;
-	const u32 loff = gf119_sor_loff(outp);
-	nvkm_mask(device, 0x61c110 + loff, 0x0f0f0f0f, 0x01010101 * pattern);
+	/* no link offset here, but the register is still per-SOR (0x800 each) */
+	const u32 soff = outp->base.or * 0x800;
+	nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, 0x01010101 * pattern);
 	return 0;
 }
 
@@ -64,7 +65,7 @@ gf119_sor_dp_lnk_ctl(struct nvkm_output_dp *outp, int nr, int bw, bool ef)
 	return 0;
 }
 
-static int
+int
 gf119_sor_dp_drv_ctl(struct nvkm_output_dp *outp,
 		     int ln, int vs, int pe, int pc)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
new file mode 100644
index 000000000000..37790b2617c5
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "nv50.h"
+#include "outpdp.h"
+
+/* Replicate @pattern into the low nibble of all four bytes of one register. */
+int
+gm107_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
+{
+	struct nvkm_device *device = outp->base.disp->engine.subdev.device;
+	const bool link0 = outp->base.info.sorconf.link & 1;
+	/* register depends on sorconf.link bit 0; SORs are 0x800 apart */
+	const u32 reg = (link0 ? 0x61c110 : 0x61c12c) + outp->base.or * 0x800;
+
+	nvkm_mask(device, reg, 0x0f0f0f0f, 0x01010101 * pattern);
+	return 0;
+}
+
+static const struct nvkm_output_dp_func
+gm107_sor_dp_func = {	/* bound to outputs by gm107_sor_dp_new() */
+	.pattern = gm107_sor_dp_pattern,	/* only hook defined in this file */
+	.lnk_pwr = g94_sor_dp_lnk_pwr,
+	.lnk_ctl = gf119_sor_dp_lnk_ctl,	/* gf119 versions reused as-is */
+	.drv_ctl = gf119_sor_dp_drv_ctl,
+};
+
+int
+gm107_sor_dp_new(struct nvkm_disp *disp, int index,
+		 struct dcb_output *dcbE, struct nvkm_output **poutp)
+{	/* plain nvkm_output_dp_new_() call bound to the gm107 hook table */
+	return nvkm_output_dp_new_(&gm107_sor_dp_func, disp, index, dcbE, poutp);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
index 2cfbef9c344f..c44fa7ea672a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
@@ -57,19 +57,6 @@ gm200_sor_dp_lane_map(struct nvkm_device *device, u8 lane)
 }
 
 static int
-gm200_sor_dp_pattern(struct nvkm_output_dp *outp, int pattern)
-{
-	struct nvkm_device *device = outp->base.disp->engine.subdev.device;
-	const u32 soff = gm200_sor_soff(outp);
-	const u32 data = 0x01010101 * pattern;
-	if (outp->base.info.sorconf.link & 1)
-		nvkm_mask(device, 0x61c110 + soff, 0x0f0f0f0f, data);
-	else
-		nvkm_mask(device, 0x61c12c + soff, 0x0f0f0f0f, data);
-	return 0;
-}
-
-static int
 gm200_sor_dp_lnk_pwr(struct nvkm_output_dp *outp, int nr)
 {
 	struct nvkm_device *device = outp->base.disp->engine.subdev.device;
@@ -129,7 +116,7 @@ gm200_sor_dp_drv_ctl(struct nvkm_output_dp *outp,
 
 static const struct nvkm_output_dp_func
 gm200_sor_dp_func = {
-	.pattern = gm200_sor_dp_pattern,
+	.pattern = gm107_sor_dp_pattern,
 	.lnk_pwr = gm200_sor_dp_lnk_pwr,
 	.lnk_ctl = gf119_sor_dp_lnk_ctl,
 	.drv_ctl = gm200_sor_dp_drv_ctl,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
index 65e5d291ecda..98651a43bc12 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
@@ -13,6 +13,7 @@ nvkm-y += nvkm/engine/fifo/gk20a.o
 nvkm-y += nvkm/engine/fifo/gm107.o
 nvkm-y += nvkm/engine/fifo/gm200.o
 nvkm-y += nvkm/engine/fifo/gm20b.o
+nvkm-y += nvkm/engine/fifo/gp100.o
 
 nvkm-y += nvkm/engine/fifo/chan.o
 nvkm-y += nvkm/engine/fifo/channv50.o
@@ -31,3 +32,4 @@ nvkm-y += nvkm/engine/fifo/gpfifogf100.o
 nvkm-y += nvkm/engine/fifo/gpfifogk104.o
 nvkm-y += nvkm/engine/fifo/gpfifogk110.o
 nvkm-y += nvkm/engine/fifo/gpfifogm200.o
+nvkm-y += nvkm/engine/fifo/gpfifogp100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
index e06f4d46f802..230f64e5f731 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
@@ -27,4 +27,5 @@ int gk104_fifo_gpfifo_new(struct nvkm_fifo *, const struct nvkm_oclass *,
 extern const struct nvkm_fifo_chan_oclass gk104_fifo_gpfifo_oclass;
 extern const struct nvkm_fifo_chan_oclass gk110_fifo_gpfifo_oclass;
 extern const struct nvkm_fifo_chan_oclass gm200_fifo_gpfifo_oclass;
+extern const struct nvkm_fifo_chan_oclass gp100_fifo_gpfifo_oclass;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index 743f3a189f28..103c0afaaa6d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -329,7 +329,7 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 	}
 
 	if (eu == NULL) {
-		enum nvkm_devidx engidx = nvkm_top_fault(device->top, unit);
+		enum nvkm_devidx engidx = nvkm_top_fault(device, unit);
 		if (engidx < NVKM_SUBDEV_NR) {
 			const char *src = nvkm_subdev_name[engidx];
 			char *dst = en;
@@ -589,7 +589,6 @@ gk104_fifo_oneinit(struct nvkm_fifo *base)
 	struct gk104_fifo *fifo = gk104_fifo(base);
 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	struct nvkm_top *top = device->top;
 	int engn, runl, pbid, ret, i, j;
 	enum nvkm_devidx engidx;
 	u32 *map;
@@ -608,7 +607,7 @@ gk104_fifo_oneinit(struct nvkm_fifo *base)
 
 	/* Determine runlist configuration from topology device info. */
 	i = 0;
-	while ((int)(engidx = nvkm_top_engine(top, i++, &runl, &engn)) >= 0) {
+	while ((int)(engidx = nvkm_top_engine(device, i++, &runl, &engn)) >= 0) {
 		/* Determine which PBDMA handles requests for this engine. */
 		for (j = 0, pbid = -1; j < fifo->pbdma_nr; j++) {
 			if (map[j] & (1 << runl)) {
@@ -617,8 +616,8 @@ gk104_fifo_oneinit(struct nvkm_fifo *base)
 			}
 		}
 
-		nvkm_debug(subdev, "engine %2d: runlist %2d pbdma %2d\n",
-			   engn, runl, pbid);
+		nvkm_debug(subdev, "engine %2d: runlist %2d pbdma %2d (%s)\n",
+			   engn, runl, pbid, nvkm_subdev_name[engidx]);
 
 		fifo->engine[engn].engine = nvkm_device_engine(device, engidx);
 		fifo->engine[engn].runl = runl;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
new file mode 100644
index 000000000000..eff83f7fb705
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "gk104.h"
+#include "changk104.h"
+
+static const struct nvkm_enum
+gp100_fifo_fault_engine[] = {	/* installed as gp100_fifo.fault.engine */
+	{ 0x01, "DISPLAY" },
+	/* NOTE(review): the 4th field looks like an nvkm_devidx - confirm */
+	{ 0x03, "IFB", NULL, NVKM_ENGINE_IFB },
+	{ 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR },
+	{ 0x05, "BAR2", NULL, NVKM_SUBDEV_INSTMEM },
+	{ 0x06, "HOST0" },	/* 0x06..0x10 are HOST0..HOST10, in order */
+	{ 0x07, "HOST1" },
+	{ 0x08, "HOST2" },
+	{ 0x09, "HOST3" },
+	{ 0x0a, "HOST4" },
+	{ 0x0b, "HOST5" },
+	{ 0x0c, "HOST6" },
+	{ 0x0d, "HOST7" },
+	{ 0x0e, "HOST8" },
+	{ 0x0f, "HOST9" },
+	{ 0x10, "HOST10" },
+	{ 0x13, "PERF" },
+	{ 0x17, "PMU" },
+	{ 0x18, "PTP" },
+	{ 0x1f, "PHYSICAL" },
+	{}	/* presumably the end-of-table sentinel */
+};
+
+static const struct gk104_fifo_func
+gp100_fifo = {	/* passed to gk104_fifo_new_() by gp100_fifo_new() */
+	.fault.engine = gp100_fifo_fault_engine,	/* gp100 table */
+	.fault.reason = gk104_fifo_fault_reason,	/* rest: gk104 tables */
+	.fault.hubclient = gk104_fifo_fault_hubclient,
+	.fault.gpcclient = gk104_fifo_fault_gpcclient,
+	.chan = {
+		&gp100_fifo_gpfifo_oclass,
+		NULL	/* list is NULL-terminated here */
+	},
+};
+
+int
+gp100_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo)
+{	/* NOTE(review): 4096 is presumably the channel count - confirm */
+	return gk104_fifo_new_(&gp100_fifo, device, index, 4096, pfifo);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogp100.c
new file mode 100644
index 000000000000..1530a9217aea
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogp100.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "changk104.h"
+
+#include <nvif/class.h>
+
+const struct nvkm_fifo_chan_oclass
+gp100_fifo_gpfifo_oclass = {	/* non-static: extern in changk104.h */
+	.base.oclass = PASCAL_CHANNEL_GPFIFO_A,
+	.base.minver = 0,	/* min == max: only version 0 is listed */
+	.base.maxver = 0,
+	.ctor = gk104_fifo_gpfifo_new,	/* constructor is the gk104 one */
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
index 290ed0db8047..f1c494182248 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
@@ -31,6 +31,7 @@ nvkm-y += nvkm/engine/gr/gk20a.o
 nvkm-y += nvkm/engine/gr/gm107.o
 nvkm-y += nvkm/engine/gr/gm200.o
 nvkm-y += nvkm/engine/gr/gm20b.o
+nvkm-y += nvkm/engine/gr/gp100.o
 
 nvkm-y += nvkm/engine/gr/ctxnv40.o
 nvkm-y += nvkm/engine/gr/ctxnv50.o
@@ -48,3 +49,4 @@ nvkm-y += nvkm/engine/gr/ctxgk20a.o
 nvkm-y += nvkm/engine/gr/ctxgm107.o
 nvkm-y += nvkm/engine/gr/ctxgm200.o
 nvkm-y += nvkm/engine/gr/ctxgm20b.o
+nvkm-y += nvkm/engine/gr/ctxgp100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
index b02d8f50ea6a..bc77eea351a5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
@@ -1240,7 +1240,7 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	u32 idle_timeout;
 
-	nvkm_mc_unk260(device->mc, 0);
+	nvkm_mc_unk260(device, 0);
 
 	gf100_gr_mmio(gr, grctx->hub);
 	gf100_gr_mmio(gr, grctx->gpc);
@@ -1264,7 +1264,7 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	gf100_gr_icmd(gr, grctx->icmd);
 	nvkm_wr32(device, 0x404154, idle_timeout);
 	gf100_gr_mthd(gr, grctx->mthd);
-	nvkm_mc_unk260(device->mc, 1);
+	nvkm_mc_unk260(device, 1);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
index ac895edce164..52048b5a5274 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
@@ -101,6 +101,8 @@ void gm200_grctx_generate_405b60(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gm20b_grctx;
 
+extern const struct gf100_grctx_func gp100_grctx;
+
 /* context init value lists */
 
 extern const struct gf100_gr_pack gf100_grctx_pack_icmd[];
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
index f521de11a299..c925ade5880e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
@@ -226,7 +226,7 @@ gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	u32 idle_timeout;
 	int i;
 
-	nvkm_mc_unk260(device->mc, 0);
+	nvkm_mc_unk260(device, 0);
 
 	gf100_gr_mmio(gr, grctx->hub);
 	gf100_gr_mmio(gr, grctx->gpc);
@@ -253,7 +253,7 @@ gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	gf100_gr_icmd(gr, grctx->icmd);
 	nvkm_wr32(device, 0x404154, idle_timeout);
 	gf100_gr_mthd(gr, grctx->mthd);
-	nvkm_mc_unk260(device->mc, 1);
+	nvkm_mc_unk260(device, 1);
 }
 
 const struct gf100_grctx_func
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
index 9ba337778ef5..c46b3fdf7203 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
@@ -950,7 +950,7 @@ gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	u32 idle_timeout;
 	int i;
 
-	nvkm_mc_unk260(device->mc, 0);
+	nvkm_mc_unk260(device, 0);
 
 	gf100_gr_mmio(gr, grctx->hub);
 	gf100_gr_mmio(gr, grctx->gpc);
@@ -979,7 +979,7 @@ gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	gf100_gr_icmd(gr, grctx->icmd);
 	nvkm_wr32(device, 0x404154, idle_timeout);
 	gf100_gr_mthd(gr, grctx->mthd);
-	nvkm_mc_unk260(device->mc, 1);
+	nvkm_mc_unk260(device, 1);
 
 	nvkm_mask(device, 0x418800, 0x00200000, 0x00200000);
 	nvkm_mask(device, 0x41be10, 0x00800000, 0x00800000);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
new file mode 100644
index 000000000000..3d1ae7ddf7dd
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "ctxgf100.h"
+
+#include <subdev/fb.h>
+
+/*******************************************************************************
+ * PGRAPH context implementation
+ ******************************************************************************/
+
+static void
+gp100_grctx_generate_pagepool(struct gf100_grctx *info)
+{
+	const struct gf100_grctx_func *func = info->gr->func->grctx;
+	const int shift = 8;	/* used as 1 << shift and as-is below */
+	const u32 flags = NV_MEM_ACCESS_RW | NV_MEM_ACCESS_SYS;
+	const int buf = mmio_vram(info, func->pagepool_size, (1 << shift), flags);
+	mmio_refn(info, 0x40800c, 0x00000000, shift, buf);
+	mmio_wr32(info, 0x408010, 0x80000000);
+	mmio_refn(info, 0x419004, 0x00000000, shift, buf);	/* same buffer */
+	mmio_wr32(info, 0x419008, 0x00000000);
+}
+
+static void
+gp100_grctx_generate_attrib(struct gf100_grctx *info)
+{	/* sizes one buffer for all TPCs, then programs per-PPC sizes/offsets */
+	struct gf100_gr *gr = info->gr;
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+	const u32  alpha = grctx->alpha_nr;
+	const u32 attrib = grctx->attrib_nr;
+	const u32 pertpc = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
+	const u32   size = roundup(gr->tpc_total * pertpc, 0x80);
+	const u32 access = NV_MEM_ACCESS_RW;
+	const int s = 12;
+	const int b = mmio_vram(info, size, (1 << s), access);
+	const int max_batches = 0xffff;
+	u32 ao = 0;	/* ao/bo: running alpha/attrib offsets (0xf8/0xf4) */
+	u32 bo = ao + grctx->alpha_nr_max * gr->tpc_total;
+	int gpc, ppc, n = 0;
+
+	mmio_refn(info, 0x418810, 0x80000000, s, b);	/* all four: buffer b */
+	mmio_refn(info, 0x419848, 0x10000000, s, b);
+	mmio_refn(info, 0x419c2c, 0x10000000, s, b);
+	mmio_refn(info, 0x419b00, 0x00000000, s, b);
+	mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7);
+	mmio_wr32(info, 0x405830, attrib);
+	mmio_wr32(info, 0x40585c, alpha);
+	mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
+			const u32 as =  alpha * gr->ppc_tpc_nr[gpc][ppc];
+			const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc];
+			const u32 u = 0x418ea0 + (n * 0x04);
+			const u32 o = PPC_UNIT(gpc, ppc, 0);
+			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
+				continue;	/* n++ still runs */
+			mmio_wr32(info, o + 0xc0, bs);
+			mmio_wr32(info, o + 0xf4, bo);
+			mmio_wr32(info, o + 0xf0, bs);
+			bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc];
+			mmio_wr32(info, o + 0xe4, as);
+			mmio_wr32(info, o + 0xf8, ao);
+			ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
+			mmio_wr32(info, u, bs);
+		}
+	}
+
+	mmio_wr32(info, 0x418eec, 0x00000000);
+	mmio_wr32(info, 0x41befc, 0x00000000);
+}
+
+static void
+gp100_grctx_generate_405b60(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
+	u32 dist[TPC_MAX / 4] = {}, gpcs[GPC_MAX * 2] = {};
+	u8  tpcnr[GPC_MAX];
+	int tpc, gpc, i;
+
+	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
+
+	/* hand out the TPCs to the GPCs round-robin and record the result in
+	 * dist[] (by TPC) and gpcs[] (by GPC).  the binary driver distributes
+	 * differently when GPCs have unequal TPC counts, but this shall do
+	 * for the moment.  the code for earlier gpus has this issue too.
+	 */
+	for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
+		do {
+			gpc = (gpc + 1) % gr->gpc_nr;
+		} while (!tpcnr[gpc]);
+		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
+		/* four 8-bit slots per word: word is n / 4, slot is n % 4 */
+		dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
+		gpcs[gpc + (gr->gpc_nr * (tpc / 4))] |= i << ((tpc % 4) * 8);
+	}
+
+	for (i = 0; i < dist_nr; i++)
+		nvkm_wr32(device, 0x405b60 + (i * 4), dist[i]);
+	for (i = 0; i < gr->gpc_nr * 2; i++)
+		nvkm_wr32(device, 0x405ba0 + (i * 4), gpcs[i]);
+}
+
+static void
+gp100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
+{	/* driven by the gr->fuc_sw_ctx / fuc_bundle / fuc_method lists */
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+	u32 idle_timeout, tmp;
+	int i;
+
+	gf100_gr_mmio(gr, gr->fuc_sw_ctx);
+
+	idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
+
+	grctx->pagepool(info);	/* per-chip hooks from gr->func->grctx */
+	grctx->bundle(info);
+	grctx->attrib(info);
+	grctx->unkn(gr);
+
+	gm200_grctx_generate_tpcid(gr);
+	gf100_grctx_generate_r406028(gr);
+	gk104_grctx_generate_r418bb8(gr);
+
+	for (i = 0; i < 8; i++)
+		nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000);
+	nvkm_wr32(device, 0x406500, 0x00000000);
+
+	nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr);
+
+	for (tmp = 0, i = 0; i < gr->gpc_nr; i++)	/* 5 bits per GPC */
+		tmp |= ((1 << gr->tpc_nr[i]) - 1) << (i * 5);
+	nvkm_wr32(device, 0x4041c4, tmp);
+
+	gp100_grctx_generate_405b60(gr);
+
+	gf100_gr_icmd(gr, gr->fuc_bundle);
+	nvkm_wr32(device, 0x404154, idle_timeout);	/* from nvkm_mask() */
+	gf100_gr_mthd(gr, gr->fuc_method);
+}
+
+const struct gf100_grctx_func
+gp100_grctx = {	/* non-static: declared extern in ctxgf100.h */
+	.main  = gp100_grctx_generate_main,
+	.unkn  = gk104_grctx_generate_unkn,	/* gk104/gm107 hooks reused */
+	.bundle = gm107_grctx_generate_bundle,
+	.bundle_size = 0x3000,
+	.bundle_min_gpm_fifo_depth = 0x180,
+	.bundle_token_limit = 0x1080,
+	.pagepool = gp100_grctx_generate_pagepool,
+	.pagepool_size = 0x20000,	/* size passed to mmio_vram() */
+	.attrib = gp100_grctx_generate_attrib,
+	.attrib_nr_max = 0x660,	/* _nr_max: stride, _nr: size used */
+	.attrib_nr = 0x440,
+	.alpha_nr_max = 0xc00,
+	.alpha_nr = 0x800,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index 9513badb8220..157919c788e6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -949,22 +949,41 @@ gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc)
 }
 
 static const struct nvkm_enum gf100_mp_warp_error[] = {
-	{ 0x00, "NO_ERROR" },
-	{ 0x01, "STACK_MISMATCH" },
+	{ 0x01, "STACK_ERROR" },
+	{ 0x02, "API_STACK_ERROR" },
+	{ 0x03, "RET_EMPTY_STACK_ERROR" },
+	{ 0x04, "PC_WRAP" },
 	{ 0x05, "MISALIGNED_PC" },
-	{ 0x08, "MISALIGNED_GPR" },
-	{ 0x09, "INVALID_OPCODE" },
-	{ 0x0d, "GPR_OUT_OF_BOUNDS" },
-	{ 0x0e, "MEM_OUT_OF_BOUNDS" },
-	{ 0x0f, "UNALIGNED_MEM_ACCESS" },
+	{ 0x06, "PC_OVERFLOW" },
+	{ 0x07, "MISALIGNED_IMMC_ADDR" },
+	{ 0x08, "MISALIGNED_REG" },
+	{ 0x09, "ILLEGAL_INSTR_ENCODING" },
+	{ 0x0a, "ILLEGAL_SPH_INSTR_COMBO" },
+	{ 0x0b, "ILLEGAL_INSTR_PARAM" },
+	{ 0x0c, "INVALID_CONST_ADDR" },
+	{ 0x0d, "OOR_REG" },
+	{ 0x0e, "OOR_ADDR" },
+	{ 0x0f, "MISALIGNED_ADDR" },
 	{ 0x10, "INVALID_ADDR_SPACE" },
-	{ 0x11, "INVALID_PARAM" },
+	{ 0x11, "ILLEGAL_INSTR_PARAM2" },
+	{ 0x12, "INVALID_CONST_ADDR_LDC" },
+	{ 0x13, "GEOMETRY_SM_ERROR" },
+	{ 0x14, "DIVERGENT" },
+	{ 0x15, "WARP_EXIT" },
 	{}
 };
 
 static const struct nvkm_bitfield gf100_mp_global_error[] = {
+	{ 0x00000001, "SM_TO_SM_FAULT" },
+	{ 0x00000002, "L1_ERROR" },
 	{ 0x00000004, "MULTIPLE_WARP_ERRORS" },
-	{ 0x00000008, "OUT_OF_STACK_SPACE" },
+	{ 0x00000008, "PHYSICAL_STACK_OVERFLOW" },
+	{ 0x00000010, "BPT_INT" },
+	{ 0x00000020, "BPT_PAUSE" },
+	{ 0x00000040, "SINGLE_STEP_COMPLETE" },
+	{ 0x20000000, "ECC_SEC_ERROR" },
+	{ 0x40000000, "ECC_DED_ERROR" },
+	{ 0x80000000, "TIMEOUT" },
 	{}
 };
 
@@ -1438,24 +1457,30 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_secboot *sb = device->secboot;
 	int i;
+	int ret = 0;
 
 	if (gr->firmware) {
 		/* load fuc microcode */
-		nvkm_mc_unk260(device->mc, 0);
+		nvkm_mc_unk260(device, 0);
 
 		/* securely-managed falcons must be reset using secure boot */
 		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
-			nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
+			ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
 		else
 			gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c,
 					 &gr->fuc409d);
+		if (ret)
+			return ret;
+
 		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
-			nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
+			ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
 		else
 			gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac,
 					 &gr->fuc41ad);
+		if (ret)
+			return ret;
 
-		nvkm_mc_unk260(device->mc, 1);
+		nvkm_mc_unk260(device, 1);
 
 		/* start both of them running */
 		nvkm_wr32(device, 0x409840, 0xffffffff);
@@ -1557,7 +1582,7 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 	}
 
 	/* load HUB microcode */
-	nvkm_mc_unk260(device->mc, 0);
+	nvkm_mc_unk260(device, 0);
 	nvkm_wr32(device, 0x4091c0, 0x01000000);
 	for (i = 0; i < gr->func->fecs.ucode->data.size / 4; i++)
 		nvkm_wr32(device, 0x4091c4, gr->func->fecs.ucode->data.data[i]);
@@ -1580,7 +1605,7 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 			nvkm_wr32(device, 0x41a188, i >> 6);
 		nvkm_wr32(device, 0x41a184, gr->func->gpccs.ucode->code.data[i]);
 	}
-	nvkm_mc_unk260(device->mc, 1);
+	nvkm_mc_unk260(device, 1);
 
 	/* load register lists */
 	gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
index 2b98abdb9270..268b8d60ff73 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -292,4 +292,6 @@ extern const struct gf100_gr_init gm107_gr_init_l1c_0[];
 extern const struct gf100_gr_init gm107_gr_init_wwdx_0[];
 extern const struct gf100_gr_init gm107_gr_init_cbm_0[];
 void gm107_gr_init_bios(struct gf100_gr *);
+
+void gm200_gr_init_gpc_mmu(struct gf100_gr *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
index 4ca8ed15191c..de8b806b88fd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
@@ -361,6 +361,5 @@ gk20a_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
 	if (ret)
 		return ret;
 
-
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
index 4dfa4513bb6c..6435f1257572 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
@@ -38,7 +38,7 @@ gm200_gr_rops(struct gf100_gr *gr)
 	return nvkm_rd32(gr->base.engine.subdev.device, 0x12006c);
 }
 
-static void
+void
 gm200_gr_init_gpc_mmu(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
new file mode 100644
index 000000000000..26ad79def0ff
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "gf100.h"
+#include "ctxgf100.h"
+
+#include <nvif/class.h>
+
+/*******************************************************************************
+ * PGRAPH engine/subdev functions
+ ******************************************************************************/
+
+static void
+gp100_gr_init_rop_active_fbps(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	/* XXX: same as gm200 apart from the field mask; use it everywhere? */
+	const u32 active = nvkm_rd32(device, 0x12006c) & 0x0000000f;
+	nvkm_mask(device, 0x408850, 0x0000000f, active); /* zrop */
+	nvkm_mask(device, 0x408958, 0x0000000f, active); /* crop */
+}
+
+static int
+gp100_gr_init(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
+	u32 data[TPC_MAX / 8] = {};
+	u8  tpcnr[GPC_MAX];
+	int gpc, tpc, rop;
+	int i;
+
+	gr->func->init_gpc_mmu(gr);
+
+	gf100_gr_mmio(gr, gr->fuc_sw_nonctx);
+
+	nvkm_wr32(device, GPC_UNIT(0, 0x3018), 0x00000001);
+
+	/* data[] starts zeroed (= {}); pack one 4-bit TPC index per entry */
+	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
+	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
+		do {
+			gpc = (gpc + 1) % gr->gpc_nr;
+		} while (!tpcnr[gpc]);
+		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
+
+		data[i / 8] |= tpc << ((i % 8) * 4);
+	}
+
+	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
+	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
+	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
+	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
+			  gr->screen_tile_row_offset << 8 | gr->tpc_nr[gpc]);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
+							 gr->tpc_total);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
+	}
+
+	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
+	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
+	nvkm_wr32(device, GPC_BCAST(0x033c), nvkm_rd32(device, 0x100804));
+
+	gr->func->init_rop_active_fbps(gr);
+
+	nvkm_wr32(device, 0x400500, 0x00010001);
+	nvkm_wr32(device, 0x400100, 0xffffffff);
+	nvkm_wr32(device, 0x40013c, 0xffffffff);
+	nvkm_wr32(device, 0x400124, 0x00000002);
+	nvkm_wr32(device, 0x409c24, 0x000f0002);
+	nvkm_wr32(device, 0x405848, 0xc0000000);
+	nvkm_mask(device, 0x40584c, 0x00000000, 0x00000001);
+	nvkm_wr32(device, 0x404000, 0xc0000000);
+	nvkm_wr32(device, 0x404600, 0xc0000000);
+	nvkm_wr32(device, 0x408030, 0xc0000000);
+	nvkm_wr32(device, 0x404490, 0xc0000000);
+	nvkm_wr32(device, 0x406018, 0xc0000000);
+	nvkm_wr32(device, 0x407020, 0x40000000);
+	nvkm_wr32(device, 0x405840, 0xc0000000);
+	nvkm_wr32(device, 0x405844, 0x00ffffff);
+	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
+
+	nvkm_mask(device, 0x419c9c, 0x00010000, 0x00010000);
+	nvkm_mask(device, 0x419c9c, 0x00020000, 0x00020000);
+
+	gr->func->init_ppc_exceptions(gr);
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
+		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x430), 0xc0000000);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x00dffffe);
+			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x00000105);
+		}
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
+		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
+	}
+
+	for (rop = 0; rop < gr->rop_nr; rop++) {
+		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
+	}
+
+	nvkm_wr32(device, 0x400108, 0xffffffff);
+	nvkm_wr32(device, 0x400138, 0xffffffff);
+	nvkm_wr32(device, 0x400118, 0xffffffff);
+	nvkm_wr32(device, 0x400130, 0xffffffff);
+	nvkm_wr32(device, 0x40011c, 0xffffffff);
+	nvkm_wr32(device, 0x400134, 0xffffffff);
+
+	gf100_gr_zbc_init(gr);
+
+	return gf100_gr_init_ctxctl(gr);
+}
+
+static const struct gf100_gr_func
+gp100_gr = {
+	.init = gp100_gr_init,
+	.init_gpc_mmu = gm200_gr_init_gpc_mmu,
+	.init_rop_active_fbps = gp100_gr_init_rop_active_fbps,
+	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+	.rops = gm200_gr_rops,
+	.ppc_nr = 2,
+	.grctx = &gp100_grctx,
+	.sclass = {
+		{ -1, -1, FERMI_TWOD_A },
+		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
+		{ -1, -1, PASCAL_A, &gf100_fermi },
+		{ -1, -1, PASCAL_COMPUTE_A },
+		{}
+	}
+};
+
+int
+gp100_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
+{
+	return gm200_gr_new_(&gp100_gr, device, index, pgr);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
index e15b9627b07e..f3c30b2a788e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
@@ -26,6 +26,49 @@
 #include <subdev/bios.h>
 #include <subdev/bios/bmp.h>
 #include <subdev/bios/bit.h>
+#include <subdev/bios/image.h>
+
+static bool
+nvbios_addr(struct nvkm_bios *bios, u32 *addr, u8 size)
+{
+	u32 p = *addr;
+	/* Rebase offsets beyond image 0. NOTE(review): '>' or '>='? confirm. */
+	if (*addr > bios->image0_size && bios->imaged_addr) {
+		*addr -= bios->image0_size;
+		*addr += bios->imaged_addr;
+	}
+	/* Accept iff [*addr, *addr + size) lies in data[]; no u32 wrap-around. */
+	if (unlikely(size > bios->size || *addr > bios->size - size)) {
+		nvkm_error(&bios->subdev, "OOB %d %08x %08x\n", size, p, *addr);
+		return false;
+	}
+
+	return true;
+}
+
+u8
+nvbios_rd08(struct nvkm_bios *bios, u32 addr)
+{
+	if (unlikely(!nvbios_addr(bios, &addr, 1)))
+		return 0x00;	/* rejected offset reads back as zero */
+	return bios->data[addr];
+}
+
+u16
+nvbios_rd16(struct nvkm_bios *bios, u32 addr)
+{
+	if (unlikely(!nvbios_addr(bios, &addr, 2)))
+		return 0x0000;	/* rejected offset reads back as zero */
+	return get_unaligned_le16(&bios->data[addr]);
+}
+
+u32
+nvbios_rd32(struct nvkm_bios *bios, u32 addr)
+{
+	if (unlikely(!nvbios_addr(bios, &addr, 4)))
+		return 0x00000000;	/* rejected offset reads back as zero */
+	return get_unaligned_le32(&bios->data[addr]);
+}
 
 u8
 nvbios_checksum(const u8 *data, int size)
@@ -100,8 +143,9 @@ int
 nvkm_bios_new(struct nvkm_device *device, int index, struct nvkm_bios **pbios)
 {
 	struct nvkm_bios *bios;
+	struct nvbios_image image;
 	struct bit_entry bit_i;
-	int ret;
+	int ret, idx = 0;
 
 	if (!(bios = *pbios = kzalloc(sizeof(*bios), GFP_KERNEL)))
 		return -ENOMEM;
@@ -111,6 +155,19 @@ nvkm_bios_new(struct nvkm_device *device, int index, struct nvkm_bios **pbios)
 	if (ret)
 		return ret;
 
+	/* Some tables have weird pointers that need adjustment before
+	 * they're dereferenced.  I'm not entirely sure why...
+	 */
+	if (nvbios_image(bios, idx++, &image)) {
+		bios->image0_size = image.size;
+		while (nvbios_image(bios, idx++, &image)) {
+			if (image.type == 0xe0) {
+				bios->imaged_addr = image.base;
+				break;
+			}
+		}
+	}
+
 	/* detect type of vbios we're dealing with */
 	bios->bmp_offset = nvbios_findstr(bios->data, bios->size,
 					  "\xff\x7f""NV\0", 5);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c
index a5e92135cd77..9efb1b48cd54 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/disp.c
@@ -141,7 +141,8 @@ nvbios_ocfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
 {
 	u16 data = nvbios_ocfg_entry(bios, outp, idx, ver, hdr, cnt, len);
 	if (data) {
-		info->match     = nvbios_rd16(bios, data + 0x00);
+		info->proto     = nvbios_rd08(bios, data + 0x00);
+		info->flags     = nvbios_rd16(bios, data + 0x01);
 		info->clkcmp[0] = nvbios_rd16(bios, data + 0x02);
 		info->clkcmp[1] = nvbios_rd16(bios, data + 0x04);
 	}
@@ -149,12 +150,13 @@ nvbios_ocfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
 }
 
 u16
-nvbios_ocfg_match(struct nvkm_bios *bios, u16 outp, u16 type,
+nvbios_ocfg_match(struct nvkm_bios *bios, u16 outp, u8 proto, u8 flags,
 		  u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ocfg *info)
 {
 	u16 data, idx = 0;
 	while ((data = nvbios_ocfg_parse(bios, outp, idx++, ver, hdr, cnt, len, info))) {
-		if (info->match == type)
+		if ((info->proto == proto || info->proto == 0xff) &&
+		    (info->flags == flags))
 			break;
 	}
 	return data;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
index 05332476354a..d89e78c4e689 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
@@ -40,6 +40,7 @@ nvbios_dp_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 				case 0x30:
 				case 0x40:
 				case 0x41:
+				case 0x42:
 					*hdr = nvbios_rd08(bios, data + 0x01);
 					*len = nvbios_rd08(bios, data + 0x02);
 					*cnt = nvbios_rd08(bios, data + 0x03);
@@ -70,6 +71,7 @@ nvbios_dpout_entry(struct nvkm_bios *bios, u8 idx,
 			break;
 		case 0x40:
 		case 0x41:
+		case 0x42:
 			*hdr = nvbios_rd08(bios, data + 0x04);
 			*cnt = 0;
 			*len = 0;
@@ -109,6 +111,7 @@ nvbios_dpout_parse(struct nvkm_bios *bios, u8 idx,
 			break;
 		case 0x40:
 		case 0x41:
+		case 0x42:
 			info->flags     = nvbios_rd08(bios, data + 0x04);
 			info->script[0] = nvbios_rd16(bios, data + 0x05);
 			info->script[1] = nvbios_rd16(bios, data + 0x07);
@@ -180,6 +183,11 @@ nvbios_dpcfg_parse(struct nvkm_bios *bios, u16 outp, u8 idx,
 			info->pe    = nvbios_rd08(bios, data + 0x02);
 			info->tx_pu = nvbios_rd08(bios, data + 0x03);
 			break;
+		case 0x42:
+			info->dc    = nvbios_rd08(bios, data + 0x00);
+			info->pe    = nvbios_rd08(bios, data + 0x01);
+			info->tx_pu = nvbios_rd08(bios, data + 0x02);
+			break;
 		default:
 			data = 0x0000;
 			break;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/image.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/image.c
index 74b14cf09308..1dbff7aeafec 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/image.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/image.c
@@ -68,11 +68,16 @@ nvbios_imagen(struct nvkm_bios *bios, struct nvbios_image *image)
 bool
 nvbios_image(struct nvkm_bios *bios, int idx, struct nvbios_image *image)
 {
+	u32 imaged_addr = bios->imaged_addr;
 	memset(image, 0x00, sizeof(*image));
+	bios->imaged_addr = 0;
 	do {
 		image->base += image->size;
-		if (image->last || !nvbios_imagen(bios, image))
+		if (image->last || !nvbios_imagen(bios, image)) {
+			bios->imaged_addr = imaged_addr;
 			return false;
+		}
 	} while(idx--);
+	bios->imaged_addr = imaged_addr;
 	return true;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c
index 91a7dc56e406..2ca23a9157ab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pll.c
@@ -77,15 +77,17 @@ g84_pll_mapping[] = {
 	{}
 };
 
-static u16
+static u32
 pll_limits_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
 	struct bit_entry bit_C;
-	u16 data = 0x0000;
+	u32 data = 0x0000;
 
 	if (!bit_entry(bios, 'C', &bit_C)) {
 		if (bit_C.version == 1 && bit_C.length >= 10)
 			data = nvbios_rd16(bios, bit_C.offset + 8);
+		if (bit_C.version == 2 && bit_C.length >= 4)
+			data = nvbios_rd32(bios, bit_C.offset + 0);
 		if (data) {
 			*ver = nvbios_rd08(bios, data + 0);
 			*hdr = nvbios_rd08(bios, data + 1);
@@ -137,12 +139,12 @@ pll_map(struct nvkm_bios *bios)
 	}
 }
 
-static u16
+static u32
 pll_map_reg(struct nvkm_bios *bios, u32 reg, u32 *type, u8 *ver, u8 *len)
 {
 	struct pll_mapping *map;
 	u8  hdr, cnt;
-	u16 data;
+	u32 data;
 
 	data = pll_limits_table(bios, ver, &hdr, &cnt, len);
 	if (data && *ver >= 0x30) {
@@ -160,7 +162,7 @@ pll_map_reg(struct nvkm_bios *bios, u32 reg, u32 *type, u8 *ver, u8 *len)
 	map = pll_map(bios);
 	while (map && map->reg) {
 		if (map->reg == reg && *ver >= 0x20) {
-			u16 addr = (data += hdr);
+			u32 addr = (data += hdr);
 			*type = map->type;
 			while (cnt--) {
 				if (nvbios_rd32(bios, data) == map->reg)
@@ -179,12 +181,12 @@ pll_map_reg(struct nvkm_bios *bios, u32 reg, u32 *type, u8 *ver, u8 *len)
 	return 0x0000;
 }
 
-static u16
+static u32
 pll_map_type(struct nvkm_bios *bios, u8 type, u32 *reg, u8 *ver, u8 *len)
 {
 	struct pll_mapping *map;
 	u8  hdr, cnt;
-	u16 data;
+	u32 data;
 
 	data = pll_limits_table(bios, ver, &hdr, &cnt, len);
 	if (data && *ver >= 0x30) {
@@ -202,7 +204,7 @@ pll_map_type(struct nvkm_bios *bios, u8 type, u32 *reg, u8 *ver, u8 *len)
 	map = pll_map(bios);
 	while (map && map->reg) {
 		if (map->type == type && *ver >= 0x20) {
-			u16 addr = (data += hdr);
+			u32 addr = (data += hdr);
 			*reg = map->reg;
 			while (cnt--) {
 				if (nvbios_rd32(bios, data) == map->reg)
@@ -228,7 +230,7 @@ nvbios_pll_parse(struct nvkm_bios *bios, u32 type, struct nvbios_pll *info)
 	struct nvkm_device *device = subdev->device;
 	u8  ver, len;
 	u32 reg = type;
-	u16 data;
+	u32 data;
 
 	if (type > PLL_MAX) {
 		reg  = type;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c
index c268e5afe852..b4a308f3cf7b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/pmu.c
@@ -26,21 +26,6 @@
 #include <subdev/bios/image.h>
 #include <subdev/bios/pmu.h>
 
-static u32
-weirdo_pointer(struct nvkm_bios *bios, u32 data)
-{
-	struct nvbios_image image;
-	int idx = 0;
-	if (nvbios_image(bios, idx++, &image)) {
-		data -= image.size;
-		while (nvbios_image(bios, idx++, &image)) {
-			if (image.type == 0xe0)
-				return image.base + data;
-		}
-	}
-	return 0;
-}
-
 u32
 nvbios_pmuTe(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
@@ -50,7 +35,7 @@ nvbios_pmuTe(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 	if (!bit_entry(bios, 'p', &bit_p)) {
 		if (bit_p.version == 2 && bit_p.length >= 4)
 			data = nvbios_rd32(bios, bit_p.offset + 0x00);
-		if ((data = weirdo_pointer(bios, data))) {
+		if (data) {
 			*ver = nvbios_rd08(bios, data + 0x00); /* maybe? */
 			*hdr = nvbios_rd08(bios, data + 0x01);
 			*len = nvbios_rd08(bios, data + 0x02);
@@ -97,8 +82,7 @@ nvbios_pmuRm(struct nvkm_bios *bios, u8 type, struct nvbios_pmuR *info)
 	u32 data;
 	memset(info, 0x00, sizeof(*info));
 	while ((data = nvbios_pmuEp(bios, idx++, &ver, &hdr, &pmuE))) {
-		if ( pmuE.type == type &&
-		    (data = weirdo_pointer(bios, pmuE.data))) {
+		if (pmuE.type == type && (data = pmuE.data)) {
 			info->init_addr_pmu = nvbios_rd32(bios, data + 0x08);
 			info->args_addr_pmu = nvbios_rd32(bios, data + 0x0c);
 			info->boot_addr     = data + 0x30;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
index d0ae7454764e..b57c370c725d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/rammap.c
@@ -30,11 +30,11 @@ nvbios_rammapTe(struct nvkm_bios *bios, u8 *ver, u8 *hdr,
 		u8 *cnt, u8 *len, u8 *snr, u8 *ssz)
 {
 	struct bit_entry bit_P;
-	u16 rammap = 0x0000;
+	u32 rammap = 0x0000;
 
 	if (!bit_entry(bios, 'P', &bit_P)) {
 		if (bit_P.version == 2)
-			rammap = nvbios_rd16(bios, bit_P.offset + 4);
+			rammap = nvbios_rd32(bios, bit_P.offset + 4);
 
 		if (rammap) {
 			*ver = nvbios_rd08(bios, rammap + 0);
@@ -61,7 +61,7 @@ nvbios_rammapEe(struct nvkm_bios *bios, int idx,
 		u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
 	u8  snr, ssz;
-	u16 rammap = nvbios_rammapTe(bios, ver, hdr, cnt, len, &snr, &ssz);
+	u32 rammap = nvbios_rammapTe(bios, ver, hdr, cnt, len, &snr, &ssz);
 	if (rammap && idx < *cnt) {
 		rammap = rammap + *hdr + (idx * (*len + (snr * ssz)));
 		*hdr = *len;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c
index 78c449b417b7..89d5543118cf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c
@@ -99,7 +99,7 @@ read_div(struct gf100_clk *clk, int doff, u32 dsrc, u32 dctl)
 {
 	struct nvkm_device *device = clk->base.subdev.device;
 	u32 ssrc = nvkm_rd32(device, dsrc + (doff * 4));
-	u32 sctl = nvkm_rd32(device, dctl + (doff * 4));
+	u32 sclk, sctl, sdiv = 2;
 
 	switch (ssrc & 0x00000003) {
 	case 0:
@@ -109,13 +109,21 @@ read_div(struct gf100_clk *clk, int doff, u32 dsrc, u32 dctl)
 	case 2:
 		return 100000;
 	case 3:
-		if (sctl & 0x80000000) {
-			u32 sclk = read_vco(clk, dsrc + (doff * 4));
-			u32 sdiv = (sctl & 0x0000003f) + 2;
-			return (sclk * 2) / sdiv;
+		sclk = read_vco(clk, dsrc + (doff * 4));
+
+		/* Memclk has doff of 0 despite its alt. location */
+		if (doff <= 2) {
+			sctl = nvkm_rd32(device, dctl + (doff * 4));
+
+			if (sctl & 0x80000000) {
+				if (ssrc & 0x100)
+					sctl >>= 8;
+
+				sdiv = (sctl & 0x3f) + 2;
+			}
 		}
 
-		return read_vco(clk, dsrc + (doff * 4));
+		return (sclk * 2) / sdiv;
 	default:
 		return 0;
 	}
@@ -366,11 +374,17 @@ gf100_clk_prog_2(struct gf100_clk *clk, int idx)
 		if (info->coef) {
 			nvkm_wr32(device, addr + 0x04, info->coef);
 			nvkm_mask(device, addr + 0x00, 0x00000001, 0x00000001);
+
+			/* Test PLL lock */
+			nvkm_mask(device, addr + 0x00, 0x00000010, 0x00000000);
 			nvkm_msec(device, 2000,
 				if (nvkm_rd32(device, addr + 0x00) & 0x00020000)
 					break;
 			);
-			nvkm_mask(device, addr + 0x00, 0x00020004, 0x00000004);
+			nvkm_mask(device, addr + 0x00, 0x00000010, 0x00000010);
+
+			/* Enable sync mode */
+			nvkm_mask(device, addr + 0x00, 0x00000004, 0x00000004);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c
index 975c401bccab..06bc0d2d6ae1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c
@@ -393,11 +393,17 @@ gk104_clk_prog_2(struct gk104_clk *clk, int idx)
 	if (info->coef) {
 		nvkm_wr32(device, addr + 0x04, info->coef);
 		nvkm_mask(device, addr + 0x00, 0x00000001, 0x00000001);
+
+		/* Test PLL lock */
+		nvkm_mask(device, addr + 0x00, 0x00000010, 0x00000000);
 		nvkm_msec(device, 2000,
 			if (nvkm_rd32(device, addr + 0x00) & 0x00020000)
 				break;
 		);
-		nvkm_mask(device, addr + 0x00, 0x00020004, 0x00000004);
+		nvkm_mask(device, addr + 0x00, 0x00000010, 0x00000010);
+
+		/* Enable sync mode */
+		nvkm_mask(device, addr + 0x00, 0x00000004, 0x00000004);
 	}
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c
index 5f0ee24e31b8..218893e3e5f9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.c
@@ -28,69 +28,6 @@
 #include <core/tegra.h>
 #include <subdev/timer.h>
 
-#define KHZ (1000)
-#define MHZ (KHZ * 1000)
-
-#define MASK(w)	((1 << w) - 1)
-
-#define GPCPLL_CFG		(SYS_GPCPLL_CFG_BASE + 0)
-#define GPCPLL_CFG_ENABLE	BIT(0)
-#define GPCPLL_CFG_IDDQ		BIT(1)
-#define GPCPLL_CFG_LOCK_DET_OFF	BIT(4)
-#define GPCPLL_CFG_LOCK		BIT(17)
-
-#define GPCPLL_COEFF		(SYS_GPCPLL_CFG_BASE + 4)
-#define GPCPLL_COEFF_M_SHIFT	0
-#define GPCPLL_COEFF_M_WIDTH	8
-#define GPCPLL_COEFF_N_SHIFT	8
-#define GPCPLL_COEFF_N_WIDTH	8
-#define GPCPLL_COEFF_P_SHIFT	16
-#define GPCPLL_COEFF_P_WIDTH	6
-
-#define GPCPLL_CFG2			(SYS_GPCPLL_CFG_BASE + 0xc)
-#define GPCPLL_CFG2_SETUP2_SHIFT	16
-#define GPCPLL_CFG2_PLL_STEPA_SHIFT	24
-
-#define GPCPLL_CFG3			(SYS_GPCPLL_CFG_BASE + 0x18)
-#define GPCPLL_CFG3_PLL_STEPB_SHIFT	16
-
-#define GPC_BCASE_GPCPLL_CFG_BASE		0x00132800
-#define GPCPLL_NDIV_SLOWDOWN			(SYS_GPCPLL_CFG_BASE + 0x1c)
-#define GPCPLL_NDIV_SLOWDOWN_NDIV_LO_SHIFT	0
-#define GPCPLL_NDIV_SLOWDOWN_NDIV_MID_SHIFT	8
-#define GPCPLL_NDIV_SLOWDOWN_STEP_SIZE_LO2MID_SHIFT	16
-#define GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT	22
-#define GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT	31
-
-#define SEL_VCO				(SYS_GPCPLL_CFG_BASE + 0x100)
-#define SEL_VCO_GPC2CLK_OUT_SHIFT	0
-
-#define GPC2CLK_OUT			(SYS_GPCPLL_CFG_BASE + 0x250)
-#define GPC2CLK_OUT_SDIV14_INDIV4_WIDTH	1
-#define GPC2CLK_OUT_SDIV14_INDIV4_SHIFT	31
-#define GPC2CLK_OUT_SDIV14_INDIV4_MODE	1
-#define GPC2CLK_OUT_VCODIV_WIDTH	6
-#define GPC2CLK_OUT_VCODIV_SHIFT	8
-#define GPC2CLK_OUT_VCODIV1		0
-#define GPC2CLK_OUT_VCODIV_MASK		(MASK(GPC2CLK_OUT_VCODIV_WIDTH) << \
-					GPC2CLK_OUT_VCODIV_SHIFT)
-#define GPC2CLK_OUT_BYPDIV_WIDTH	6
-#define GPC2CLK_OUT_BYPDIV_SHIFT	0
-#define GPC2CLK_OUT_BYPDIV31		0x3c
-#define GPC2CLK_OUT_INIT_MASK	((MASK(GPC2CLK_OUT_SDIV14_INDIV4_WIDTH) << \
-		GPC2CLK_OUT_SDIV14_INDIV4_SHIFT)\
-		| (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << GPC2CLK_OUT_VCODIV_SHIFT)\
-		| (MASK(GPC2CLK_OUT_BYPDIV_WIDTH) << GPC2CLK_OUT_BYPDIV_SHIFT))
-#define GPC2CLK_OUT_INIT_VAL	((GPC2CLK_OUT_SDIV14_INDIV4_MODE << \
-		GPC2CLK_OUT_SDIV14_INDIV4_SHIFT) \
-		| (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT) \
-		| (GPC2CLK_OUT_BYPDIV31 << GPC2CLK_OUT_BYPDIV_SHIFT))
-
-#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG	(GPC_BCASE_GPCPLL_CFG_BASE + 0xa0)
-#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT	24
-#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK \
-	    (0x1 << GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT)
-
 static const u8 _pl_to_div[] = {
 /* PL:   0, 1, 2, 3, 4, 5, 6,  7,  8,  9, 10, 11, 12, 13, 14 */
 /* p: */ 1, 2, 3, 4, 5, 6, 8, 10, 12, 16, 12, 16, 20, 24, 32,
@@ -124,7 +61,7 @@ static const struct gk20a_clk_pllg_params gk20a_pllg_params = {
 	.min_pl = 1, .max_pl = 32,
 };
 
-static void
+void
 gk20a_pllg_read_mnp(struct gk20a_clk *clk, struct gk20a_pll *pll)
 {
 	struct nvkm_device *device = clk->base.subdev.device;
@@ -136,20 +73,33 @@ gk20a_pllg_read_mnp(struct gk20a_clk *clk, struct gk20a_pll *pll)
 	pll->pl = (val >> GPCPLL_COEFF_P_SHIFT) & MASK(GPCPLL_COEFF_P_WIDTH);
 }
 
-static u32
-gk20a_pllg_calc_rate(struct gk20a_clk *clk)
+void
+gk20a_pllg_write_mnp(struct gk20a_clk *clk, const struct gk20a_pll *pll)
+{
+	struct nvkm_device *device = clk->base.subdev.device;
+	u32 coeff = 0;	/* M, N and PL, each masked to its field width */
+
+	coeff |= (pll->m & MASK(GPCPLL_COEFF_M_WIDTH)) << GPCPLL_COEFF_M_SHIFT;
+	coeff |= (pll->n & MASK(GPCPLL_COEFF_N_WIDTH)) << GPCPLL_COEFF_N_SHIFT;
+	coeff |= (pll->pl & MASK(GPCPLL_COEFF_P_WIDTH)) << GPCPLL_COEFF_P_SHIFT;
+	nvkm_wr32(device, GPCPLL_COEFF, coeff);
+}
+
+u32
+gk20a_pllg_calc_rate(struct gk20a_clk *clk, struct gk20a_pll *pll)
 {
 	u32 rate;
 	u32 divider;
 
-	rate = clk->parent_rate * clk->pll.n;
-	divider = clk->pll.m * clk->pl_to_div(clk->pll.pl);
+	rate = clk->parent_rate * pll->n;
+	divider = pll->m * clk->pl_to_div(pll->pl);
 
 	return rate / divider / 2;
 }
 
-static int
-gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate)
+int
+gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate,
+		    struct gk20a_pll *pll)
 {
 	struct nvkm_subdev *subdev = &clk->base.subdev;
 	u32 target_clk_f, ref_clk_f, target_freq;
@@ -163,16 +113,13 @@ gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate)
 	target_clk_f = rate * 2 / KHZ;
 	ref_clk_f = clk->parent_rate / KHZ;
 
-	max_vco_f = clk->params->max_vco;
+	target_vco_f = target_clk_f + target_clk_f / 50;
+	max_vco_f = max(clk->params->max_vco, target_vco_f);
 	min_vco_f = clk->params->min_vco;
 	best_m = clk->params->max_m;
 	best_n = clk->params->min_n;
 	best_pl = clk->params->min_pl;
 
-	target_vco_f = target_clk_f + target_clk_f / 50;
-	if (max_vco_f < target_vco_f)
-		max_vco_f = target_vco_f;
-
 	/* min_pl <= high_pl <= max_pl */
 	high_pl = (max_vco_f + target_vco_f - 1) / target_vco_f;
 	high_pl = min(high_pl, clk->params->max_pl);
@@ -195,9 +142,7 @@ gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate)
 		target_vco_f = target_clk_f * clk->pl_to_div(pl);
 
 		for (m = clk->params->min_m; m <= clk->params->max_m; m++) {
-			u32 u_f, vco_f;
-
-			u_f = ref_clk_f / m;
+			u32 u_f = ref_clk_f / m;
 
 			if (u_f < clk->params->min_u)
 				break;
@@ -211,6 +156,8 @@ gk20a_pllg_calc_mnp(struct gk20a_clk *clk, unsigned long rate)
 				break;
 
 			for (; n <= n2; n++) {
+				u32 vco_f;
+
 				if (n < clk->params->min_n)
 					continue;
 				if (n > clk->params->max_n)
@@ -247,16 +194,16 @@ found_match:
 			   "no best match for target @ %dMHz on gpc_pll",
 			   target_clk_f / KHZ);
 
-	clk->pll.m = best_m;
-	clk->pll.n = best_n;
-	clk->pll.pl = best_pl;
+	pll->m = best_m;
+	pll->n = best_n;
+	pll->pl = best_pl;
 
-	target_freq = gk20a_pllg_calc_rate(clk);
+	target_freq = gk20a_pllg_calc_rate(clk, pll);
 
 	nvkm_debug(subdev,
-		   "actual target freq %d MHz, M %d, N %d, PL %d(div%d)\n",
-		   target_freq / MHZ, clk->pll.m, clk->pll.n, clk->pll.pl,
-		   clk->pl_to_div(clk->pll.pl));
+		   "actual target freq %d KHz, M %d, N %d, PL %d(div%d)\n",
+		   target_freq / KHZ, pll->m, pll->n, pll->pl,
+		   clk->pl_to_div(pll->pl));
 	return 0;
 }
 
@@ -265,45 +212,36 @@ gk20a_pllg_slide(struct gk20a_clk *clk, u32 n)
 {
 	struct nvkm_subdev *subdev = &clk->base.subdev;
 	struct nvkm_device *device = subdev->device;
-	u32 val;
-	int ramp_timeout;
+	struct gk20a_pll pll;
+	int ret = 0;
 
 	/* get old coefficients */
-	val = nvkm_rd32(device, GPCPLL_COEFF);
+	gk20a_pllg_read_mnp(clk, &pll);
 	/* do nothing if NDIV is the same */
-	if (n == ((val >> GPCPLL_COEFF_N_SHIFT) & MASK(GPCPLL_COEFF_N_WIDTH)))
+	if (n == pll.n)
 		return 0;
 
-	/* setup */
-	nvkm_mask(device, GPCPLL_CFG2, 0xff << GPCPLL_CFG2_PLL_STEPA_SHIFT,
-		0x2b << GPCPLL_CFG2_PLL_STEPA_SHIFT);
-	nvkm_mask(device, GPCPLL_CFG3, 0xff << GPCPLL_CFG3_PLL_STEPB_SHIFT,
-		0xb << GPCPLL_CFG3_PLL_STEPB_SHIFT);
-
 	/* pll slowdown mode */
 	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
 		BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT),
 		BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT));
 
 	/* new ndiv ready for ramp */
-	val = nvkm_rd32(device, GPCPLL_COEFF);
-	val &= ~(MASK(GPCPLL_COEFF_N_WIDTH) << GPCPLL_COEFF_N_SHIFT);
-	val |= (n & MASK(GPCPLL_COEFF_N_WIDTH)) << GPCPLL_COEFF_N_SHIFT;
+	pll.n = n;
 	udelay(1);
-	nvkm_wr32(device, GPCPLL_COEFF, val);
+	gk20a_pllg_write_mnp(clk, &pll);
 
 	/* dynamic ramp to new ndiv */
-	val = nvkm_rd32(device, GPCPLL_NDIV_SLOWDOWN);
-	val |= 0x1 << GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT;
 	udelay(1);
-	nvkm_wr32(device, GPCPLL_NDIV_SLOWDOWN, val);
+	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
+		  BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT),
+		  BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT));
 
-	for (ramp_timeout = 500; ramp_timeout > 0; ramp_timeout--) {
-		udelay(1);
-		val = nvkm_rd32(device, GPC_BCAST_NDIV_SLOWDOWN_DEBUG);
-		if (val & GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK)
-			break;
-	}
+	/* wait for ramping to complete */
+	if (nvkm_wait_usec(device, 500, GPC_BCAST_NDIV_SLOWDOWN_DEBUG,
+		GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK,
+		GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK) < 0)
+		ret = -ETIMEDOUT;
 
 	/* exit slowdown mode */
 	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
@@ -311,21 +249,35 @@ gk20a_pllg_slide(struct gk20a_clk *clk, u32 n)
 		BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT), 0);
 	nvkm_rd32(device, GPCPLL_NDIV_SLOWDOWN);
 
-	if (ramp_timeout <= 0) {
-		nvkm_error(subdev, "gpcpll dynamic ramp timeout\n");
-		return -ETIMEDOUT;
-	}
-
-	return 0;
+	return ret;
 }
 
-static void
+static int
 gk20a_pllg_enable(struct gk20a_clk *clk)
 {
 	struct nvkm_device *device = clk->base.subdev.device;
+	u32 val;
 
 	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, GPCPLL_CFG_ENABLE);
 	nvkm_rd32(device, GPCPLL_CFG);
+
+	/* enable lock detection */
+	val = nvkm_rd32(device, GPCPLL_CFG);
+	if (val & GPCPLL_CFG_LOCK_DET_OFF) {
+		val &= ~GPCPLL_CFG_LOCK_DET_OFF;
+		nvkm_wr32(device, GPCPLL_CFG, val);
+	}
+
+	/* wait for lock */
+	if (nvkm_wait_usec(device, 300, GPCPLL_CFG, GPCPLL_CFG_LOCK,
+			   GPCPLL_CFG_LOCK) < 0)
+		return -ETIMEDOUT;
+
+	/* switch to VCO mode */
+	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT),
+		BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
+
+	return 0;
 }
 
 static void
@@ -333,117 +285,81 @@ gk20a_pllg_disable(struct gk20a_clk *clk)
 {
 	struct nvkm_device *device = clk->base.subdev.device;
 
+	/* put PLL in bypass before disabling it */
+	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT), 0);
+
 	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, 0);
 	nvkm_rd32(device, GPCPLL_CFG);
 }
 
 static int
-_gk20a_pllg_program_mnp(struct gk20a_clk *clk, bool allow_slide)
+gk20a_pllg_program_mnp(struct gk20a_clk *clk, const struct gk20a_pll *pll)
 {
 	struct nvkm_subdev *subdev = &clk->base.subdev;
 	struct nvkm_device *device = subdev->device;
-	u32 val, cfg;
-	struct gk20a_pll old_pll;
-	u32 n_lo;
-
-	/* get old coefficients */
-	gk20a_pllg_read_mnp(clk, &old_pll);
-
-	/* do NDIV slide if there is no change in M and PL */
-	cfg = nvkm_rd32(device, GPCPLL_CFG);
-	if (allow_slide && clk->pll.m == old_pll.m &&
-	    clk->pll.pl == old_pll.pl && (cfg & GPCPLL_CFG_ENABLE)) {
-		return gk20a_pllg_slide(clk, clk->pll.n);
-	}
-
-	/* slide down to NDIV_LO */
-	if (allow_slide && (cfg & GPCPLL_CFG_ENABLE)) {
-		int ret;
-
-		n_lo = DIV_ROUND_UP(old_pll.m * clk->params->min_vco,
-				    clk->parent_rate / KHZ);
-		ret = gk20a_pllg_slide(clk, n_lo);
+	struct gk20a_pll cur_pll;
+	int ret;
 
-		if (ret)
-			return ret;
-	}
+	gk20a_pllg_read_mnp(clk, &cur_pll);
 
-	/* split FO-to-bypass jump in halfs by setting out divider 1:2 */
+	/* split VCO-to-bypass jump in half by setting out divider 1:2 */
 	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
-		0x2 << GPC2CLK_OUT_VCODIV_SHIFT);
-
-	/* put PLL in bypass before programming it */
-	val = nvkm_rd32(device, SEL_VCO);
-	val &= ~(BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
+		  GPC2CLK_OUT_VCODIV2 << GPC2CLK_OUT_VCODIV_SHIFT);
+	/* Intentional 2nd write to assure linear divider operation */
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV2 << GPC2CLK_OUT_VCODIV_SHIFT);
+	nvkm_rd32(device, GPC2CLK_OUT);
 	udelay(2);
-	nvkm_wr32(device, SEL_VCO, val);
-
-	/* get out from IDDQ */
-	val = nvkm_rd32(device, GPCPLL_CFG);
-	if (val & GPCPLL_CFG_IDDQ) {
-		val &= ~GPCPLL_CFG_IDDQ;
-		nvkm_wr32(device, GPCPLL_CFG, val);
-		nvkm_rd32(device, GPCPLL_CFG);
-		udelay(2);
-	}
 
 	gk20a_pllg_disable(clk);
 
-	nvkm_debug(subdev, "%s: m=%d n=%d pl=%d\n", __func__,
-		   clk->pll.m, clk->pll.n, clk->pll.pl);
-
-	n_lo = DIV_ROUND_UP(clk->pll.m * clk->params->min_vco,
-			    clk->parent_rate / KHZ);
-	val = clk->pll.m << GPCPLL_COEFF_M_SHIFT;
-	val |= (allow_slide ? n_lo : clk->pll.n) << GPCPLL_COEFF_N_SHIFT;
-	val |= clk->pll.pl << GPCPLL_COEFF_P_SHIFT;
-	nvkm_wr32(device, GPCPLL_COEFF, val);
+	gk20a_pllg_write_mnp(clk, pll);
 
-	gk20a_pllg_enable(clk);
-
-	val = nvkm_rd32(device, GPCPLL_CFG);
-	if (val & GPCPLL_CFG_LOCK_DET_OFF) {
-		val &= ~GPCPLL_CFG_LOCK_DET_OFF;
-		nvkm_wr32(device, GPCPLL_CFG, val);
-	}
-
-	if (nvkm_usec(device, 300,
-		if (nvkm_rd32(device, GPCPLL_CFG) & GPCPLL_CFG_LOCK)
-			break;
-	) < 0)
-		return -ETIMEDOUT;
-
-	/* switch to VCO mode */
-	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT),
-		  BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
+	ret = gk20a_pllg_enable(clk);
+	if (ret)
+		return ret;
 
 	/* restore out divider 1:1 */
-	val = nvkm_rd32(device, GPC2CLK_OUT);
-	if ((val & GPC2CLK_OUT_VCODIV_MASK) !=
-	    (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT)) {
-		val &= ~GPC2CLK_OUT_VCODIV_MASK;
-		val |= GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT;
-		udelay(2);
-		nvkm_wr32(device, GPC2CLK_OUT, val);
-		/* Intentional 2nd write to assure linear divider operation */
-		nvkm_wr32(device, GPC2CLK_OUT, val);
-		nvkm_rd32(device, GPC2CLK_OUT);
-	}
+	udelay(2);
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT);
+	/* Intentional 2nd write to assure linear divider operation */
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT);
+	nvkm_rd32(device, GPC2CLK_OUT);
 
-	/* slide up to new NDIV */
-	return allow_slide ? gk20a_pllg_slide(clk, clk->pll.n) : 0;
+	return 0;
 }
 
 static int
-gk20a_pllg_program_mnp(struct gk20a_clk *clk)
+gk20a_pllg_program_mnp_slide(struct gk20a_clk *clk, const struct gk20a_pll *pll)
 {
-	int err;
+	struct gk20a_pll cur_pll;
+	int ret;
 
-	err = _gk20a_pllg_program_mnp(clk, true);
-	if (err)
-		err = _gk20a_pllg_program_mnp(clk, false);
+	if (gk20a_pllg_is_enabled(clk)) {
+		gk20a_pllg_read_mnp(clk, &cur_pll);
+
+		/* just do NDIV slide if there is no change to M and PL */
+		if (pll->m == cur_pll.m && pll->pl == cur_pll.pl)
+			return gk20a_pllg_slide(clk, pll->n);
+
+		/* slide down to current NDIV_LO */
+		cur_pll.n = gk20a_pllg_n_lo(clk, &cur_pll);
+		ret = gk20a_pllg_slide(clk, cur_pll.n);
+		if (ret)
+			return ret;
+	}
+
+	/* program MNP with the new clock parameters and new NDIV_LO */
+	cur_pll = *pll;
+	cur_pll.n = gk20a_pllg_n_lo(clk, &cur_pll);
+	ret = gk20a_pllg_program_mnp(clk, &cur_pll);
+	if (ret)
+		return ret;
 
-	return err;
+	/* slide up to new NDIV */
+	return gk20a_pllg_slide(clk, pll->n);
 }
 
 static struct nvkm_pstate
@@ -546,13 +462,14 @@ gk20a_clk_read(struct nvkm_clk *base, enum nv_clk_src src)
 	struct gk20a_clk *clk = gk20a_clk(base);
 	struct nvkm_subdev *subdev = &clk->base.subdev;
 	struct nvkm_device *device = subdev->device;
+	struct gk20a_pll pll;
 
 	switch (src) {
 	case nv_clk_src_crystal:
 		return device->crystal;
 	case nv_clk_src_gpc:
-		gk20a_pllg_read_mnp(clk, &clk->pll);
-		return gk20a_pllg_calc_rate(clk) / GK20A_CLK_GPC_MDIV;
+		gk20a_pllg_read_mnp(clk, &pll);
+		return gk20a_pllg_calc_rate(clk, &pll) / GK20A_CLK_GPC_MDIV;
 	default:
 		nvkm_error(subdev, "invalid clock source %d\n", src);
 		return -EINVAL;
@@ -565,15 +482,20 @@ gk20a_clk_calc(struct nvkm_clk *base, struct nvkm_cstate *cstate)
 	struct gk20a_clk *clk = gk20a_clk(base);
 
 	return gk20a_pllg_calc_mnp(clk, cstate->domain[nv_clk_src_gpc] *
-					 GK20A_CLK_GPC_MDIV);
+					 GK20A_CLK_GPC_MDIV, &clk->pll);
 }
 
 int
 gk20a_clk_prog(struct nvkm_clk *base)
 {
 	struct gk20a_clk *clk = gk20a_clk(base);
+	int err = gk20a_pllg_program_mnp_slide(clk, &clk->pll);
+
+	/* the slide sequence failed: retry with plain MNP programming */
+	if (err)
+		err = gk20a_pllg_program_mnp(clk, &clk->pll);
 
-	return gk20a_pllg_program_mnp(clk);
+	return err;
 }
 
 void
@@ -581,29 +503,62 @@ gk20a_clk_tidy(struct nvkm_clk *base)
 {
 }
 
+int
+gk20a_clk_setup_slide(struct gk20a_clk *clk)
+{
+	struct nvkm_subdev *subdev = &clk->base.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 step_a, step_b;	/* slide step values, chosen by parent rate */
+
+	switch (clk->parent_rate) {
+	case 12000000:
+	case 12800000:
+	case 13000000:
+		step_a = 0x2b;
+		step_b = 0x0b;
+		break;
+	case 19200000:
+		step_a = 0x12;
+		step_b = 0x08;
+		break;
+	case 38400000:
+		step_a = 0x04;
+		step_b = 0x05;
+		break;
+	default:
+		nvkm_error(subdev, "invalid parent clock rate %u KHz\n",
+			   clk->parent_rate / KHZ);
+		return -EINVAL;
+	}
+
+	nvkm_mask(device, GPCPLL_CFG2, 0xff << GPCPLL_CFG2_PLL_STEPA_SHIFT,
+		step_a << GPCPLL_CFG2_PLL_STEPA_SHIFT);
+	nvkm_mask(device, GPCPLL_CFG3, 0xff << GPCPLL_CFG3_PLL_STEPB_SHIFT,
+		step_b << GPCPLL_CFG3_PLL_STEPB_SHIFT);
+
+	return 0;
+}
+
 void
 gk20a_clk_fini(struct nvkm_clk *base)
 {
 	struct nvkm_device *device = base->subdev.device;
 	struct gk20a_clk *clk = gk20a_clk(base);
-	u32 val;
 
 	/* slide to VCO min */
-	val = nvkm_rd32(device, GPCPLL_CFG);
-	if (val & GPCPLL_CFG_ENABLE) {
+	if (gk20a_pllg_is_enabled(clk)) {
 		struct gk20a_pll pll;
 		u32 n_lo;
 
 		gk20a_pllg_read_mnp(clk, &pll);
-		n_lo = DIV_ROUND_UP(pll.m * clk->params->min_vco,
-				    clk->parent_rate / KHZ);
+		n_lo = gk20a_pllg_n_lo(clk, &pll);
 		gk20a_pllg_slide(clk, n_lo);
 	}
 
-	/* put PLL in bypass before disabling it */
-	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT), 0);
-
 	gk20a_pllg_disable(clk);
+
+	/* set IDDQ; the data must be the bit itself (a plain 1 is ENABLE) */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_IDDQ, GPCPLL_CFG_IDDQ);
 }
 
 static int
@@ -614,9 +569,18 @@ gk20a_clk_init(struct nvkm_clk *base)
 	struct nvkm_device *device = subdev->device;
 	int ret;
 
+	/* get out from IDDQ */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_IDDQ, 0);
+	nvkm_rd32(device, GPCPLL_CFG);
+	udelay(5);
+
 	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_INIT_MASK,
 		  GPC2CLK_OUT_INIT_VAL);
 
+	ret = gk20a_clk_setup_slide(clk);
+	if (ret)
+		return ret;
+
 	/* Start with lowest frequency */
 	base->func->calc(base, &base->func->pstates[0].base);
 	ret = base->func->prog(&clk->base);
@@ -646,7 +610,7 @@ gk20a_clk = {
 };
 
 int
-_gk20a_clk_ctor(struct nvkm_device *device, int index,
+gk20a_clk_ctor(struct nvkm_device *device, int index,
 		const struct nvkm_clk_func *func,
 		const struct gk20a_clk_pllg_params *params,
 		struct gk20a_clk *clk)
@@ -685,7 +649,7 @@ gk20a_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
 		return -ENOMEM;
 	*pclk = &clk->base;
 
-	ret = _gk20a_clk_ctor(device, index, &gk20a_clk, &gk20a_pllg_params,
+	ret = gk20a_clk_ctor(device, index, &gk20a_clk, &gk20a_pllg_params,
 			      clk);
 
 	clk->pl_to_div = pl_to_div;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h
index 13c46740197d..0d1450972162 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk20a.h
@@ -24,9 +24,79 @@
 #ifndef __NVKM_CLK_GK20A_H__
 #define __NVKM_CLK_GK20A_H__
 
+#define KHZ (1000)
+#define MHZ (KHZ * 1000)
+
+#define MASK(w)	((1 << (w)) - 1)
+
 #define GK20A_CLK_GPC_MDIV 1000
 
 #define SYS_GPCPLL_CFG_BASE	0x00137000
+#define GPCPLL_CFG		(SYS_GPCPLL_CFG_BASE + 0)
+#define GPCPLL_CFG_ENABLE	BIT(0)
+#define GPCPLL_CFG_IDDQ		BIT(1)
+#define GPCPLL_CFG_LOCK_DET_OFF	BIT(4)
+#define GPCPLL_CFG_LOCK		BIT(17)
+
+#define GPCPLL_CFG2		(SYS_GPCPLL_CFG_BASE + 0xc)
+#define GPCPLL_CFG2_SETUP2_SHIFT	16
+#define GPCPLL_CFG2_PLL_STEPA_SHIFT	24
+
+#define GPCPLL_CFG3			(SYS_GPCPLL_CFG_BASE + 0x18)
+#define GPCPLL_CFG3_VCO_CTRL_SHIFT		0
+#define GPCPLL_CFG3_VCO_CTRL_WIDTH		9
+#define GPCPLL_CFG3_VCO_CTRL_MASK		\
+	(MASK(GPCPLL_CFG3_VCO_CTRL_WIDTH) << GPCPLL_CFG3_VCO_CTRL_SHIFT)
+#define GPCPLL_CFG3_PLL_STEPB_SHIFT		16
+#define GPCPLL_CFG3_PLL_STEPB_WIDTH		8
+
+#define GPCPLL_COEFF		(SYS_GPCPLL_CFG_BASE + 4)
+#define GPCPLL_COEFF_M_SHIFT	0
+#define GPCPLL_COEFF_M_WIDTH	8
+#define GPCPLL_COEFF_N_SHIFT	8
+#define GPCPLL_COEFF_N_WIDTH	8
+#define GPCPLL_COEFF_N_MASK	\
+	(MASK(GPCPLL_COEFF_N_WIDTH) << GPCPLL_COEFF_N_SHIFT)
+#define GPCPLL_COEFF_P_SHIFT	16
+#define GPCPLL_COEFF_P_WIDTH	6
+
+#define GPCPLL_NDIV_SLOWDOWN			(SYS_GPCPLL_CFG_BASE + 0x1c)
+#define GPCPLL_NDIV_SLOWDOWN_NDIV_LO_SHIFT	0
+#define GPCPLL_NDIV_SLOWDOWN_NDIV_MID_SHIFT	8
+#define GPCPLL_NDIV_SLOWDOWN_STEP_SIZE_LO2MID_SHIFT	16
+#define GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT	22
+#define GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT	31
+
+#define GPC_BCAST_GPCPLL_CFG_BASE		0x00132800
+#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG	(GPC_BCAST_GPCPLL_CFG_BASE + 0xa0)
+#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT	24
+#define GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK \
+	(0x1 << GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_SHIFT)
+
+#define SEL_VCO				(SYS_GPCPLL_CFG_BASE + 0x100)
+#define SEL_VCO_GPC2CLK_OUT_SHIFT	0
+
+#define GPC2CLK_OUT			(SYS_GPCPLL_CFG_BASE + 0x250)
+#define GPC2CLK_OUT_SDIV14_INDIV4_WIDTH	1
+#define GPC2CLK_OUT_SDIV14_INDIV4_SHIFT	31
+#define GPC2CLK_OUT_SDIV14_INDIV4_MODE	1
+#define GPC2CLK_OUT_VCODIV_WIDTH	6
+#define GPC2CLK_OUT_VCODIV_SHIFT	8
+#define GPC2CLK_OUT_VCODIV1		0
+#define GPC2CLK_OUT_VCODIV2		2
+#define GPC2CLK_OUT_VCODIV_MASK		(MASK(GPC2CLK_OUT_VCODIV_WIDTH) << \
+					GPC2CLK_OUT_VCODIV_SHIFT)
+#define GPC2CLK_OUT_BYPDIV_WIDTH	6
+#define GPC2CLK_OUT_BYPDIV_SHIFT	0
+#define GPC2CLK_OUT_BYPDIV31		0x3c
+#define GPC2CLK_OUT_INIT_MASK	((MASK(GPC2CLK_OUT_SDIV14_INDIV4_WIDTH) << \
+		GPC2CLK_OUT_SDIV14_INDIV4_SHIFT)\
+		| (MASK(GPC2CLK_OUT_VCODIV_WIDTH) << GPC2CLK_OUT_VCODIV_SHIFT)\
+		| (MASK(GPC2CLK_OUT_BYPDIV_WIDTH) << GPC2CLK_OUT_BYPDIV_SHIFT))
+#define GPC2CLK_OUT_INIT_VAL	((GPC2CLK_OUT_SDIV14_INDIV4_MODE << \
+		GPC2CLK_OUT_SDIV14_INDIV4_SHIFT) \
+		| (GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT) \
+		| (GPC2CLK_OUT_BYPDIV31 << GPC2CLK_OUT_BYPDIV_SHIFT))
 
 /* All frequencies in Khz */
 struct gk20a_clk_pllg_params {
@@ -54,7 +124,29 @@ struct gk20a_clk {
 };
 #define gk20a_clk(p) container_of((p), struct gk20a_clk, base)
 
-int _gk20a_clk_ctor(struct nvkm_device *, int, const struct nvkm_clk_func *,
+u32 gk20a_pllg_calc_rate(struct gk20a_clk *, struct gk20a_pll *);
+int gk20a_pllg_calc_mnp(struct gk20a_clk *, unsigned long, struct gk20a_pll *);
+void gk20a_pllg_read_mnp(struct gk20a_clk *, struct gk20a_pll *);
+void gk20a_pllg_write_mnp(struct gk20a_clk *, const struct gk20a_pll *);
+
+static inline bool
+gk20a_pllg_is_enabled(struct gk20a_clk *clk)
+{
+	struct nvkm_device *device = clk->base.subdev.device;
+	u32 cfg = nvkm_rd32(device, GPCPLL_CFG);
+
+	/* enabled means the ENABLE bit is set in GPCPLL_CFG */
+	return (cfg & GPCPLL_CFG_ENABLE) != 0;
+}
+
+static inline u32
+gk20a_pllg_n_lo(struct gk20a_clk *clk, struct gk20a_pll *pll)
+{
+	const u32 ref_khz = clk->parent_rate / KHZ;	/* parent rate / KHZ */
+	return DIV_ROUND_UP(pll->m * clk->params->min_vco, ref_khz);
+}
+
+int gk20a_clk_ctor(struct nvkm_device *, int, const struct nvkm_clk_func *,
 		    const struct gk20a_clk_pllg_params *, struct gk20a_clk *);
 void gk20a_clk_fini(struct nvkm_clk *);
 int gk20a_clk_read(struct nvkm_clk *, enum nv_clk_src);
@@ -62,4 +154,6 @@ int gk20a_clk_calc(struct nvkm_clk *, struct nvkm_cstate *);
 int gk20a_clk_prog(struct nvkm_clk *);
 void gk20a_clk_tidy(struct nvkm_clk *);
 
+int gk20a_clk_setup_slide(struct gk20a_clk *);
+
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c
index 71b2bbb61973..b284e949f732 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gm20b.c
@@ -21,20 +21,123 @@
  */
 
 #include <subdev/clk.h>
+#include <subdev/volt.h>
+#include <subdev/timer.h>
 #include <core/device.h>
+#include <core/tegra.h>
 
 #include "priv.h"
 #include "gk20a.h"
 
-#define KHZ (1000)
-#define MHZ (KHZ * 1000)
-
-#define MASK(w)	((1 << w) - 1)
+#define GPCPLL_CFG_SYNC_MODE	BIT(2)
 
 #define BYPASSCTRL_SYS	(SYS_GPCPLL_CFG_BASE + 0x340)
 #define BYPASSCTRL_SYS_GPCPLL_SHIFT	0
 #define BYPASSCTRL_SYS_GPCPLL_WIDTH	1
 
+#define GPCPLL_CFG2_SDM_DIN_SHIFT	0
+#define GPCPLL_CFG2_SDM_DIN_WIDTH	8
+#define GPCPLL_CFG2_SDM_DIN_MASK	\
+	(MASK(GPCPLL_CFG2_SDM_DIN_WIDTH) << GPCPLL_CFG2_SDM_DIN_SHIFT)
+#define GPCPLL_CFG2_SDM_DIN_NEW_SHIFT	8
+#define GPCPLL_CFG2_SDM_DIN_NEW_WIDTH	15
+#define GPCPLL_CFG2_SDM_DIN_NEW_MASK	\
+	(MASK(GPCPLL_CFG2_SDM_DIN_NEW_WIDTH) << GPCPLL_CFG2_SDM_DIN_NEW_SHIFT)
+#define GPCPLL_CFG2_SETUP2_SHIFT	16
+#define GPCPLL_CFG2_PLL_STEPA_SHIFT	24
+
+#define GPCPLL_DVFS0	(SYS_GPCPLL_CFG_BASE + 0x10)
+#define GPCPLL_DVFS0_DFS_COEFF_SHIFT	0
+#define GPCPLL_DVFS0_DFS_COEFF_WIDTH	7
+#define GPCPLL_DVFS0_DFS_COEFF_MASK	\
+	(MASK(GPCPLL_DVFS0_DFS_COEFF_WIDTH) << GPCPLL_DVFS0_DFS_COEFF_SHIFT)
+#define GPCPLL_DVFS0_DFS_DET_MAX_SHIFT	8
+#define GPCPLL_DVFS0_DFS_DET_MAX_WIDTH	7
+#define GPCPLL_DVFS0_DFS_DET_MAX_MASK	\
+	(MASK(GPCPLL_DVFS0_DFS_DET_MAX_WIDTH) << GPCPLL_DVFS0_DFS_DET_MAX_SHIFT)
+
+#define GPCPLL_DVFS1		(SYS_GPCPLL_CFG_BASE + 0x14)
+#define GPCPLL_DVFS1_DFS_EXT_DET_SHIFT		0
+#define GPCPLL_DVFS1_DFS_EXT_DET_WIDTH		7
+#define GPCPLL_DVFS1_DFS_EXT_STRB_SHIFT		7
+#define GPCPLL_DVFS1_DFS_EXT_STRB_WIDTH		1
+#define GPCPLL_DVFS1_DFS_EXT_CAL_SHIFT		8
+#define GPCPLL_DVFS1_DFS_EXT_CAL_WIDTH		7
+#define GPCPLL_DVFS1_DFS_EXT_SEL_SHIFT		15
+#define GPCPLL_DVFS1_DFS_EXT_SEL_WIDTH		1
+#define GPCPLL_DVFS1_DFS_CTRL_SHIFT		16
+#define GPCPLL_DVFS1_DFS_CTRL_WIDTH		12
+#define GPCPLL_DVFS1_EN_SDM_SHIFT		28
+#define GPCPLL_DVFS1_EN_SDM_WIDTH		1
+#define GPCPLL_DVFS1_EN_SDM_BIT			BIT(28)
+#define GPCPLL_DVFS1_EN_DFS_SHIFT		29
+#define GPCPLL_DVFS1_EN_DFS_WIDTH		1
+#define GPCPLL_DVFS1_EN_DFS_BIT			BIT(29)
+#define GPCPLL_DVFS1_EN_DFS_CAL_SHIFT		30
+#define GPCPLL_DVFS1_EN_DFS_CAL_WIDTH		1
+#define GPCPLL_DVFS1_EN_DFS_CAL_BIT		BIT(30)
+#define GPCPLL_DVFS1_DFS_CAL_DONE_SHIFT		31
+#define GPCPLL_DVFS1_DFS_CAL_DONE_WIDTH		1
+#define GPCPLL_DVFS1_DFS_CAL_DONE_BIT		BIT(31)
+
+#define GPC_BCAST_GPCPLL_DVFS2	(GPC_BCAST_GPCPLL_CFG_BASE + 0x20)
+#define GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT	BIT(16)
+
+#define GPCPLL_CFG3_PLL_DFS_TESTOUT_SHIFT	24
+#define GPCPLL_CFG3_PLL_DFS_TESTOUT_WIDTH	7
+
+#define DFS_DET_RANGE	6	/* -2^6 ... 2^6-1 */
+#define SDM_DIN_RANGE	12	/* -2^12 ... 2^12-1 */
+
+struct gm20b_clk_dvfs_params {
+	s32 coeff_slope;
+	s32 coeff_offs;
+	u32 vco_ctrl;
+};
+
+static const struct gm20b_clk_dvfs_params gm20b_dvfs_params = {
+	.coeff_slope = -165230,
+	.coeff_offs = 214007,
+	.vco_ctrl = 0x7 << 3,
+};
+
+/*
+ * base.n is now the *integer* part of the N factor.
+ * sdm_din contains n's decimal part.
+ */
+struct gm20b_pll {
+	struct gk20a_pll base;
+	u32 sdm_din;
+};
+
+struct gm20b_clk_dvfs {
+	u32 dfs_coeff;
+	s32 dfs_det_max;
+	s32 dfs_ext_cal;
+};
+
+struct gm20b_clk {
+	/* currently applied parameters */
+	struct gk20a_clk base;
+	struct gm20b_clk_dvfs dvfs;
+	u32 uv;
+
+	/* new parameters to apply */
+	struct gk20a_pll new_pll;
+	struct gm20b_clk_dvfs new_dvfs;
+	u32 new_uv;
+
+	const struct gm20b_clk_dvfs_params *dvfs_params;
+
+	/* fused parameters */
+	s32 uvdet_slope;
+	s32 uvdet_offs;
+
+	/* safe frequency we can use at minimum voltage */
+	u32 safe_fmax_vmin;
+};
+#define gm20b_clk(p) container_of((gk20a_clk(p)), struct gm20b_clk, base)
+
 static u32 pl_to_div(u32 pl)
 {
 	return pl;
@@ -53,6 +156,484 @@ static const struct gk20a_clk_pllg_params gm20b_pllg_params = {
 	.min_pl = 1, .max_pl = 31,
 };
 
+static void
+gm20b_pllg_read_mnp(struct gm20b_clk *clk, struct gm20b_pll *pll)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+	u32 cfg2;
+
+	/* base coefficients via the gk20a helper, then SDM_DIN from CFG2 */
+	gk20a_pllg_read_mnp(&clk->base, &pll->base);
+
+	cfg2 = nvkm_rd32(device, GPCPLL_CFG2) >> GPCPLL_CFG2_SDM_DIN_SHIFT;
+	pll->sdm_din = cfg2 & MASK(GPCPLL_CFG2_SDM_DIN_WIDTH);
+}
+
+static void
+gm20b_pllg_write_mnp(struct gm20b_clk *clk, const struct gm20b_pll *pll)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+	const u32 din = pll->sdm_din << GPCPLL_CFG2_SDM_DIN_SHIFT;
+
+	nvkm_mask(device, GPCPLL_CFG2, GPCPLL_CFG2_SDM_DIN_MASK, din);
+	gk20a_pllg_write_mnp(&clk->base, &pll->base);
+}
+
+/*
+ * Determine DFS_COEFF for the requested voltage. Always select external
+ * calibration override equal to the voltage, and set maximum detection
+ * limit "0" (to make sure that PLL output remains under F/V curve when
+ * voltage increases).
+ */
+static void
+gm20b_dvfs_calc_det_coeff(struct gm20b_clk *clk, s32 uv,
+			  struct gm20b_clk_dvfs *dvfs)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	const struct gm20b_clk_dvfs_params *p = clk->dvfs_params;
+	u32 coeff;
+	/* Work in mV: the uV value times the slope would likely overflow */
+	s32 mv = DIV_ROUND_CLOSEST(uv, 1000);
+
+	/* coeff = (slope * mV / 1000 + offset) / 1000, clamped to the field */
+	coeff = DIV_ROUND_CLOSEST(mv * p->coeff_slope, 1000) + p->coeff_offs;
+	coeff = DIV_ROUND_CLOSEST(coeff, 1000);
+	dvfs->dfs_coeff = min_t(u32, coeff, MASK(GPCPLL_DVFS0_DFS_COEFF_WIDTH));
+
+	dvfs->dfs_ext_cal = DIV_ROUND_CLOSEST(uv - clk->uvdet_offs,
+					     clk->uvdet_slope);
+	/* out of range should never happen; it is only reported, not clamped */
+	if (abs(dvfs->dfs_ext_cal) >= BIT(DFS_DET_RANGE))
+		nvkm_error(subdev, "dfs_ext_cal overflow!\n");
+
+	dvfs->dfs_det_max = 0;
+
+	nvkm_debug(subdev, "%s uv: %d coeff: %x, ext_cal: %d, det_max: %d\n",
+		   __func__, uv, dvfs->dfs_coeff, dvfs->dfs_ext_cal,
+		   dvfs->dfs_det_max);
+}
+
+/*
+ * Solve equation for integer and fractional part of the effective NDIV:
+ *
+ * n_eff = n_int + 1/2 + (SDM_DIN / 2^(SDM_DIN_RANGE + 1)) +
+ *         (DVFS_COEFF * DVFS_DET_DELTA) / 2^DFS_DET_RANGE
+ *
+ * The SDM_DIN LSB is finally shifted out, since it is not accessible by sw.
+ */
+static void
+gm20b_dvfs_calc_ndiv(struct gm20b_clk *clk, u32 n_eff, u32 *n_int, u32 *sdm_din)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	const struct gk20a_clk_pllg_params *p = clk->base.params;
+	s32 n;
+	s32 det_delta;
+	u32 rem, rem_range;
+
+	/* calculate current ext_cal and subtract previous one */
+	det_delta = DIV_ROUND_CLOSEST(((s32)clk->uv) - clk->uvdet_offs,
+				      clk->uvdet_slope);
+	det_delta -= clk->dvfs.dfs_ext_cal;
+	det_delta = min(det_delta, clk->dvfs.dfs_det_max);
+	det_delta *= clk->dvfs.dfs_coeff;
+
+	/* integer part of n */
+	n = (n_eff << DFS_DET_RANGE) - det_delta;
+	/* should never happen! (n is signed so a negative result is caught) */
+	if (n <= 0) {
+		nvkm_error(subdev, "ndiv <= 0 - setting to 1...\n");
+		n = 1 << DFS_DET_RANGE;
+	}
+	if (n >> DFS_DET_RANGE > p->max_n) {
+		nvkm_error(subdev, "ndiv > max_n - setting to max_n...\n");
+		n = p->max_n << DFS_DET_RANGE;
+	}
+	*n_int = n >> DFS_DET_RANGE;
+
+	/* fractional part of n */
+	rem = ((u32)n) & MASK(DFS_DET_RANGE);
+	rem_range = SDM_DIN_RANGE + 1 - DFS_DET_RANGE;
+	/* subtract 2^SDM_DIN_RANGE to account for the 1/2 of the equation */
+	rem = (rem << rem_range) - BIT(SDM_DIN_RANGE);
+	/* lose 8 LSB and clip - sdm_din only keeps the most significant byte */
+	*sdm_din = (rem >> BITS_PER_BYTE) & MASK(GPCPLL_CFG2_SDM_DIN_WIDTH);
+
+	nvkm_debug(subdev, "%s n_eff: %d, n_int: %d, sdm_din: %d\n", __func__,
+		   n_eff, *n_int, *sdm_din);
+}
+
+static int
+gm20b_pllg_slide(struct gm20b_clk *clk, u32 n)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	struct nvkm_device *device = subdev->device;
+	struct gm20b_pll pll;
+	u32 n_int, sdm_din;
+	int ret = 0;
+
+	/* calculate the new n_int/sdm_din for this n/uv */
+	gm20b_dvfs_calc_ndiv(clk, n, &n_int, &sdm_din);
+
+	/* get the coefficients currently programmed */
+	gm20b_pllg_read_mnp(clk, &pll);
+	/* nothing to do if integer and fractional NDIV are both unchanged */
+	if (n_int == pll.base.n && sdm_din == pll.sdm_din)
+		return 0;
+
+	/* pll slowdown mode */
+	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
+		BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT),
+		BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT));
+
+	/* new ndiv ready for ramp */
+	/* in DVFS mode SDM is updated via "new" field */
+	nvkm_mask(device, GPCPLL_CFG2, GPCPLL_CFG2_SDM_DIN_NEW_MASK,
+		  sdm_din << GPCPLL_CFG2_SDM_DIN_NEW_SHIFT);
+	pll.base.n = n_int;
+	udelay(1);
+	gk20a_pllg_write_mnp(&clk->base, &pll.base);
+
+	/* dynamic ramp to new ndiv */
+	udelay(1);
+	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
+		  BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT),
+		  BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT));
+
+	/* wait for the ramp; a timeout is recorded but cleanup still runs */
+	if (nvkm_wait_usec(device, 500, GPC_BCAST_NDIV_SLOWDOWN_DEBUG,
+		GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK,
+		GPC_BCAST_NDIV_SLOWDOWN_DEBUG_PLL_DYNRAMP_DONE_SYNCED_MASK) < 0)
+		ret = -ETIMEDOUT;
+
+	/* in DVFS mode complete SDM update */
+	nvkm_mask(device, GPCPLL_CFG2, GPCPLL_CFG2_SDM_DIN_MASK,
+		  sdm_din << GPCPLL_CFG2_SDM_DIN_SHIFT);
+
+	/* exit slowdown mode and clear the dynamic ramp enable */
+	nvkm_mask(device, GPCPLL_NDIV_SLOWDOWN,
+		BIT(GPCPLL_NDIV_SLOWDOWN_SLOWDOWN_USING_PLL_SHIFT) |
+		BIT(GPCPLL_NDIV_SLOWDOWN_EN_DYNRAMP_SHIFT), 0);
+	nvkm_rd32(device, GPCPLL_NDIV_SLOWDOWN);
+
+	return ret;
+}
+
+static int
+gm20b_pllg_enable(struct gm20b_clk *clk)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, GPCPLL_CFG_ENABLE);
+	nvkm_rd32(device, GPCPLL_CFG);
+
+	/* In DVFS mode lock cannot be used - wait a fixed 40us instead */
+	udelay(40);
+
+	/* set SYNC_MODE for glitchless switch out of bypass */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_SYNC_MODE,
+		       GPCPLL_CFG_SYNC_MODE);
+	nvkm_rd32(device, GPCPLL_CFG);
+
+	/* switch the output to VCO mode; this function always returns 0 */
+	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT),
+		  BIT(SEL_VCO_GPC2CLK_OUT_SHIFT));
+
+	return 0;
+}
+
+static void
+gm20b_pllg_disable(struct gm20b_clk *clk)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+
+	/* put PLL in bypass (clear the SEL_VCO bit) before disabling it */
+	nvkm_mask(device, SEL_VCO, BIT(SEL_VCO_GPC2CLK_OUT_SHIFT), 0);
+
+	/* clear SYNC_MODE, then ENABLE: the reverse of gm20b_pllg_enable() */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_SYNC_MODE, 0);
+
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_ENABLE, 0);
+	nvkm_rd32(device, GPCPLL_CFG);
+}
+
+static int
+gm20b_pllg_program_mnp(struct gm20b_clk *clk, const struct gk20a_pll *pll)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	struct nvkm_device *device = subdev->device;
+	struct gm20b_pll cur_pll;
+	u32 n_int, sdm_din;
+	/* if we only change pdiv, we can do a glitchless transition */
+	bool pdiv_only;
+	int ret;
+
+	gm20b_dvfs_calc_ndiv(clk, pll->n, &n_int, &sdm_din);
+	gm20b_pllg_read_mnp(clk, &cur_pll);
+	pdiv_only = cur_pll.base.n == n_int && cur_pll.sdm_din == sdm_din &&
+		    cur_pll.base.m == pll->m;
+
+	/* a PLL that is not enabled yet always takes the full sequence */
+	if (!gk20a_pllg_is_enabled(&clk->base))
+		pdiv_only = false;
+
+	/* split VCO-to-bypass jump in half by setting out divider 1:2 */
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV2 << GPC2CLK_OUT_VCODIV_SHIFT);
+	/* Intentional 2nd write to assure linear divider operation */
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV2 << GPC2CLK_OUT_VCODIV_SHIFT);
+	nvkm_rd32(device, GPC2CLK_OUT);
+	udelay(2);
+
+	if (pdiv_only) {
+		u32 old = cur_pll.base.pl;
+		u32 new = pll->pl;
+
+		/*
+		 * we can do a glitchless transition only if the old and new PL
+		 * parameters share at least one bit set to 1. If this is not
+		 * the case, calculate and program an interim PL that will allow
+		 * us to respect that rule.
+		 */
+		if ((old & new) == 0) {
+			cur_pll.base.pl = min(old | BIT(ffs(new) - 1),
+					      new | BIT(ffs(old) - 1));
+			gk20a_pllg_write_mnp(&clk->base, &cur_pll.base);
+		}
+
+		cur_pll.base.pl = new;
+		gk20a_pllg_write_mnp(&clk->base, &cur_pll.base);
+	} else {
+		/* more than pdiv changes: disable, program M/N/PL/SDM, enable */
+		gm20b_pllg_disable(clk);
+
+		cur_pll.base = *pll;
+		cur_pll.base.n = n_int;
+		cur_pll.sdm_din = sdm_din;
+		gm20b_pllg_write_mnp(clk, &cur_pll);
+
+		ret = gm20b_pllg_enable(clk);
+		if (ret)
+			return ret;
+	}
+
+	/* restore out divider 1:1 */
+	udelay(2);
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT);
+	/* Intentional 2nd write to assure linear divider operation */
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_VCODIV_MASK,
+		  GPC2CLK_OUT_VCODIV1 << GPC2CLK_OUT_VCODIV_SHIFT);
+	nvkm_rd32(device, GPC2CLK_OUT);
+
+	return 0;
+}
+
+static int
+gm20b_pllg_program_mnp_slide(struct gm20b_clk *clk, const struct gk20a_pll *pll)
+{
+	struct gk20a_pll cur_pll;
+	int ret;
+
+	if (gk20a_pllg_is_enabled(&clk->base)) {
+		gk20a_pllg_read_mnp(&clk->base, &cur_pll);
+
+		/* M and PL are unchanged: sliding NDIV alone is sufficient */
+		if (pll->m == cur_pll.m && pll->pl == cur_pll.pl)
+			return gm20b_pllg_slide(clk, pll->n);
+
+		/* otherwise first slide down to NDIV_LO of the current M */
+		cur_pll.n = gk20a_pllg_n_lo(&clk->base, &cur_pll);
+		ret = gm20b_pllg_slide(clk, cur_pll.n);
+		if (ret)
+			return ret;
+	}
+
+	/* program the new M and PL, with N set to the NDIV_LO of the new M */
+	cur_pll = *pll;
+	cur_pll.n = gk20a_pllg_n_lo(&clk->base, &cur_pll);
+	ret = gm20b_pllg_program_mnp(clk, &cur_pll);
+	if (ret)
+		return ret;
+
+	/* finally slide up to the requested NDIV */
+	return gm20b_pllg_slide(clk, pll->n);
+}
+
+static int
+gm20b_clk_calc(struct nvkm_clk *base, struct nvkm_cstate *cstate)
+{
+	struct gm20b_clk *clk = gm20b_clk(base);
+	struct nvkm_subdev *subdev = &base->subdev;
+	struct nvkm_volt *volt = subdev->device->volt;
+	int err;
+
+	err = gk20a_pllg_calc_mnp(&clk->base, cstate->domain[nv_clk_src_gpc] *
+					     GK20A_CLK_GPC_MDIV, &clk->new_pll);
+	if (err)
+		return err;
+
+	/* DVFS settings are derived from the voltage entry of this cstate */
+	clk->new_uv = volt->vid[cstate->voltage].uv;
+	gm20b_dvfs_calc_det_coeff(clk, clk->new_uv, &clk->new_dvfs);
+	nvkm_debug(subdev, "%s uv: %d uv\n", __func__, clk->new_uv);
+
+	return 0;
+}
+
+/*
+ * Compute PLL parameters that are always safe for the current voltage
+ */
+static void
+gm20b_dvfs_calc_safe_pll(struct gm20b_clk *clk, struct gk20a_pll *pll)
+{
+	u32 rate = gk20a_pllg_calc_rate(&clk->base, pll) / KHZ;
+	u32 parent_rate = clk->base.parent_rate / KHZ;
+	u32 nmin, nsafe;
+
+	/* remove a safe margin of 10% */
+	if (rate > clk->safe_fmax_vmin)
+		rate = rate * (100 - 10) / 100;
+
+	/* gpc2clk; rate and parent_rate are both scaled down by KHZ here */
+	rate *= 2;
+
+	nmin = DIV_ROUND_UP(pll->m * clk->base.params->min_vco, parent_rate);
+	nsafe = pll->m * rate / parent_rate;
+
+	if (nsafe < nmin) {
+		pll->pl = DIV_ROUND_UP(nmin * parent_rate, pll->m * rate);
+		nsafe = nmin;
+	}
+
+	pll->n = nsafe;
+}
+
+static void
+gm20b_dvfs_program_coeff(struct gm20b_clk *clk, u32 coeff)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+
+	/* raise the external strobe, write DFS_COEFF, then drop the strobe */
+	nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT);
+
+	nvkm_mask(device, GPCPLL_DVFS0, GPCPLL_DVFS0_DFS_COEFF_MASK,
+		  coeff << GPCPLL_DVFS0_DFS_COEFF_SHIFT);
+
+	udelay(1);
+	nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT, 0);
+}
+
+static void
+gm20b_dvfs_program_ext_cal(struct gm20b_clk *clk, u32 dfs_det_cal)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+	u32 val = dfs_det_cal & MASK(DFS_DET_RANGE + 1);
+
+	/* callers pass an s32: keep its sign extension out of the other bits */
+	nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2, MASK(DFS_DET_RANGE + 1), val);
+	udelay(1);
+
+	val = nvkm_rd32(device, GPCPLL_DVFS1);
+	if (!(val & BIT(25))) {
+		/* Use external value to overwrite calibration value */
+		val |= BIT(25) | BIT(16);
+		nvkm_wr32(device, GPCPLL_DVFS1, val);
+	}
+}
+
+static void
+gm20b_dvfs_program_dfs_detection(struct gm20b_clk *clk,
+				 struct gm20b_clk_dvfs *dvfs)
+{
+	struct nvkm_device *device = clk->base.base.subdev.device;
+
+	/* strobe is held while DFS_COEFF and DFS_DET_MAX are written together */
+	nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT);
+
+	nvkm_mask(device, GPCPLL_DVFS0,
+		  GPCPLL_DVFS0_DFS_COEFF_MASK | GPCPLL_DVFS0_DFS_DET_MAX_MASK,
+		  dvfs->dfs_coeff << GPCPLL_DVFS0_DFS_COEFF_SHIFT |
+		  dvfs->dfs_det_max << GPCPLL_DVFS0_DFS_DET_MAX_SHIFT);
+
+	udelay(1);
+	nvkm_mask(device, GPC_BCAST_GPCPLL_DVFS2,
+		  GPC_BCAST_GPCPLL_DVFS2_DFS_EXT_STROBE_BIT, 0);
+
+	gm20b_dvfs_program_ext_cal(clk, dvfs->dfs_ext_cal);
+}
+
+static int
+gm20b_clk_prog(struct nvkm_clk *base)
+{
+	struct gm20b_clk *clk = gm20b_clk(base);
+	u32 cur_freq;
+	int ret;
+
+	/* Same voltage as currently applied: DVFS settings need no update */
+	if (clk->uv == clk->new_uv)
+		goto prog;
+
+	/*
+	 * Interim step for changing DVFS detection settings: low enough
+	 * frequency to be safe at DVFS coeff = 0.
+	 *
+	 * 1. If voltage is increasing:
+	 * - safe frequency target matches the lowest - old - frequency
+	 * - DVFS settings are still old
+	 * - Voltage already increased to new level by volt, but maximum
+	 *   detection limit assures PLL output remains under F/V curve
+	 *
+	 * 2. If voltage is decreasing:
+	 * - safe frequency target matches the lowest - new - frequency
+	 * - DVFS settings are still old
+	 * - Voltage is also old, it will be lowered by volt afterwards
+	 *
+	 * Interim step can be skipped if old frequency is below safe minimum,
+	 * i.e., it is low enough to be safe at any voltage in operating range
+	 * with zero DVFS coefficient.
+	 */
+	cur_freq = nvkm_clk_read(&clk->base.base, nv_clk_src_gpc);
+	if (cur_freq > clk->safe_fmax_vmin) {
+		struct gk20a_pll pll_safe;
+
+		if (clk->uv < clk->new_uv)
+			/* voltage will raise: safe frequency is current one */
+			pll_safe = clk->base.pll;
+		else
+			/* voltage will drop: safe frequency is new one */
+			pll_safe = clk->new_pll;
+
+		gm20b_dvfs_calc_safe_pll(clk, &pll_safe);
+		ret = gm20b_pllg_program_mnp_slide(clk, &pll_safe);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * DVFS detection settings transition:
+	 * - Set DVFS coefficient zero
+	 * - Set calibration level to new voltage
+	 * - Set DVFS coefficient to match new voltage
+	 */
+	gm20b_dvfs_program_coeff(clk, 0);
+	gm20b_dvfs_program_ext_cal(clk, clk->new_dvfs.dfs_ext_cal);
+	gm20b_dvfs_program_coeff(clk, clk->new_dvfs.dfs_coeff);
+	gm20b_dvfs_program_dfs_detection(clk, &clk->new_dvfs);
+
+prog:
+	clk->uv = clk->new_uv;
+	clk->dvfs = clk->new_dvfs;
+	clk->base.pll = clk->new_pll;
+
+	return gm20b_pllg_program_mnp_slide(clk, &clk->base.pll);
+}
+
 static struct nvkm_pstate
 gm20b_pstates[] = {
 	{
@@ -133,9 +714,99 @@ gm20b_pstates[] = {
 			.voltage = 12,
 		},
 	},
-
 };
 
+static void
+gm20b_clk_fini(struct nvkm_clk *base)
+{
+	struct nvkm_device *device = base->subdev.device;
+	struct gm20b_clk *clk = gm20b_clk(base);
+
+	/* slide to VCO min */
+	if (gk20a_pllg_is_enabled(&clk->base)) {
+		struct gk20a_pll pll;
+		u32 n_lo;
+
+		gk20a_pllg_read_mnp(&clk->base, &pll);
+		n_lo = gk20a_pllg_n_lo(&clk->base, &pll);
+		gm20b_pllg_slide(clk, n_lo);
+	}
+
+	gm20b_pllg_disable(clk);
+
+	/* set IDDQ; the data must be the bit itself (a plain 1 is ENABLE) */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_IDDQ, GPCPLL_CFG_IDDQ);
+}
+
+static int
+gm20b_clk_init_dvfs(struct gm20b_clk *clk)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	struct nvkm_device *device = subdev->device;
+	bool fused = clk->uvdet_offs && clk->uvdet_slope;
+	static const s32 ADC_SLOPE_UV = 10000; /* default ADC detection slope */
+	u32 data;
+	int ret;
+
+	/* Enable NA DVFS */
+	nvkm_mask(device, GPCPLL_DVFS1, GPCPLL_DVFS1_EN_DFS_BIT,
+		  GPCPLL_DVFS1_EN_DFS_BIT);
+
+	/* Set VCO_CTRL */
+	if (clk->dvfs_params->vco_ctrl)
+		nvkm_mask(device, GPCPLL_CFG3, GPCPLL_CFG3_VCO_CTRL_MASK,
+		      clk->dvfs_params->vco_ctrl << GPCPLL_CFG3_VCO_CTRL_SHIFT);
+
+	if (fused) {
+		/* Start internal calibration, but ignore results */
+		nvkm_mask(device, GPCPLL_DVFS1, GPCPLL_DVFS1_EN_DFS_CAL_BIT,
+			  GPCPLL_DVFS1_EN_DFS_CAL_BIT);
+
+		/* got uvdet parameters from fuse, skip calibration */
+		goto calibrated;
+	}
+
+	/*
+	 * If calibration parameters are not fused, start internal calibration,
+	 * wait for completion, and use results along with default slope to
+	 * calculate ADC offset during boot.
+	 */
+	nvkm_mask(device, GPCPLL_DVFS1, GPCPLL_DVFS1_EN_DFS_CAL_BIT,
+			  GPCPLL_DVFS1_EN_DFS_CAL_BIT);
+
+	/* Wait for internal calibration done (spec < 2us). */
+	ret = nvkm_wait_usec(device, 10, GPCPLL_DVFS1,
+			     GPCPLL_DVFS1_DFS_CAL_DONE_BIT,
+			     GPCPLL_DVFS1_DFS_CAL_DONE_BIT);
+	if (ret < 0) {
+		nvkm_error(subdev, "GPCPLL calibration timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	data = nvkm_rd32(device, GPCPLL_CFG3) >>
+			 GPCPLL_CFG3_PLL_DFS_TESTOUT_SHIFT;
+	data &= MASK(GPCPLL_CFG3_PLL_DFS_TESTOUT_WIDTH);
+
+	clk->uvdet_slope = ADC_SLOPE_UV;
+	clk->uvdet_offs = ((s32)clk->uv) - data * ADC_SLOPE_UV;
+
+	nvkm_debug(subdev, "calibrated DVFS parameters: offs %d, slope %d\n",
+		   clk->uvdet_offs, clk->uvdet_slope);
+
+calibrated:
+	/* Compute the DVFS parameters for clk->uv and apply those same ones */
+	gm20b_dvfs_calc_det_coeff(clk, clk->uv, &clk->dvfs);
+	gm20b_dvfs_program_coeff(clk, 0);
+	gm20b_dvfs_program_ext_cal(clk, clk->dvfs.dfs_ext_cal);
+	gm20b_dvfs_program_coeff(clk, clk->dvfs.dfs_coeff);
+	gm20b_dvfs_program_dfs_detection(clk, &clk->dvfs);
+
+	return 0;
+}
+
+/* Forward declaration to detect speedo >=1 in gm20b_clk_init() */
+static const struct nvkm_clk_func gm20b_clk;
+
 static int
 gm20b_clk_init(struct nvkm_clk *base)
 {
@@ -143,15 +814,56 @@ gm20b_clk_init(struct nvkm_clk *base)
 	struct nvkm_subdev *subdev = &clk->base.subdev;
 	struct nvkm_device *device = subdev->device;
 	int ret;
+	u32 data;
+
+	/* get out from IDDQ */
+	nvkm_mask(device, GPCPLL_CFG, GPCPLL_CFG_IDDQ, 0);
+	nvkm_rd32(device, GPCPLL_CFG);
+	udelay(5);
+
+	nvkm_mask(device, GPC2CLK_OUT, GPC2CLK_OUT_INIT_MASK,
+		  GPC2CLK_OUT_INIT_VAL);
 
 	/* Set the global bypass control to VCO */
 	nvkm_mask(device, BYPASSCTRL_SYS,
 	       MASK(BYPASSCTRL_SYS_GPCPLL_WIDTH) << BYPASSCTRL_SYS_GPCPLL_SHIFT,
 	       0);
 
+	ret = gk20a_clk_setup_slide(clk);
+	if (ret)
+		return ret;
+
+	/* If not fused, set RAM SVOP PDP data 0x2, and enable fuse override */
+	data = nvkm_rd32(device, 0x021944);
+	if (!(data & 0x3)) {
+		data |= 0x2;
+		nvkm_wr32(device, 0x021944, data);
+
+		data = nvkm_rd32(device, 0x021948);
+		data |=  0x1;
+		nvkm_wr32(device, 0x021948, data);
+	}
+
+	/* Disable idle slow down  */
+	nvkm_mask(device, 0x20160, 0x003f0000, 0x0);
+
+	/* speedo >= 1? */
+	if (clk->base.func == &gm20b_clk) {
+		struct gm20b_clk *_clk = gm20b_clk(base);
+		struct nvkm_volt *volt = device->volt;
+
+		/* Get current voltage */
+		_clk->uv = nvkm_volt_get(volt);
+
+		/* Initialize DVFS */
+		ret = gm20b_clk_init_dvfs(_clk);
+		if (ret)
+			return ret;
+	}
+
 	/* Start with lowest frequency */
 	base->func->calc(base, &base->func->pstates[0].base);
-	ret = base->func->prog(&clk->base);
+	ret = base->func->prog(base);
 	if (ret) {
 		nvkm_error(subdev, "cannot initialize clock\n");
 		return ret;
@@ -169,6 +881,7 @@ gm20b_clk_speedo0 = {
 	.prog = gk20a_clk_prog,
 	.tidy = gk20a_clk_tidy,
 	.pstates = gm20b_pstates,
+	/* Speedo 0 only supports 12 voltages */
 	.nr_pstates = ARRAY_SIZE(gm20b_pstates) - 1,
 	.domains = {
 		{ nv_clk_src_crystal, 0xff },
@@ -177,8 +890,26 @@ gm20b_clk_speedo0 = {
 	},
 };
 
-int
-gm20b_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
+static const struct nvkm_clk_func
+gm20b_clk = {	/* passed to gk20a_clk_ctor() by gm20b_clk_new() on the NAPLL path */
+	.init = gm20b_clk_init,
+	.fini = gm20b_clk_fini,
+	.read = gk20a_clk_read,
+	.calc = gm20b_clk_calc,
+	.prog = gm20b_clk_prog,
+	.tidy = gk20a_clk_tidy,
+	.pstates = gm20b_pstates,
+	.nr_pstates = ARRAY_SIZE(gm20b_pstates), /* speedo0 table uses one less */
+	.domains = {
+		{ nv_clk_src_crystal, 0xff },
+		{ nv_clk_src_gpc, 0xff, 0, "core", GK20A_CLK_GPC_MDIV },
+		{ nv_clk_src_max },
+	},
+};
+
+static int
+gm20b_clk_new_speedo0(struct nvkm_device *device, int index,
+		      struct nvkm_clk **pclk)
 {
 	struct gk20a_clk *clk;
 	int ret;
@@ -188,11 +919,156 @@ gm20b_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
 		return -ENOMEM;
 	*pclk = &clk->base;
 
-	ret = _gk20a_clk_ctor(device, index, &gm20b_clk_speedo0,
-			      &gm20b_pllg_params, clk);
+	ret = gk20a_clk_ctor(device, index, &gm20b_clk_speedo0,
+			     &gm20b_pllg_params, clk);
 
 	clk->pl_to_div = pl_to_div;
 	clk->div_to_pl = div_to_pl;
 
 	return ret;
 }
+
+/* FUSE_RESERVED_CALIB0 fuse register and the shift/width of each of its fields */
+#define FUSE_RESERVED_CALIB0	0x204
+#define FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_SHIFT	0
+#define FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_WIDTH	4
+#define FUSE_RESERVED_CALIB0_INTERCEPT_INT_SHIFT	4
+#define FUSE_RESERVED_CALIB0_INTERCEPT_INT_WIDTH	10
+#define FUSE_RESERVED_CALIB0_SLOPE_FRAC_SHIFT		14
+#define FUSE_RESERVED_CALIB0_SLOPE_FRAC_WIDTH		10
+#define FUSE_RESERVED_CALIB0_SLOPE_INT_SHIFT		24
+#define FUSE_RESERVED_CALIB0_SLOPE_INT_WIDTH		6
+#define FUSE_RESERVED_CALIB0_FUSE_REV_SHIFT		30
+#define FUSE_RESERVED_CALIB0_FUSE_REV_WIDTH		2
+
+/* Decode the fused calibration slope/offset from FUSE_RESERVED_CALIB0. */
+static int
+gm20b_clk_init_fused_params(struct gm20b_clk *clk)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	u32 slope_int, slope_frac, offs_int, offs_frac;
+	u32 fuse = 0, fuse_rev = 0;
+
+#if IS_ENABLED(CONFIG_ARCH_TEGRA)
+	tegra_fuse_readl(FUSE_RESERVED_CALIB0, &fuse);
+	fuse_rev = (fuse >> FUSE_RESERVED_CALIB0_FUSE_REV_SHIFT) &
+		   MASK(FUSE_RESERVED_CALIB0_FUSE_REV_WIDTH);
+#endif
+	/* Revision 0 means nothing was fused; calibration happens later */
+	if (!fuse_rev)
+		return -EINVAL;
+
+	slope_int = (fuse >> FUSE_RESERVED_CALIB0_SLOPE_INT_SHIFT) &
+		    MASK(FUSE_RESERVED_CALIB0_SLOPE_INT_WIDTH);
+	slope_frac = (fuse >> FUSE_RESERVED_CALIB0_SLOPE_FRAC_SHIFT) &
+		     MASK(FUSE_RESERVED_CALIB0_SLOPE_FRAC_WIDTH);
+	offs_int = (fuse >> FUSE_RESERVED_CALIB0_INTERCEPT_INT_SHIFT) &
+		   MASK(FUSE_RESERVED_CALIB0_INTERCEPT_INT_WIDTH);
+	offs_frac = (fuse >> FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_SHIFT) &
+		    MASK(FUSE_RESERVED_CALIB0_INTERCEPT_FRAC_WIDTH);
+	/* Slope: integer part in mV + fractional part in uV */
+	clk->uvdet_slope = slope_int * 1000 + slope_frac;
+	/* Offset: integer part in mV + fractional part in 100uV */
+	clk->uvdet_offs = offs_int * 1000 + offs_frac * 100;
+	nvkm_debug(subdev, "fused calibration data: slope %d, offs %d\n",
+		   clk->uvdet_slope, clk->uvdet_offs);
+	return 0;
+}
+
+/* Compute the frequency considered safe at the lowest available voltage. */
+static int
+gm20b_clk_init_safe_fmax(struct gm20b_clk *clk)
+{
+	struct nvkm_subdev *subdev = &clk->base.base.subdev;
+	const struct nvkm_clk_func *func = clk->base.base.func;
+	struct nvkm_volt *volt = subdev->device->volt;
+	int lowest_uv = volt->vid[0].uv;
+	int lowest_id = 0;
+	u32 best = 0;
+	int n;
+
+	/* lowest usable voltage; ties go to the later table entry */
+	for (n = 1; n < volt->vid_nr; n++) {
+		if (volt->vid[n].uv > lowest_uv)
+			continue;
+		lowest_uv = volt->vid[n].uv;
+		lowest_id = volt->vid[n].vid;
+	}
+
+	/* highest GPC frequency among the pstates using that voltage */
+	for (n = 0; n < func->nr_pstates; n++) {
+		if (func->pstates[n].base.voltage != lowest_id)
+			continue;
+		best = max(best, func->pstates[n].base.domain[nv_clk_src_gpc]);
+	}
+
+	if (!best) {
+		nvkm_error(subdev, "failed to evaluate safe fmax\n");
+		return -EINVAL;
+	}
+
+	/* keep a 10% margin below that frequency */
+	clk->safe_fmax_vmin = best * (100 - 10) / 100;
+	nvkm_debug(subdev, "safe fmax @ vmin = %u Khz\n", clk->safe_fmax_vmin);
+	return 0;
+}
+
+/* Create the GM20B clock: legacy clock for speedo 0, NAPLL for speedo >= 1. */
+int
+gm20b_clk_new(struct nvkm_device *device, int index, struct nvkm_clk **pclk)
+{
+	struct nvkm_device_tegra *tdev = device->func->tegra(device);
+	struct gm20b_clk *clk;
+	struct nvkm_subdev *subdev;
+	struct gk20a_clk_pllg_params *clk_params;
+	int ret;
+
+	/* Speedo 0 GPUs cannot use noise-aware PLL */
+	if (tdev->gpu_speedo_id == 0)
+		return gm20b_clk_new_speedo0(device, index, pclk);
+
+	/* Speedo >= 1, use NAPLL */
+	clk = kzalloc(sizeof(*clk) + sizeof(*clk_params), GFP_KERNEL);
+	if (!clk)
+		return -ENOMEM;
+	*pclk = &clk->base.base;
+	subdev = &clk->base.base.subdev;
+
+	/* duplicate the clock parameters since we will patch them below */
+	clk_params = (void *) (clk + 1);
+	*clk_params = gm20b_pllg_params;
+	ret = gk20a_clk_ctor(device, index, &gm20b_clk, clk_params,
+			     &clk->base);
+	if (ret)
+		return ret;
+
+	/*
+	 * NAPLL can only work with max_u, clamp the m range so
+	 * gk20a_pllg_calc_mnp always uses it
+	 */
+	clk_params->max_m = clk_params->min_m = DIV_ROUND_UP(clk_params->max_u,
+						(clk->base.parent_rate / KHZ));
+	if (clk_params->max_m == 0) {
+		nvkm_warn(subdev, "cannot use NAPLL, using legacy clock...\n");
+		/* never leave *pclk pointing at the object freed below */
+		*pclk = NULL;
+		kfree(clk);
+		return gm20b_clk_new_speedo0(device, index, pclk);
+	}
+
+	clk->base.pl_to_div = pl_to_div;
+	clk->base.div_to_pl = div_to_pl;
+
+	clk->dvfs_params = &gm20b_dvfs_params;
+
+	ret = gm20b_clk_init_fused_params(clk);
+	/*
+	 * we will calibrate during init - should never happen on
+	 * prod parts
+	 */
+	if (ret)
+		nvkm_warn(subdev, "no fused calibration parameters\n");
+
+	/* a failure to evaluate the safe fmax fails the construction */
+	return gm20b_clk_init_safe_fmax(clk);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
index 842d5de96d73..edcc157e6ac8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
@@ -24,6 +24,8 @@ nvkm-y += nvkm/subdev/fb/gk104.o
 nvkm-y += nvkm/subdev/fb/gk20a.o
 nvkm-y += nvkm/subdev/fb/gm107.o
 nvkm-y += nvkm/subdev/fb/gm200.o
+nvkm-y += nvkm/subdev/fb/gp100.o
+nvkm-y += nvkm/subdev/fb/gp104.o
 
 nvkm-y += nvkm/subdev/fb/ram.o
 nvkm-y += nvkm/subdev/fb/ramnv04.o
@@ -41,6 +43,7 @@ nvkm-y += nvkm/subdev/fb/rammcp77.o
 nvkm-y += nvkm/subdev/fb/ramgf100.o
 nvkm-y += nvkm/subdev/fb/ramgk104.o
 nvkm-y += nvkm/subdev/fb/ramgm107.o
+nvkm-y += nvkm/subdev/fb/ramgp100.o
 nvkm-y += nvkm/subdev/fb/sddr2.o
 nvkm-y += nvkm/subdev/fb/sddr3.o
 nvkm-y += nvkm/subdev/fb/gddr3.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
index ce90242b8cce..a7049c041594 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
@@ -25,6 +25,7 @@
 #include "ram.h"
 
 #include <core/memory.h>
+#include <core/option.h>
 #include <subdev/bios.h>
 #include <subdev/bios/M0203.h>
 #include <engine/gr.h>
@@ -134,6 +135,10 @@ nvkm_fb_init(struct nvkm_subdev *subdev)
 
 	if (fb->func->init)
 		fb->func->init(fb);
+	if (fb->func->init_page)
+		fb->func->init_page(fb);
+	if (fb->func->init_unkn)
+		fb->func->init_unkn(fb);
 	return 0;
 }
 
@@ -171,6 +176,7 @@ nvkm_fb_ctor(const struct nvkm_fb_func *func, struct nvkm_device *device,
 	nvkm_subdev_ctor(&nvkm_fb, device, index, &fb->subdev);
 	fb->func = func;
 	fb->tile.regions = fb->func->tile.regions;
+	fb->page = nvkm_longopt(device->cfgopt, "NvFbBigPage", 0);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
index e649ead5ccfc..76433cc66fff 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
@@ -72,6 +72,22 @@ gf100_fb_oneinit(struct nvkm_fb *fb)
 }
 
 void
+gf100_fb_init_page(struct nvkm_fb *fb)
+{
+	struct nvkm_device *device = fb->subdev.device;
+
+	/* bit 0 of 0x100c80 is set for page 16 and cleared otherwise */
+	if (fb->page == 16) {
+		nvkm_mask(device, 0x100c80, 0x00000001, 0x00000001);
+		return;
+	}
+
+	/* any other value is handled as 17, which is stored back in fb->page */
+	nvkm_mask(device, 0x100c80, 0x00000001, 0x00000000);
+	fb->page = 17;
+}
+
+void
 gf100_fb_init(struct nvkm_fb *base)
 {
 	struct gf100_fb *fb = gf100_fb(base);
@@ -79,8 +95,6 @@ gf100_fb_init(struct nvkm_fb *base)
 
 	if (fb->r100c10_page)
 		nvkm_wr32(device, 0x100c10, fb->r100c10 >> 8);
-
-	nvkm_mask(device, 0x100c80, 0x00000001, 0x00000000); /* 128KiB lpg */
 }
 
 void *
@@ -125,6 +139,7 @@ gf100_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
 	.init = gf100_fb_init,
+	.init_page = gf100_fb_init_page,
 	.intr = gf100_fb_intr,
 	.ram_new = gf100_ram_new,
 	.memtype_valid = gf100_fb_memtype_valid,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h
index 2160e5a39c9a..449f431644b3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h
@@ -14,4 +14,6 @@ int gf100_fb_new_(const struct nvkm_fb_func *, struct nvkm_device *,
 void *gf100_fb_dtor(struct nvkm_fb *);
 void gf100_fb_init(struct nvkm_fb *);
 void gf100_fb_intr(struct nvkm_fb *);
+
+void gp100_fb_init(struct nvkm_fb *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c
index b41f0f70038c..4245e2e6e604 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk104.c
@@ -29,6 +29,7 @@ gk104_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
 	.init = gf100_fb_init,
+	.init_page = gf100_fb_init_page,
 	.intr = gf100_fb_intr,
 	.ram_new = gk104_ram_new,
 	.memtype_valid = gf100_fb_memtype_valid,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c
index 7306f7dfc3b9..f815fe2bbf08 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c
@@ -27,7 +27,6 @@ static void
 gk20a_fb_init(struct nvkm_fb *fb)
 {
 	struct nvkm_device *device = fb->subdev.device;
-	nvkm_mask(device, 0x100c80, 0x00000001, 0x00000000); /* 128KiB lpg */
 	nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(fb->mmu_wr) >> 8);
 	nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(fb->mmu_rd) >> 8);
 }
@@ -36,6 +35,7 @@ static const struct nvkm_fb_func
 gk20a_fb = {
 	.oneinit = gf100_fb_oneinit,
 	.init = gk20a_fb_init,
+	.init_page = gf100_fb_init_page,
 	.memtype_valid = gf100_fb_memtype_valid,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c
index 4869fdb753c9..db699025f546 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm107.c
@@ -29,6 +29,7 @@ gm107_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
 	.init = gf100_fb_init,
+	.init_page = gf100_fb_init_page,
 	.intr = gf100_fb_intr,
 	.ram_new = gm107_ram_new,
 	.memtype_valid = gf100_fb_memtype_valid,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
index 44f5716f64d8..62f653240be3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
@@ -26,6 +26,24 @@
 
 #include <core/memory.h>
 
+void
+gm200_fb_init_page(struct nvkm_fb *fb)
+{
+	struct nvkm_device *device = fb->subdev.device;
+
+	/* 16 and 17 clear bit 11 of 0x100c80 and set/clear bit 0 respectively */
+	if (fb->page == 16) {
+		nvkm_mask(device, 0x100c80, 0x00000801, 0x00000001);
+	} else if (fb->page == 17) {
+		nvkm_mask(device, 0x100c80, 0x00000801, 0x00000000);
+	} else {
+		/* any other value: set bit 11 alone and reset fb->page to 0 */
+		nvkm_mask(device, 0x100c80, 0x00000800, 0x00000800);
+		fb->page = 0;
+	}
+}
+
 static void
 gm200_fb_init(struct nvkm_fb *base)
 {
@@ -48,6 +66,7 @@ gm200_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
 	.init = gm200_fb_init,
+	.init_page = gm200_fb_init_page,
 	.intr = gf100_fb_intr,
 	.ram_new = gm107_ram_new,
 	.memtype_valid = gf100_fb_memtype_valid,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c
new file mode 100644
index 000000000000..98474aec1921
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "gf100.h"
+#include "ram.h"
+
+#include <core/memory.h>
+
+static void
+gp100_fb_init_unkn(struct nvkm_fb *base)
+{
+	struct nvkm_device *device = gf100_fb(base)->base.subdev.device;
+	nvkm_wr32(device, 0x1fac80, nvkm_rd32(device, 0x100c80));
+	nvkm_wr32(device, 0x1facc4, nvkm_rd32(device, 0x100cc4));
+	nvkm_wr32(device, 0x1facc8, nvkm_rd32(device, 0x100cc8));
+	nvkm_wr32(device, 0x1faccc, nvkm_rd32(device, 0x100ccc));
+}
+
+void
+gp100_fb_init(struct nvkm_fb *base)
+{
+	struct gf100_fb *fb = gf100_fb(base);
+	struct nvkm_device *device = fb->base.subdev.device;
+
+	if (fb->r100c10_page)
+		nvkm_wr32(device, 0x100c10, fb->r100c10 >> 8);
+	nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(fb->base.mmu_wr) >> 8);
+	nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(fb->base.mmu_rd) >> 8);
+	/* 0x00060000 is a 2-bit field: cap at 2 so the value fits the mask */
+	nvkm_mask(device, 0x100cc4, 0x00060000,
+		  min(nvkm_memory_size(fb->base.mmu_rd) >> 16, (u64)2) << 17);
+}
+
+static const struct nvkm_fb_func
+gp100_fb = {
+	.dtor = gf100_fb_dtor,
+	.oneinit = gf100_fb_oneinit,
+	.init = gp100_fb_init,
+	.init_page = gm200_fb_init_page, /* same page setup as gm200_fb */
+	.init_unkn = gp100_fb_init_unkn, /* called last by nvkm_fb_init() */
+	.ram_new = gp100_ram_new,
+	.memtype_valid = gf100_fb_memtype_valid,
+};
+
+int
+gp100_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb)
+{
+	return gf100_fb_new_(&gp100_fb, device, index, pfb);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp104.c
new file mode 100644
index 000000000000..92cb71861bec
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp104.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "gf100.h"
+#include "ram.h"
+
+#include <core/memory.h>
+
+static const struct nvkm_fb_func
+gp104_fb = {
+	.dtor = gf100_fb_dtor,
+	.oneinit = gf100_fb_oneinit,
+	.init = gp100_fb_init,
+	.init_page = gm200_fb_init_page,
+	.ram_new = gp100_ram_new,
+	.memtype_valid = gf100_fb_memtype_valid,
+};
+
+int
+gp104_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb)
+{
+	return gf100_fb_new_(&gp104_fb, device, index, pfb);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
index d97d640e60a0..e905d44fa1d5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
@@ -8,6 +8,8 @@ struct nvkm_fb_func {
 	void *(*dtor)(struct nvkm_fb *);
 	int (*oneinit)(struct nvkm_fb *);
 	void (*init)(struct nvkm_fb *);
+	void (*init_page)(struct nvkm_fb *);
+	void (*init_unkn)(struct nvkm_fb *);
 	void (*intr)(struct nvkm_fb *);
 
 	struct {
@@ -60,5 +62,8 @@ void nv46_fb_tile_init(struct nvkm_fb *, int i, u32 addr, u32 size,
 		       u32 pitch, u32 flags, struct nvkm_fb_tile *);
 
 int gf100_fb_oneinit(struct nvkm_fb *);
+void gf100_fb_init_page(struct nvkm_fb *);
 bool gf100_fb_memtype_valid(struct nvkm_fb *, u32);
+
+void gm200_fb_init_page(struct nvkm_fb *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
index f816cbf2ced3..b9ec0ae6723a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
@@ -47,4 +47,5 @@ int mcp77_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 int gf100_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 int gk104_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 int gm107_ram_new(struct nvkm_fb *, struct nvkm_ram **);
+int gp100_ram_new(struct nvkm_fb *, struct nvkm_ram **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c
new file mode 100644
index 000000000000..f3be408b5e5e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c
@@ -0,0 +1,146 @@
+/*
+ * Copyright 2013 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "ram.h"
+
+#include <subdev/bios.h>
+#include <subdev/bios/init.h>
+#include <subdev/bios/rammap.h>
+
+static int
+gp100_ram_init(struct nvkm_ram *ram)
+{
+	struct nvkm_subdev *subdev = &ram->fb->subdev;
+	struct nvkm_device *device = subdev->device;
+	struct nvkm_bios *bios = device->bios;
+	u8  ver, hdr, cnt, len, snr, ssz;
+	u32 data;
+	int i;
+
+	/* run a bunch of tables from rammap table.  there's actually
+	 * individual pointers for each rammap entry too, but, nvidia
+	 * seem to just run the last two entries' scripts early on in
+	 * their init, and never again.. we'll just run 'em all once
+	 * for now.
+	 *
+	 * i strongly suspect that each script is for a separate mode
+	 * (likely selected by 0x9a065c's lower bits?), and the
+	 * binary driver skips the one that's already been setup by
+	 * the init tables.
+	 */
+	data = nvbios_rammapTe(bios, &ver, &hdr, &cnt, &len, &snr, &ssz);
+	if (!data || hdr < 0x15) /* offsets 0x10 and 0x14 are read below */
+		return -EINVAL;
+
+	cnt  = nvbios_rd08(bios, data + 0x14); /* guess at count */
+	data = nvbios_rd32(bios, data + 0x10); /* guess u32... */
+	if (cnt) {
+		u32 save = nvkm_rd32(device, 0x9a065c) & 0x000000f0;
+		for (i = 0; i < cnt; i++, data += 4) {
+			if (i != save >> 4) { /* skip the entry already selected */
+				nvkm_mask(device, 0x9a065c, 0x000000f0, i << 4);
+				nvbios_exec(&(struct nvbios_init) {
+						.subdev = subdev,
+						.bios = bios,
+						.offset = nvbios_rd32(bios, data),
+						.execute = 1,
+					    });
+			}
+		}
+		nvkm_mask(device, 0x9a065c, 0x000000f0, save); /* put bits 7:4 back */
+	}
+
+	nvkm_mask(device, 0x9a0584, 0x11000000, 0x00000000);
+	nvkm_wr32(device, 0x10ecc0, 0xffffffff);
+	nvkm_mask(device, 0x9a0160, 0x00000010, 0x00000010);
+	return 0;
+}
+
+static const struct nvkm_ram_func
+gp100_ram_func = {
+	.init = gp100_ram_init,
+	.get = gf100_ram_get,
+	.put = gf100_ram_put,
+};
+
+int
+gp100_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
+{
+	struct nvkm_ram *ram;
+	struct nvkm_subdev *subdev = &fb->subdev;
+	struct nvkm_device *device = subdev->device;
+	enum nvkm_ram_type type = nvkm_fb_bios_memtype(device->bios);
+	const u32 rsvd_head = ( 256 * 1024); /* vga memory */
+	const u32 rsvd_tail = (1024 * 1024); /* vbios etc */
+	u32 fbpa_num = nvkm_rd32(device, 0x022438), fbpa;
+	u32 fbio_opt = nvkm_rd32(device, 0x021c14);
+	u64 part, size = 0, comm = ~0ULL;
+	bool mixed = false;
+	int ret;
+
+	nvkm_debug(subdev, "022438: %08x\n", fbpa_num);
+	nvkm_debug(subdev, "021c14: %08x\n", fbio_opt);
+	for (fbpa = 0; fbpa < fbpa_num; fbpa++) {
+		if (!(fbio_opt & (1 << fbpa))) { /* bit set in 021c14: skip it */
+			part = nvkm_rd32(device, 0x90020c + (fbpa * 0x4000));
+			nvkm_debug(subdev, "fbpa %02x: %lld MiB\n", fbpa, part);
+			part = part << 20; /* MiB -> bytes */
+			if (part != comm) {
+				if (comm != ~0ULL)
+					mixed = true; /* partition sizes differ */
+				comm = min(comm, part); /* smallest size so far */
+			}
+			size = size + part;
+		}
+	}
+
+	ret = nvkm_ram_new_(&gp100_ram_func, fb, type, size, 0, &ram);
+	*pram = ram; /* assigned before the error check below */
+	if (ret)
+		return ret;
+
+	nvkm_mm_fini(&ram->vram); /* vram allocator is re-initialised below */
+
+	if (mixed) { /* two ranges: one sized by comm, one at 0x1000000000 + comm */
+		ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT,
+				   ((comm * fbpa_num) - rsvd_head) >>
+				   NVKM_RAM_MM_SHIFT, 1);
+		if (ret)
+			return ret;
+
+		ret = nvkm_mm_init(&ram->vram, (0x1000000000ULL + comm) >>
+				   NVKM_RAM_MM_SHIFT,
+				   (size - (comm * fbpa_num) - rsvd_tail) >>
+				   NVKM_RAM_MM_SHIFT, 1);
+		if (ret)
+			return ret;
+	} else { /* equal partitions: one range minus both reservations */
+		ret = nvkm_mm_init(&ram->vram, rsvd_head >> NVKM_RAM_MM_SHIFT,
+				   (size - rsvd_head - rsvd_tail) >>
+				   NVKM_RAM_MM_SHIFT, 1);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
index 323c79abe468..41bd5d0f7692 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
@@ -276,6 +276,8 @@ nvkm_iccsense_oneinit(struct nvkm_subdev *subdev)
 		struct pwr_rail_t *r = &stbl.rail[i];
 		struct nvkm_iccsense_rail *rail;
 		struct nvkm_iccsense_sensor *sensor;
+		int (*read)(struct nvkm_iccsense *,
+			    struct nvkm_iccsense_rail *);
 
 		if (!r->mode || r->resistor_mohm == 0)
 			continue;
@@ -284,31 +286,31 @@ nvkm_iccsense_oneinit(struct nvkm_subdev *subdev)
 		if (!sensor)
 			continue;
 
-		rail = kmalloc(sizeof(*rail), GFP_KERNEL);
-		if (!rail)
-			return -ENOMEM;
-
 		switch (sensor->type) {
 		case NVBIOS_EXTDEV_INA209:
 			if (r->rail != 0)
 				continue;
-			rail->read = nvkm_iccsense_ina209_read;
+			read = nvkm_iccsense_ina209_read;
 			break;
 		case NVBIOS_EXTDEV_INA219:
 			if (r->rail != 0)
 				continue;
-			rail->read = nvkm_iccsense_ina219_read;
+			read = nvkm_iccsense_ina219_read;
 			break;
 		case NVBIOS_EXTDEV_INA3221:
 			if (r->rail >= 3)
 				continue;
-			rail->read = nvkm_iccsense_ina3221_read;
+			read = nvkm_iccsense_ina3221_read;
 			break;
 		default:
 			continue;
 		}
 
+		rail = kmalloc(sizeof(*rail), GFP_KERNEL);
+		if (!rail)
+			return -ENOMEM;
 		sensor->rail_mask |= 1 << r->rail;
+		rail->read = read;
 		rail->sensor = sensor;
 		rail->idx = r->rail;
 		rail->mohm = r->resistor_mohm;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
index 932b366598aa..12d6f4f102cb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
@@ -3,3 +3,4 @@ nvkm-y += nvkm/subdev/ltc/gf100.o
 nvkm-y += nvkm/subdev/ltc/gk104.o
 nvkm-y += nvkm/subdev/ltc/gm107.o
 nvkm-y += nvkm/subdev/ltc/gm200.o
+nvkm-y += nvkm/subdev/ltc/gp100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
index c9eb677967a8..4a0fa0a9b802 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
@@ -23,7 +23,6 @@
  */
 #include "priv.h"
 
-#include <core/enum.h>
 #include <subdev/fb.h>
 #include <subdev/timer.h>
 
@@ -71,7 +70,7 @@ gf100_ltc_zbc_clear_depth(struct nvkm_ltc *ltc, int i, const u32 depth)
 	nvkm_wr32(device, 0x17ea58, depth);
 }
 
-static const struct nvkm_bitfield
+const struct nvkm_bitfield
 gf100_ltc_lts_intr_name[] = {
 	{ 0x00000001, "IDLE_ERROR_IQ" },
 	{ 0x00000002, "IDLE_ERROR_CBC" },
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
index e292f5679418..ec0a3844b2d1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
@@ -68,18 +68,22 @@ gm107_ltc_zbc_clear_depth(struct nvkm_ltc *ltc, int i, const u32 depth)
 	nvkm_wr32(device, 0x17e34c, depth);
 }
 
-static void
-gm107_ltc_lts_isr(struct nvkm_ltc *ltc, int c, int s)
+void
+gm107_ltc_intr_lts(struct nvkm_ltc *ltc, int c, int s)
 {
 	struct nvkm_subdev *subdev = &ltc->subdev;
 	struct nvkm_device *device = subdev->device;
-	u32 base = 0x140000 + (c * 0x2000) + (s * 0x200);
-	u32 stat = nvkm_rd32(device, base + 0x00c);
+	u32 base = 0x140400 + (c * 0x2000) + (s * 0x200);
+	u32 intr = nvkm_rd32(device, base + 0x00c);
+	u16 stat = intr & 0x0000ffff;
+	char msg[128];
 
 	if (stat) {
-		nvkm_error(subdev, "LTC%d_LTS%d: %08x\n", c, s, stat);
-		nvkm_wr32(device, base + 0x00c, stat);
+		nvkm_snprintbf(msg, sizeof(msg), gf100_ltc_lts_intr_name, stat);
+		nvkm_error(subdev, "LTC%d_LTS%d: %08x [%s]\n", c, s, intr, msg);
 	}
+
+	nvkm_wr32(device, base + 0x00c, intr);
 }
 
 void
@@ -92,7 +96,7 @@ gm107_ltc_intr(struct nvkm_ltc *ltc)
 	while (mask) {
 		u32 s, c = __ffs(mask);
 		for (s = 0; s < ltc->lts_nr; s++)
-			gm107_ltc_lts_isr(ltc, c, s);
+			gm107_ltc_intr_lts(ltc, c, s);
 		mask &= ~(1 << c);
 	}
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
index 2a29bfd5125a..e18e0dc19ec8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
@@ -46,7 +46,7 @@ static const struct nvkm_ltc_func
 gm200_ltc = {
 	.oneinit = gm200_ltc_oneinit,
 	.init = gm200_ltc_init,
-	.intr = gm107_ltc_intr, /*XXX: not validated */
+	.intr = gm107_ltc_intr,
 	.cbc_clear = gm107_ltc_cbc_clear,
 	.cbc_wait = gm107_ltc_cbc_wait,
 	.zbc = 16,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c
new file mode 100644
index 000000000000..0bdfb2f40266
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include "priv.h"
+
+static void
+gp100_ltc_intr(struct nvkm_ltc *ltc)
+{
+	struct nvkm_device *device = ltc->subdev.device;
+	u32 pending = nvkm_rd32(device, 0x0001c0);
+	u32 unit, slice;
+
+	/* service every slice of each LTC whose bit is set in 0x0001c0 */
+	for (; pending; pending &= ~(1 << unit)) {
+		unit = __ffs(pending);
+		for (slice = 0; slice < ltc->lts_nr; slice++)
+			gm107_ltc_intr_lts(ltc, unit, slice);
+	}
+}
+
+static int
+gp100_ltc_oneinit(struct nvkm_ltc *ltc)
+{
+	struct nvkm_device *device = ltc->subdev.device;
+	ltc->ltc_nr = nvkm_rd32(device, 0x12006c);
+	ltc->lts_nr = nvkm_rd32(device, 0x17e280) >> 28; /* bits 31:28 */
+	/*XXX: tagram allocation - TBD; tags is initialised with zero length */
+	return nvkm_mm_init(&ltc->tags, 0, 0, 1);
+}
+
+static void
+gp100_ltc_init(struct nvkm_ltc *ltc)
+{
+	/*XXX: PMU LS call to setup tagram address */
+}
+
+static const struct nvkm_ltc_func
+gp100_ltc = {
+	.oneinit = gp100_ltc_oneinit,
+	.init = gp100_ltc_init,
+	.intr = gp100_ltc_intr,
+	.cbc_clear = gm107_ltc_cbc_clear,
+	.cbc_wait = gm107_ltc_cbc_wait,
+	.zbc = 16,
+	.zbc_clear_color = gm107_ltc_zbc_clear_color,
+	.zbc_clear_depth = gm107_ltc_zbc_clear_depth,
+	.invalidate = gf100_ltc_invalidate,
+	.flush = gf100_ltc_flush,
+};
+
+int
+gp100_ltc_new(struct nvkm_device *device, int index, struct nvkm_ltc **pltc)
+{
+	return nvkm_ltc_new_(&gp100_ltc, device, index, pltc);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
index 6d81c695ed0d..8b95f96e3ffa 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
@@ -2,6 +2,7 @@
 #define __NVKM_LTC_PRIV_H__
 #define nvkm_ltc(p) container_of((p), struct nvkm_ltc, subdev)
 #include <subdev/ltc.h>
+#include <core/enum.h>
 
 int nvkm_ltc_new_(const struct nvkm_ltc_func *, struct nvkm_device *,
 		  int index, struct nvkm_ltc **);
@@ -31,8 +32,10 @@ void gf100_ltc_zbc_clear_color(struct nvkm_ltc *, int, const u32[4]);
 void gf100_ltc_zbc_clear_depth(struct nvkm_ltc *, int, const u32);
 void gf100_ltc_invalidate(struct nvkm_ltc *);
 void gf100_ltc_flush(struct nvkm_ltc *);
+extern const struct nvkm_bitfield gf100_ltc_lts_intr_name[];
 
 void gm107_ltc_intr(struct nvkm_ltc *);
+void gm107_ltc_intr_lts(struct nvkm_ltc *, int ltc, int lts);
 void gm107_ltc_cbc_clear(struct nvkm_ltc *, u32, u32);
 void gm107_ltc_cbc_wait(struct nvkm_ltc *);
 void gm107_ltc_zbc_clear_color(struct nvkm_ltc *, int, const u32[4]);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
index 49695ac7be2e..12943f92c206 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
@@ -10,3 +10,4 @@ nvkm-y += nvkm/subdev/mc/gt215.o
 nvkm-y += nvkm/subdev/mc/gf100.o
 nvkm-y += nvkm/subdev/mc/gk104.o
 nvkm-y += nvkm/subdev/mc/gk20a.o
+nvkm-y += nvkm/subdev/mc/gp100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
index 350a8caa84c8..6b25e25f9eba 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
@@ -27,43 +27,67 @@
 #include <subdev/top.h>
 
 void
-nvkm_mc_unk260(struct nvkm_mc *mc, u32 data)
+nvkm_mc_unk260(struct nvkm_device *device, u32 data)
 {
-	if (mc->func->unk260)
+	struct nvkm_mc *mc = device->mc;
+	if (likely(mc) && mc->func->unk260)
 		mc->func->unk260(mc, data);
 }
 
 void
-nvkm_mc_intr_unarm(struct nvkm_mc *mc)
+nvkm_mc_intr_mask(struct nvkm_device *device, enum nvkm_devidx devidx, bool en) /* unmask (en) or mask delivery of devidx's interrupt */
 {
-	return mc->func->intr_unarm(mc);
+	struct nvkm_mc *mc = device->mc;
+	const struct nvkm_mc_map *map;
+	if (likely(mc) && mc->func->intr_mask) { /* no-op without an MC subdev or without an intr_mask hook */
+		u32 mask = nvkm_top_intr_mask(device, devidx); /* first choice: bit reported by the top subdev (0 if none) */
+		for (map = mc->func->intr; !mask && map->stat; map++) { /* only scans the chipset intr table while mask is still 0 */
+			if (map->unit == devidx)
+				mask = map->stat;
+		}
+		mc->func->intr_mask(mc, mask, en ? mask : 0); /* hook args: bits to change, then their new value */
+	}
+}
+
+void
+nvkm_mc_intr_unarm(struct nvkm_device *device) /* call the chipset's intr_unarm hook for this device */
+{
+	struct nvkm_mc *mc = device->mc;
+	if (likely(mc)) /* silently does nothing when the device has no MC subdev */
+		mc->func->intr_unarm(mc);
+}
 
 void
-nvkm_mc_intr_rearm(struct nvkm_mc *mc)
+nvkm_mc_intr_rearm(struct nvkm_device *device) /* call the chipset's intr_rearm hook (re-enables interrupt reporting to the host) */
 {
-	return mc->func->intr_rearm(mc);
+	struct nvkm_mc *mc = device->mc;
+	if (likely(mc)) /* silently does nothing when the device has no MC subdev */
+		mc->func->intr_rearm(mc);
 }
 
 static u32
-nvkm_mc_intr_mask(struct nvkm_mc *mc)
+nvkm_mc_intr_stat(struct nvkm_mc *mc) /* pending-interrupt bits from the chipset hook; an all-ones read is reported as 0 */
 {
-	u32 intr = mc->func->intr_mask(mc);
+	u32 intr = mc->func->intr_stat(mc); /* hook renamed from intr_mask, which now names the (un)mask operation */
 	if (WARN_ON_ONCE(intr == 0xffffffff))
 		intr = 0; /* likely fallen off the bus */
 	return intr;
 }
 
 void
-nvkm_mc_intr(struct nvkm_mc *mc, bool *handled)
+nvkm_mc_intr(struct nvkm_device *device, bool *handled)
 {
-	struct nvkm_device *device = mc->subdev.device;
+	struct nvkm_mc *mc = device->mc;
 	struct nvkm_subdev *subdev;
-	const struct nvkm_mc_map *map = mc->func->intr;
-	u32 stat, intr = nvkm_mc_intr_mask(mc);
+	const struct nvkm_mc_map *map;
+	u32 stat, intr;
 	u64 subdevs;
 
-	stat = nvkm_top_intr(device->top, intr, &subdevs);
+	if (unlikely(!mc))
+		return;
+
+	intr = nvkm_mc_intr_stat(mc);
+	stat = nvkm_top_intr(device, intr, &subdevs);
 	while (subdevs) {
 		enum nvkm_devidx subidx = __ffs64(subdevs);
 		subdev = nvkm_device_subdev(device, subidx);
@@ -72,14 +96,13 @@ nvkm_mc_intr(struct nvkm_mc *mc, bool *handled)
 		subdevs &= ~BIT_ULL(subidx);
 	}
 
-	while (map->stat) {
+	for (map = mc->func->intr; map->stat; map++) {
 		if (intr & map->stat) {
 			subdev = nvkm_device_subdev(device, map->unit);
 			if (subdev)
 				nvkm_subdev_intr(subdev);
 			stat &= ~map->stat;
 		}
-		map++;
 	}
 
 	if (stat)
@@ -87,22 +110,32 @@ nvkm_mc_intr(struct nvkm_mc *mc, bool *handled)
 	*handled = intr != 0;
 }
 
-static void
-nvkm_mc_reset_(struct nvkm_mc *mc, enum nvkm_devidx devidx)
+static u32
+nvkm_mc_reset_mask(struct nvkm_device *device, bool isauto, /* returns devidx's enable/reset bits, or 0 when none are known */
+		   enum nvkm_devidx devidx)
 {
-	struct nvkm_device *device = mc->subdev.device;
+	struct nvkm_mc *mc = device->mc;
 	const struct nvkm_mc_map *map;
-	u64 pmc_enable;
-
-	if (!(pmc_enable = nvkm_top_reset(device->top, devidx))) {
-		for (map = mc->func->reset; map && map->stat; map++) {
-			if (map->unit == devidx) {
-				pmc_enable = map->stat;
-				break;
+	u64 pmc_enable = 0; /* NOTE(review): u64 local but the function returns u32; both sources (nvkm_top_reset, map->stat) are u32 */
+	if (likely(mc)) { /* without an MC subdev the result stays 0 */
+		if (!(pmc_enable = nvkm_top_reset(device, devidx))) { /* top subdev knows nothing: fall back to the chipset table */
+			for (map = mc->func->reset; map && map->stat; map++) {
+				if (!isauto || !map->noauto) { /* isauto callers skip entries flagged noauto */
+					if (map->unit == devidx) {
+						pmc_enable = map->stat;
+						break;
+					}
+				}
 			}
 		}
 	}
+	return pmc_enable;
+}
 
+void
+nvkm_mc_reset(struct nvkm_device *device, enum nvkm_devidx devidx)
+{
+	u64 pmc_enable = nvkm_mc_reset_mask(device, true, devidx);
 	if (pmc_enable) {
 		nvkm_mask(device, 0x000200, pmc_enable, 0x00000000);
 		nvkm_mask(device, 0x000200, pmc_enable, pmc_enable);
@@ -111,17 +144,27 @@ nvkm_mc_reset_(struct nvkm_mc *mc, enum nvkm_devidx devidx)
 }
 
 void
-nvkm_mc_reset(struct nvkm_mc *mc, enum nvkm_devidx devidx)
+nvkm_mc_disable(struct nvkm_device *device, enum nvkm_devidx devidx) /* clear devidx's bits in register 0x000200 */
 {
-	if (likely(mc))
-		nvkm_mc_reset_(mc, devidx);
+	u64 pmc_enable = nvkm_mc_reset_mask(device, false, devidx); /* isauto=false: noauto table entries are eligible */
+	if (pmc_enable) /* unknown unit: nothing is written */
+		nvkm_mask(device, 0x000200, pmc_enable, 0x00000000);
+}
+
+void
+nvkm_mc_enable(struct nvkm_device *device, enum nvkm_devidx devidx) /* set devidx's bits in register 0x000200 */
+{
+	u64 pmc_enable = nvkm_mc_reset_mask(device, false, devidx); /* isauto=false: noauto table entries are eligible */
+	if (pmc_enable) { /* unknown unit: nothing is written */
+		nvkm_mask(device, 0x000200, pmc_enable, pmc_enable);
+		nvkm_rd32(device, 0x000200); /* result discarded; presumably a read-back to post the write - TODO confirm */
+	}
 }
 
 static int
 nvkm_mc_fini(struct nvkm_subdev *subdev, bool suspend)
 {
-	struct nvkm_mc *mc = nvkm_mc(subdev);
-	nvkm_mc_intr_unarm(mc);
+	nvkm_mc_intr_unarm(subdev->device); /* suspend is not consulted; the helper now takes the device and looks up its MC itself */
 	return 0;
 }
 
@@ -131,7 +174,7 @@ nvkm_mc_init(struct nvkm_subdev *subdev)
 	struct nvkm_mc *mc = nvkm_mc(subdev);
 	if (mc->func->init)
 		mc->func->init(mc);
-	nvkm_mc_intr_rearm(mc);
+	nvkm_mc_intr_rearm(subdev->device);
 	return 0;
 }
 
@@ -148,16 +191,21 @@ nvkm_mc = {
 	.fini = nvkm_mc_fini,
 };
 
+void
+nvkm_mc_ctor(const struct nvkm_mc_func *func, struct nvkm_device *device, /* initialise a caller-allocated nvkm_mc in place */
+	     int index, struct nvkm_mc *mc)
+{
+	nvkm_subdev_ctor(&nvkm_mc, device, index, &mc->subdev); /* set up the embedded subdev with the nvkm_mc subdev function table */
+	mc->func = func; /* chipset hooks used by the nvkm_mc_* helpers */
+}
+
 int
 nvkm_mc_new_(const struct nvkm_mc_func *func, struct nvkm_device *device,
 	     int index, struct nvkm_mc **pmc)
 {
 	struct nvkm_mc *mc;
-
 	if (!(mc = *pmc = kzalloc(sizeof(*mc), GFP_KERNEL)))
 		return -ENOMEM;
-
-	nvkm_subdev_ctor(&nvkm_mc, device, index, &mc->subdev);
-	mc->func = func;
+	nvkm_mc_ctor(func, device, index, *pmc); /* *pmc is the object just allocated (same pointer as mc) */
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c
index 5c85b47f071d..c3d66ef5dc12 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c
@@ -57,7 +57,7 @@ g84_mc = {
 	.intr = g84_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = g84_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c
index 0280b43cc10c..93ad4982ce5f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c
@@ -57,7 +57,7 @@ g98_mc = {
 	.intr = g98_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = g98_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c
index 8397e223bd43..d2c4d6033abb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c
@@ -76,7 +76,7 @@ gf100_mc_intr_rearm(struct nvkm_mc *mc)
 }
 
 u32
-gf100_mc_intr_mask(struct nvkm_mc *mc)
+gf100_mc_intr_stat(struct nvkm_mc *mc)
 {
 	struct nvkm_device *device = mc->subdev.device;
 	u32 intr0 = nvkm_rd32(device, 0x000100);
@@ -85,6 +85,14 @@ gf100_mc_intr_mask(struct nvkm_mc *mc)
 }
 
 void
+gf100_mc_intr_mask(struct nvkm_mc *mc, u32 mask, u32 stat) /* intr_mask hook: set the bits selected by mask to stat */
+{
+	struct nvkm_device *device = mc->subdev.device;
+	nvkm_mask(device, 0x000640, mask, stat); /* same masked update is applied to both registers */
+	nvkm_mask(device, 0x000644, mask, stat);
+}
+
+void
 gf100_mc_unk260(struct nvkm_mc *mc, u32 data)
 {
 	nvkm_wr32(mc->subdev.device, 0x000260, data);
@@ -97,6 +105,7 @@ gf100_mc = {
 	.intr_unarm = gf100_mc_intr_unarm,
 	.intr_rearm = gf100_mc_intr_rearm,
 	.intr_mask = gf100_mc_intr_mask,
+	.intr_stat = gf100_mc_intr_stat,
 	.reset = gf100_mc_reset,
 	.unk260 = gf100_mc_unk260,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c
index 317464212c7d..7b8c6ecad1a5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c
@@ -26,6 +26,7 @@
 const struct nvkm_mc_map
 gk104_mc_reset[] = {
 	{ 0x00000100, NVKM_ENGINE_FIFO },
+	{ 0x00002000, NVKM_SUBDEV_PMU, true }, /* noauto: skipped by nvkm_mc_reset(), used by nvkm_mc_enable()/nvkm_mc_disable() */
 	{}
 };
 
@@ -53,6 +54,7 @@ gk104_mc = {
 	.intr_unarm = gf100_mc_intr_unarm,
 	.intr_rearm = gf100_mc_intr_rearm,
 	.intr_mask = gf100_mc_intr_mask,
+	.intr_stat = gf100_mc_intr_stat,
 	.reset = gk104_mc_reset,
 	.unk260 = gf100_mc_unk260,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c
index 60b044f517ed..ca1bf3279dbe 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c
@@ -30,6 +30,7 @@ gk20a_mc = {
 	.intr_unarm = gf100_mc_intr_unarm,
 	.intr_rearm = gf100_mc_intr_rearm,
 	.intr_mask = gf100_mc_intr_mask,
+	.intr_stat = gf100_mc_intr_stat,
 	.reset = gk104_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c
new file mode 100644
index 000000000000..4d22f4abd6de
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2012 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#define gp100_mc(p) container_of((p), struct gp100_mc, base)
+#include "priv.h"
+
+struct gp100_mc {
+	struct nvkm_mc base; /* embedded generic object; gp100_mc() recovers this wrapper via container_of */
+	spinlock_t lock; /* held around every update of intr/mask and the register writes that follow */
+	bool intr; /* true after rearm, false after unarm */
+	u32 mask; /* per-interrupt enable bits; only written to hardware while intr is true */
+};
+
+static void
+gp100_mc_intr_update(struct gp100_mc *mc) /* write the current intr/mask state to hardware; callers in this file hold mc->lock */
+{
+	struct nvkm_device *device = mc->base.subdev.device;
+	u32 mask = mc->intr ? mc->mask : 0, i; /* unarmed: effective mask is 0 */
+	for (i = 0; i < 2; i++) { /* two register pairs, 4 bytes apart */
+		nvkm_wr32(device, 0x000180 + (i * 0x04), ~mask); /* presumably a "clear enable" register - TODO confirm */
+		nvkm_wr32(device, 0x000160 + (i * 0x04),  mask); /* presumably a "set enable" register - TODO confirm */
+	}
+}
+
+static void
+gp100_mc_intr_unarm(struct nvkm_mc *base) /* intr_unarm hook: mark unarmed and write the resulting all-zero mask */
+{
+	struct gp100_mc *mc = gp100_mc(base);
+	unsigned long flags;
+	spin_lock_irqsave(&mc->lock, flags);
+	mc->intr = false; /* mc->mask is preserved for the next rearm */
+	gp100_mc_intr_update(mc);
+	spin_unlock_irqrestore(&mc->lock, flags);
+}
+
+static void
+gp100_mc_intr_rearm(struct nvkm_mc *base) /* intr_rearm hook: mark armed and write the stored mask */
+{
+	struct gp100_mc *mc = gp100_mc(base);
+	unsigned long flags;
+	spin_lock_irqsave(&mc->lock, flags);
+	mc->intr = true; /* from now on gp100_mc_intr_update() uses mc->mask */
+	gp100_mc_intr_update(mc);
+	spin_unlock_irqrestore(&mc->lock, flags);
+}
+
+static void
+gp100_mc_intr_mask(struct nvkm_mc *base, u32 mask, u32 intr) /* intr_mask hook: update the stored mask, then push it to hardware */
+{
+	struct gp100_mc *mc = gp100_mc(base);
+	unsigned long flags;
+	spin_lock_irqsave(&mc->lock, flags);
+	mc->mask = (mc->mask & ~mask) | intr; /* clear the bits selected by mask, then OR in intr */
+	gp100_mc_intr_update(mc); /* writes 0 instead if currently unarmed */
+	spin_unlock_irqrestore(&mc->lock, flags);
+}
+
+static const struct nvkm_mc_func
+gp100_mc = { /* GP100 hooks: own arm/unarm/mask handling, everything else reused from earlier chipsets */
+	.init = nv50_mc_init,
+	.intr = gk104_mc_intr,
+	.intr_unarm = gp100_mc_intr_unarm,
+	.intr_rearm = gp100_mc_intr_rearm,
+	.intr_mask = gp100_mc_intr_mask,
+	.intr_stat = gf100_mc_intr_stat,
+	.reset = gk104_mc_reset, /* no .unk260 is set, so nvkm_mc_unk260() does nothing on this chipset */
+};
+
+int
+gp100_mc_new(struct nvkm_device *device, int index, struct nvkm_mc **pmc) /* allocate a gp100_mc; returns 0 or -ENOMEM */
+{
+	struct gp100_mc *mc;
+
+	if (!(mc = kzalloc(sizeof(*mc), GFP_KERNEL)))
+		return -ENOMEM;
+	nvkm_mc_ctor(&gp100_mc, device, index, &mc->base); /* initialise the embedded base object in place */
+	*pmc = &mc->base; /* callers only ever see the base pointer */
+
+	spin_lock_init(&mc->lock);
+	mc->intr = false; /* starts unarmed: nothing is enabled until the first rearm */
+	mc->mask = 0x7fffffff; /* bits 0-30 set, bit 31 clear */
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c
index aad0ba95bf18..99d50a3d956f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c
@@ -53,13 +53,20 @@ gt215_mc_intr[] = {
 	{},
 };
 
+static void
+gt215_mc_intr_mask(struct nvkm_mc *mc, u32 mask, u32 stat) /* intr_mask hook: set the bits selected by mask to stat */
+{
+	nvkm_mask(mc->subdev.device, 0x000640, mask, stat); /* single register here; gf100_mc_intr_mask also updates 0x000644 */
+}
+
 static const struct nvkm_mc_func
 gt215_mc = {
 	.init = nv50_mc_init,
 	.intr = gt215_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_mask = gt215_mc_intr_mask, /* new meaning: (un)mask delivery of specific interrupts */
+	.intr_stat = nv04_mc_intr_stat, /* the former intr_mask hook (read pending interrupts), renamed */
 	.reset = gt215_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c
index a062624e906b..6509defd1460 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c
@@ -56,7 +56,7 @@ nv04_mc_intr_rearm(struct nvkm_mc *mc)
 }
 
 u32
-nv04_mc_intr_mask(struct nvkm_mc *mc)
+nv04_mc_intr_stat(struct nvkm_mc *mc) /* intr_stat hook: returns the raw value read from register 0x000100 */
 {
 	return nvkm_rd32(mc->subdev.device, 0x000100);
 }
@@ -75,7 +75,7 @@ nv04_mc = {
 	.intr = nv04_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = nv04_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c
index 55f0b9166b52..9213107901e6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c
@@ -39,7 +39,7 @@ nv11_mc = {
 	.intr = nv11_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = nv04_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c
index c40fa67f79a5..64bf5bbf8146 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c
@@ -48,7 +48,7 @@ nv17_mc = {
 	.intr = nv17_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = nv17_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c
index cc56271db564..65fa44a64b98 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c
@@ -43,7 +43,7 @@ nv44_mc = {
 	.intr = nv17_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = nv17_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c
index 343b6078580d..fe93b4fd7100 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c
@@ -50,7 +50,7 @@ nv50_mc = {
 	.intr = nv50_mc_intr,
 	.intr_unarm = nv04_mc_intr_unarm,
 	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = nv04_mc_intr_mask,
+	.intr_stat = nv04_mc_intr_stat,
 	.reset = nv17_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
index a12038118512..4f0576a06d24 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
@@ -3,12 +3,15 @@
 #define nvkm_mc(p) container_of((p), struct nvkm_mc, subdev)
 #include <subdev/mc.h>
 
+void nvkm_mc_ctor(const struct nvkm_mc_func *, struct nvkm_device *,
+		  int index, struct nvkm_mc *);
 int nvkm_mc_new_(const struct nvkm_mc_func *, struct nvkm_device *,
 		 int index, struct nvkm_mc **);
 
 struct nvkm_mc_map {
 	u32 stat;
 	u32 unit;
+	bool noauto; /* when set, nvkm_mc_reset_mask() skips this entry for isauto lookups */
 };
 
 struct nvkm_mc_func {
@@ -18,8 +21,10 @@ struct nvkm_mc_func {
 	void (*intr_unarm)(struct nvkm_mc *);
 	/* enable reporting of interrupts to host */
 	void (*intr_rearm)(struct nvkm_mc *);
+	/* (un)mask delivery of specific interrupts */
+	void (*intr_mask)(struct nvkm_mc *, u32 mask, u32 stat);
 	/* retrieve pending interrupt mask (NV_PMC_INTR) */
-	u32 (*intr_mask)(struct nvkm_mc *);
+	u32 (*intr_stat)(struct nvkm_mc *);
 	const struct nvkm_mc_map *reset;
 	void (*unk260)(struct nvkm_mc *, u32);
 };
@@ -27,7 +32,7 @@ struct nvkm_mc_func {
 void nv04_mc_init(struct nvkm_mc *);
 void nv04_mc_intr_unarm(struct nvkm_mc *);
 void nv04_mc_intr_rearm(struct nvkm_mc *);
-u32 nv04_mc_intr_mask(struct nvkm_mc *);
+u32 nv04_mc_intr_stat(struct nvkm_mc *);
 extern const struct nvkm_mc_map nv04_mc_reset[];
 
 extern const struct nvkm_mc_map nv17_mc_intr[];
@@ -39,7 +44,8 @@ void nv50_mc_init(struct nvkm_mc *);
 
 void gf100_mc_intr_unarm(struct nvkm_mc *);
 void gf100_mc_intr_rearm(struct nvkm_mc *);
-u32 gf100_mc_intr_mask(struct nvkm_mc *);
+void gf100_mc_intr_mask(struct nvkm_mc *, u32, u32);
+u32 gf100_mc_intr_stat(struct nvkm_mc *);
 void gf100_mc_unk260(struct nvkm_mc *, u32);
 
 extern const struct nvkm_mc_map gk104_mc_intr[];
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
index 3c2519fdeb81..2a31b7d66a6d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
@@ -10,3 +10,4 @@ nvkm-y += nvkm/subdev/pci/g94.o
 nvkm-y += nvkm/subdev/pci/gf100.o
 nvkm-y += nvkm/subdev/pci/gf106.o
 nvkm-y += nvkm/subdev/pci/gk104.o
+nvkm-y += nvkm/subdev/pci/gp100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
index 6b0328bd7eed..eb9b278198b2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
@@ -69,15 +69,13 @@ static irqreturn_t
 nvkm_pci_intr(int irq, void *arg)
 {
 	struct nvkm_pci *pci = arg;
-	struct nvkm_mc *mc = pci->subdev.device->mc;
+	struct nvkm_device *device = pci->subdev.device; /* the MC helpers below take the device and check for a missing MC themselves */
 	bool handled = false;
-	if (likely(mc)) {
-		nvkm_mc_intr_unarm(mc);
-		if (pci->msi)
-			pci->func->msi_rearm(pci);
-		nvkm_mc_intr(mc, &handled);
-		nvkm_mc_intr_rearm(mc);
-	}
+	nvkm_mc_intr_unarm(device);
+	if (pci->msi) /* NOTE(review): msi_rearm now runs even when device->mc is NULL; the old code skipped it */
+		pci->func->msi_rearm(pci);
+	nvkm_mc_intr(device, &handled); /* leaves handled false when there is no MC subdev */
+	nvkm_mc_intr_rearm(device);
 	return handled ? IRQ_HANDLED : IRQ_NONE;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gp100.c
new file mode 100644
index 000000000000..82c5234a06ff
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gp100.c
@@ -0,0 +1,44 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "priv.h"
+
+static void
+gp100_pci_msi_rearm(struct nvkm_pci *pci) /* msi_rearm hook for GP100 */
+{
+	nvkm_pci_wr32(pci, 0x0704, 0x00000000); /* writes 0 to PCI register 0x0704 */
+}
+
+static const struct nvkm_pci_func
+gp100_pci_func = { /* GP100 PCI hooks: nv40 register accessors plus a GP100-specific MSI rearm */
+	.rd32 = nv40_pci_rd32,
+	.wr08 = nv40_pci_wr08,
+	.wr32 = nv40_pci_wr32,
+	.msi_rearm = gp100_pci_msi_rearm,
+};
+
+int
+gp100_pci_new(struct nvkm_device *device, int index, struct nvkm_pci **ppci) /* GP100 PCI constructor */
+{
+	return nvkm_pci_new_(&gp100_pci_func, device, index, ppci); /* shared constructor with the GP100 table; its result is returned unchanged */
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
index 213fdba6cfa0..314be2192b7d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
@@ -19,8 +19,9 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
-
 #include "priv.h"
+
+#include <subdev/mc.h>
 #include <subdev/timer.h>
 
 static const char *
@@ -70,12 +71,11 @@ nvkm_secboot_falcon_enable(struct nvkm_secboot *sb)
 	int ret;
 
 	/* enable engine */
-	nvkm_mask(device, 0x200, sb->enable_mask, sb->enable_mask);
-	nvkm_rd32(device, 0x200);
+	nvkm_mc_enable(device, sb->devidx);
 	ret = nvkm_wait_msec(device, 10, sb->base + 0x10c, 0x6, 0x0);
 	if (ret < 0) {
-		nvkm_mask(device, 0x200, sb->enable_mask, 0x0);
 		nvkm_error(&sb->subdev, "Falcon mem scrubbing timeout\n");
+		nvkm_mc_disable(device, sb->devidx);
 		return ret;
 	}
 
@@ -85,8 +85,7 @@ nvkm_secboot_falcon_enable(struct nvkm_secboot *sb)
 
 	/* enable IRQs */
 	nvkm_wr32(device, sb->base + 0x010, 0xff);
-	nvkm_mask(device, 0x640, sb->irq_mask, sb->irq_mask);
-	nvkm_mask(device, 0x644, sb->irq_mask, sb->irq_mask);
+	nvkm_mc_intr_mask(device, sb->devidx, true);
 
 	return 0;
 }
@@ -97,14 +96,13 @@ nvkm_secboot_falcon_disable(struct nvkm_secboot *sb)
 	struct nvkm_device *device = sb->subdev.device;
 
 	/* disable IRQs and wait for any previous code to complete */
-	nvkm_mask(device, 0x644, sb->irq_mask, 0x0);
-	nvkm_mask(device, 0x640, sb->irq_mask, 0x0);
+	nvkm_mc_intr_mask(device, sb->devidx, false);
 	nvkm_wr32(device, sb->base + 0x014, 0xff);
 
 	falcon_wait_idle(device, sb->base);
 
 	/* disable engine */
-	nvkm_mask(device, 0x200, sb->enable_mask, 0x0);
+	nvkm_mc_disable(device, sb->devidx);
 
 	return 0;
 }
@@ -216,14 +214,7 @@ nvkm_secboot_oneinit(struct nvkm_subdev *subdev)
 		return ret;
 	}
 
-	/*
-	 * Build all blobs - the same blobs can be used to perform secure boot
-	 * multiple times
-	 */
-	if (sb->func->prepare_blobs)
-		ret = sb->func->prepare_blobs(sb);
-
-	return ret;
+	return 0;
 }
 
 static int
@@ -270,9 +261,8 @@ nvkm_secboot_ctor(const struct nvkm_secboot_func *func,
 	/* setup the performing falcon's base address and masks */
 	switch (func->boot_falcon) {
 	case NVKM_SECBOOT_FALCON_PMU:
+		sb->devidx = NVKM_SUBDEV_PMU;
 		sb->base = 0x10a000;
-		sb->irq_mask = 0x1000000;
-		sb->enable_mask = 0x2000;
 		break;
 	default:
 		nvkm_error(&sb->subdev, "invalid secure boot falcon\n");
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
index cc100dc940ea..f1e2dc914366 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
@@ -860,6 +860,8 @@ gm200_secboot_prepare_ls_blob(struct gm200_secboot *gsb)
 
 	/* Write LS blob */
 	ret = ls_ucode_mgr_write_wpr(gsb, &mgr, gsb->ls_blob);
+	if (ret)
+		nvkm_gpuobj_del(&gsb->ls_blob);
 
 cleanup:
 	ls_ucode_mgr_cleanup(&mgr);
@@ -1023,29 +1025,34 @@ gm20x_secboot_prepare_blobs(struct gm200_secboot *gsb)
 	int ret;
 
 	/* Load and prepare the managed falcon's firmwares */
-	ret = gm200_secboot_prepare_ls_blob(gsb);
-	if (ret)
-		return ret;
+	if (!gsb->ls_blob) {
+		ret = gm200_secboot_prepare_ls_blob(gsb);
+		if (ret)
+			return ret;
+	}
 
 	/* Load the HS firmware that will load the LS firmwares */
-	ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_load",
-					    &gsb->acr_load_blob,
-					    &gsb->acr_load_bl_desc, true);
-	if (ret)
-		return ret;
+	if (!gsb->acr_load_blob) {
+		ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_load",
+						&gsb->acr_load_blob,
+						&gsb->acr_load_bl_desc, true);
+		if (ret)
+			return ret;
+	}
 
 	/* Load the HS firmware bootloader */
-	ret = gm200_secboot_prepare_hsbl_blob(gsb);
-	if (ret)
-		return ret;
+	if (!gsb->hsbl_blob) {
+		ret = gm200_secboot_prepare_hsbl_blob(gsb);
+		if (ret)
+			return ret;
+	}
 
 	return 0;
 }
 
 static int
-gm200_secboot_prepare_blobs(struct nvkm_secboot *sb)
+gm200_secboot_prepare_blobs(struct gm200_secboot *gsb)
 {
-	struct gm200_secboot *gsb = gm200_secboot(sb);
 	int ret;
 
 	ret = gm20x_secboot_prepare_blobs(gsb);
@@ -1053,15 +1060,37 @@ gm200_secboot_prepare_blobs(struct nvkm_secboot *sb)
 		return ret;
 
 	/* dGPU only: load the HS firmware that unprotects the WPR region */
-	ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_unload",
-					    &gsb->acr_unload_blob,
-					    &gsb->acr_unload_bl_desc, false);
-	if (ret)
-		return ret;
+	if (!gsb->acr_unload_blob) {
+		ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_unload",
+					       &gsb->acr_unload_blob,
+					       &gsb->acr_unload_bl_desc, false);
+		if (ret)
+			return ret;
+	}
 
 	return 0;
 }
 
+static int
+gm200_secboot_blobs_ready(struct gm200_secboot *gsb) /* build the firmware blobs on first use; returns 0 or the hook's error */
+{
+	struct nvkm_subdev *subdev = &gsb->base.subdev;
+	int ret;
+
+	/* firmware already loaded, nothing to do... */
+	if (gsb->firmware_ok)
+		return 0;
+
+	ret = gsb->func->prepare_blobs(gsb); /* chip-specific hook, called without a NULL check */
+	if (ret) {
+		nvkm_error(subdev, "failed to load secure firmware\n");
+		return ret; /* firmware_ok stays false, so a later call tries again */
+	}
+
+	gsb->firmware_ok = true;
+
+	return 0;
+}
 
 
 /*
@@ -1234,6 +1263,11 @@ gm200_secboot_reset(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon)
 	struct gm200_secboot *gsb = gm200_secboot(sb);
 	int ret;
 
+	/* Make sure all blobs are ready */
+	ret = gm200_secboot_blobs_ready(gsb);
+	if (ret)
+		return ret;
+
 	/*
 	 * Dummy GM200 implementation: perform secure boot each time we are
 	 * called on FECS. Since only FECS and GPCCS are managed and started
@@ -1373,7 +1407,6 @@ gm200_secboot = {
 	.dtor = gm200_secboot_dtor,
 	.init = gm200_secboot_init,
 	.fini = gm200_secboot_fini,
-	.prepare_blobs = gm200_secboot_prepare_blobs,
 	.reset = gm200_secboot_reset,
 	.start = gm200_secboot_start,
 	.managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS) |
@@ -1415,6 +1448,7 @@ gm200_secboot_func = {
 	.bl_desc_size = sizeof(struct gm200_flcn_bl_desc),
 	.fixup_bl_desc = gm200_secboot_fixup_bl_desc,
 	.fixup_hs_desc = gm200_secboot_fixup_hs_desc,
+	.prepare_blobs = gm200_secboot_prepare_blobs,
 };
 
 int
@@ -1487,3 +1521,19 @@ MODULE_FIRMWARE("nvidia/gm206/gr/sw_ctx.bin");
 MODULE_FIRMWARE("nvidia/gm206/gr/sw_nonctx.bin");
 MODULE_FIRMWARE("nvidia/gm206/gr/sw_bundle_init.bin");
 MODULE_FIRMWARE("nvidia/gm206/gr/sw_method_init.bin");
+
+MODULE_FIRMWARE("nvidia/gp100/acr/bl.bin");
+MODULE_FIRMWARE("nvidia/gp100/acr/ucode_load.bin");
+MODULE_FIRMWARE("nvidia/gp100/acr/ucode_unload.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/fecs_bl.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/fecs_inst.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/fecs_data.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/fecs_sig.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/gpccs_bl.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/gpccs_inst.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/gpccs_data.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/gpccs_sig.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/sw_ctx.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/sw_nonctx.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/sw_bundle_init.bin");
+MODULE_FIRMWARE("nvidia/gp100/gr/sw_method_init.bin");
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
index 684320484b70..d5395ebfe8d3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
@@ -42,6 +42,32 @@ struct gm20b_flcn_bl_desc {
 	u32 data_size;
 };
 
+static int
+gm20b_secboot_prepare_blobs(struct gm200_secboot *gsb) /* prepare_blobs hook for GM20B: common blobs plus a WPR size check */
+{
+	struct nvkm_subdev *subdev = &gsb->base.subdev;
+	int acr_size;
+	int ret;
+
+	ret = gm20x_secboot_prepare_blobs(gsb); /* blob preparation shared with gm200_secboot_prepare_blobs() */
+	if (ret)
+		return ret;
+
+	acr_size = gsb->acr_load_blob->size;
+	/*
+	 * On Tegra the WPR region is set by the bootloader. It is illegal for
+	 * the HS blob to be larger than this region.
+	 */
+	if (acr_size > gsb->wpr_size) { /* NOTE(review): acr_size is a signed int; confirm wpr_size's type for this compare and the %d below */
+		nvkm_error(subdev, "WPR region too small for FW blob!\n");
+		nvkm_error(subdev, "required: %dB\n", acr_size);
+		nvkm_error(subdev, "WPR size: %dB\n", gsb->wpr_size);
+		return -ENOSPC; /* the blobs prepared above are not released on this path */
+	}
+
+	return 0;
+}
+
 /**
  * gm20b_secboot_fixup_bl_desc - adapt BL descriptor to format used by GM20B FW
  *
@@ -88,6 +114,7 @@ gm20b_secboot_func = {
 	.bl_desc_size = sizeof(struct gm20b_flcn_bl_desc),
 	.fixup_bl_desc = gm20b_secboot_fixup_bl_desc,
 	.fixup_hs_desc = gm20b_secboot_fixup_hs_desc,
+	.prepare_blobs = gm20b_secboot_prepare_blobs,
 };
 
 
@@ -147,32 +174,6 @@ gm20b_tegra_read_wpr(struct gm200_secboot *gsb)
 #endif
 
 static int
-gm20b_secboot_prepare_blobs(struct nvkm_secboot *sb)
-{
-	struct gm200_secboot *gsb = gm200_secboot(sb);
-	int acr_size;
-	int ret;
-
-	ret = gm20x_secboot_prepare_blobs(gsb);
-	if (ret)
-		return ret;
-
-	acr_size = gsb->acr_load_blob->size;
-	/*
-	 * On Tegra the WPR region is set by the bootloader. It is illegal for
-	 * the HS blob to be larger than this region.
-	 */
-	if (acr_size > gsb->wpr_size) {
-		nvkm_error(&sb->subdev, "WPR region too small for FW blob!\n");
-		nvkm_error(&sb->subdev, "required: %dB\n", acr_size);
-		nvkm_error(&sb->subdev, "WPR size: %dB\n", gsb->wpr_size);
-		return -ENOSPC;
-	}
-
-	return 0;
-}
-
-static int
 gm20b_secboot_init(struct nvkm_secboot *sb)
 {
 	struct gm200_secboot *gsb = gm200_secboot(sb);
@@ -189,7 +190,6 @@ static const struct nvkm_secboot_func
 gm20b_secboot = {
 	.dtor = gm200_secboot_dtor,
 	.init = gm20b_secboot_init,
-	.prepare_blobs = gm20b_secboot_prepare_blobs,
 	.reset = gm200_secboot_reset,
 	.start = gm200_secboot_start,
 	.managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS),
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
index f2b09dee7c5d..a9a8a0e1017e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
@@ -30,7 +30,6 @@ struct nvkm_secboot_func {
 	int (*init)(struct nvkm_secboot *);
 	int (*fini)(struct nvkm_secboot *, bool suspend);
 	void *(*dtor)(struct nvkm_secboot *);
-	int (*prepare_blobs)(struct nvkm_secboot *);
 	int (*reset)(struct nvkm_secboot *, enum nvkm_secboot_falcon);
 	int (*start)(struct nvkm_secboot *, enum nvkm_secboot_falcon);
 
@@ -147,10 +146,8 @@ struct hsflcn_acr_desc {
  * @inst:		instance block for HS falcon
  * @pgd:		page directory for the HS falcon
  * @vm:			address space used by the HS falcon
- * @bl_desc_size:	size of the BL descriptor used by this chip.
- * @fixup_bl_desc:	hook that generates the proper BL descriptor format from
- *			the generic GM200 format into a data array of size
- *			bl_desc_size
+ * @falcon_state:	current state of the managed falcons
+ * @firmware_ok:	whether the firmware blobs have been created
  */
 struct gm200_secboot {
 	struct nvkm_secboot base;
@@ -196,9 +193,19 @@ struct gm200_secboot {
 		RUNNING,
 	} falcon_state[NVKM_SECBOOT_FALCON_END];
 
+	bool firmware_ok;
 };
 #define gm200_secboot(sb) container_of(sb, struct gm200_secboot, base)
 
+/**
+ * Contains functions we wish to abstract between GM200-like implementations
+ * @bl_desc_size:	size of the BL descriptor used by this chip.
+ * @fixup_bl_desc:	hook that generates the proper BL descriptor format from
+ *			the generic GM200 format into a data array of size
+ *			bl_desc_size
+ * @fixup_hs_desc:	hook that twiddles the HS descriptor before it is used
+ * @prepare_blobs:	prepares the various blobs needed for secure booting
+ */
 struct gm200_secboot_func {
 	/*
 	 * Size of the bootloader descriptor for this chip. A block of this
@@ -214,6 +221,7 @@ struct gm200_secboot_func {
 	 * we want the HS FW to set up.
 	 */
 	void (*fixup_hs_desc)(struct gm200_secboot *, struct hsflcn_acr_desc *);
+	int (*prepare_blobs)(struct gm200_secboot *);
 };
 
 int gm200_secboot_init(struct nvkm_secboot *);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
index a1b264664aad..fe063d5728e2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
@@ -41,8 +41,9 @@ nvkm_top_device_new(struct nvkm_top *top)
 }
 
 u32
-nvkm_top_reset(struct nvkm_top *top, enum nvkm_devidx index)
+nvkm_top_reset(struct nvkm_device *device, enum nvkm_devidx index)
 {
+	struct nvkm_top *top = device->top;
 	struct nvkm_top_device *info;
 
 	if (top) {
@@ -56,8 +57,25 @@ nvkm_top_reset(struct nvkm_top *top, enum nvkm_devidx index)
 }
 
 u32
-nvkm_top_intr(struct nvkm_top *top, u32 intr, u64 *psubdevs)
+nvkm_top_intr_mask(struct nvkm_device *device, enum nvkm_devidx devidx)
 {
+	struct nvkm_top *top = device->top;
+	struct nvkm_top_device *info;
+
+	if (top) {
+		list_for_each_entry(info, &top->device, head) {
+			if (info->index == devidx && info->intr >= 0)
+				return BIT(info->intr);
+		}
+	}
+
+	return 0;
+}
+
+u32
+nvkm_top_intr(struct nvkm_device *device, u32 intr, u64 *psubdevs)
+{
+	struct nvkm_top *top = device->top;
 	struct nvkm_top_device *info;
 	u64 subdevs = 0;
 	u32 handled = 0;
@@ -78,8 +96,9 @@ nvkm_top_intr(struct nvkm_top *top, u32 intr, u64 *psubdevs)
 }
 
 enum nvkm_devidx
-nvkm_top_fault(struct nvkm_top *top, int fault)
+nvkm_top_fault(struct nvkm_device *device, int fault)
 {
+	struct nvkm_top *top = device->top;
 	struct nvkm_top_device *info;
 
 	list_for_each_entry(info, &top->device, head) {
@@ -91,8 +110,9 @@ nvkm_top_fault(struct nvkm_top *top, int fault)
 }
 
 enum nvkm_devidx
-nvkm_top_engine(struct nvkm_top *top, int index, int *runl, int *engn)
+nvkm_top_engine(struct nvkm_device *device, int index, int *runl, int *engn)
 {
+	struct nvkm_top *top = device->top;
 	struct nvkm_top_device *info;
 	int n = 0;
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
index e06acc340e99..efac3402f9dd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
@@ -29,7 +29,7 @@ gk104_top_oneinit(struct nvkm_top *top)
 	struct nvkm_subdev *subdev = &top->subdev;
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_top_device *info = NULL;
-	u32 data, type;
+	u32 data, type, inst;
 	int i;
 
 	for (i = 0; i < 64; i++) {
@@ -37,6 +37,7 @@ gk104_top_oneinit(struct nvkm_top *top)
 			if (!(info = nvkm_top_device_new(top)))
 				return -ENOMEM;
 			type = ~0;
+			inst = 0;
 		}
 
 		data = nvkm_rd32(device, 0x022700 + (i * 0x04));
@@ -45,6 +46,7 @@ gk104_top_oneinit(struct nvkm_top *top)
 		case 0x00000000: /* NOT_VALID */
 			continue;
 		case 0x00000001: /* DATA */
+			inst        = (data & 0x3c000000) >> 26;
 			info->addr  = (data & 0x00fff000);
 			info->fault = (data & 0x000000f8) >> 3;
 			break;
@@ -67,27 +69,32 @@ gk104_top_oneinit(struct nvkm_top *top)
 			continue;
 
 		/* Translate engine type to NVKM engine identifier. */
+#define A_(A) if (inst == 0) info->index = NVKM_ENGINE_##A
+#define B_(A) if (inst + NVKM_ENGINE_##A##0 < NVKM_ENGINE_##A##_LAST + 1)      \
+		info->index = NVKM_ENGINE_##A##0 + inst
 		switch (type) {
-		case 0x00000000: info->index = NVKM_ENGINE_GR; break;
-		case 0x00000001: info->index = NVKM_ENGINE_CE0; break;
-		case 0x00000002: info->index = NVKM_ENGINE_CE1; break;
-		case 0x00000003: info->index = NVKM_ENGINE_CE2; break;
-		case 0x00000008: info->index = NVKM_ENGINE_MSPDEC; break;
-		case 0x00000009: info->index = NVKM_ENGINE_MSPPP; break;
-		case 0x0000000a: info->index = NVKM_ENGINE_MSVLD; break;
-		case 0x0000000b: info->index = NVKM_ENGINE_MSENC; break;
-		case 0x0000000c: info->index = NVKM_ENGINE_VIC; break;
-		case 0x0000000d: info->index = NVKM_ENGINE_SEC; break;
-		case 0x0000000e: info->index = NVKM_ENGINE_NVENC0; break;
-		case 0x0000000f: info->index = NVKM_ENGINE_NVENC1; break;
-		case 0x00000010: info->index = NVKM_ENGINE_NVDEC; break;
+		case 0x00000000: A_(GR    ); break;
+		case 0x00000001: A_(CE0   ); break;
+		case 0x00000002: A_(CE1   ); break;
+		case 0x00000003: A_(CE2   ); break;
+		case 0x00000008: A_(MSPDEC); break;
+		case 0x00000009: A_(MSPPP ); break;
+		case 0x0000000a: A_(MSVLD ); break;
+		case 0x0000000b: A_(MSENC ); break;
+		case 0x0000000c: A_(VIC   ); break;
+		case 0x0000000d: A_(SEC   ); break;
+		case 0x0000000e: B_(NVENC ); break;
+		case 0x0000000f: A_(NVENC1); break;
+		case 0x00000010: A_(NVDEC ); break;
+		case 0x00000013: B_(CE    ); break;
 			break;
 		default:
 			break;
 		}
 
-		nvkm_debug(subdev, "%02x (%8s): addr %06x fault %2d engine %2d "
-				   "runlist %2d intr %2d reset %2d\n", type,
+		nvkm_debug(subdev, "%02x.%d (%8s): addr %06x fault %2d "
+				   "engine %2d runlist %2d intr %2d "
+				   "reset %2d\n", type, inst,
 			   info->index == NVKM_SUBDEV_NR ? NULL :
 					  nvkm_subdev_name[info->index],
 			   info->addr, info->fault, info->engine, info->runlist,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c
index 6b2d7531a7ff..1c3d23b0e84a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c
@@ -120,6 +120,8 @@ nvkm_volt_parse_bios(struct nvkm_bios *bios, struct nvkm_volt *volt)
 
 	data = nvbios_volt_parse(bios, &ver, &hdr, &cnt, &len, &info);
 	if (data && info.vidmask && info.base && info.step) {
+		volt->min_uv = info.min;
+		volt->max_uv = info.max;
 		for (i = 0; i < info.vidmask + 1; i++) {
 			if (info.base >= info.min &&
 				info.base <= info.max) {
@@ -131,6 +133,8 @@ nvkm_volt_parse_bios(struct nvkm_bios *bios, struct nvkm_volt *volt)
 		}
 		volt->vid_mask = info.vidmask;
 	} else if (data && info.vidmask) {
+		volt->min_uv = 0xffffffff;
+		volt->max_uv = 0;
 		for (i = 0; i < cnt; i++) {
 			data = nvbios_volt_entry_parse(bios, i, &ver, &hdr,
 						       &ivid);
@@ -138,9 +142,14 @@ nvkm_volt_parse_bios(struct nvkm_bios *bios, struct nvkm_volt *volt)
 				volt->vid[volt->vid_nr].uv = ivid.voltage;
 				volt->vid[volt->vid_nr].vid = ivid.vid;
 				volt->vid_nr++;
+				volt->min_uv = min(volt->min_uv, ivid.voltage);
+				volt->max_uv = max(volt->max_uv, ivid.voltage);
 			}
 		}
 		volt->vid_mask = info.vidmask;
+	} else if (data && info.type == NVBIOS_VOLT_PWM) {
+		volt->min_uv = info.base;
+		volt->max_uv = info.base + info.pwm_range;
 	}
 }
 
@@ -181,8 +190,11 @@ nvkm_volt_ctor(const struct nvkm_volt_func *func, struct nvkm_device *device,
 	volt->func = func;
 
 	/* Assuming the non-bios device should build the voltage table later */
-	if (bios)
+	if (bios) {
 		nvkm_volt_parse_bios(bios, volt);
+		nvkm_debug(&volt->subdev, "min: %iuv max: %iuv\n",
+			   volt->min_uv, volt->max_uv);
+	}
 
 	if (volt->vid_nr) {
 		for (i = 0; i < volt->vid_nr; i++) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c
index d554455326da..ce5d83cdc7cf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.c
@@ -77,18 +77,19 @@ gk20a_volt_get_cvb_t_voltage(int speedo, int temp, int s_scale, int t_scale,
 	return mv;
 }
 
-int
+static int
 gk20a_volt_calc_voltage(const struct cvb_coef *coef, int speedo)
 {
+	static const int v_scale = 1000;
 	int mv;
 
 	mv = gk20a_volt_get_cvb_t_voltage(speedo, -10, 100, 10, coef);
-	mv = DIV_ROUND_UP(mv, 1000);
+	mv = DIV_ROUND_UP(mv, v_scale);
 
 	return mv * 1000;
 }
 
-int
+static int
 gk20a_volt_vid_get(struct nvkm_volt *base)
 {
 	struct gk20a_volt *volt = gk20a_volt(base);
@@ -103,7 +104,7 @@ gk20a_volt_vid_get(struct nvkm_volt *base)
 	return -EINVAL;
 }
 
-int
+static int
 gk20a_volt_vid_set(struct nvkm_volt *base, u8 vid)
 {
 	struct gk20a_volt *volt = gk20a_volt(base);
@@ -113,7 +114,7 @@ gk20a_volt_vid_set(struct nvkm_volt *base, u8 vid)
 	return regulator_set_voltage(volt->vdd, volt->base.vid[vid].uv, 1200000);
 }
 
-int
+static int
 gk20a_volt_set_id(struct nvkm_volt *base, u8 id, int condition)
 {
 	struct gk20a_volt *volt = gk20a_volt(base);
@@ -143,9 +144,9 @@ gk20a_volt = {
 };
 
 int
-_gk20a_volt_ctor(struct nvkm_device *device, int index,
-		 const struct cvb_coef *coefs, int nb_coefs,
-		 struct gk20a_volt *volt)
+gk20a_volt_ctor(struct nvkm_device *device, int index,
+		const struct cvb_coef *coefs, int nb_coefs,
+		int vmin, struct gk20a_volt *volt)
 {
 	struct nvkm_device_tegra *tdev = device->func->tegra(device);
 	int i, uv;
@@ -160,9 +161,9 @@ _gk20a_volt_ctor(struct nvkm_device *device, int index,
 	volt->base.vid_nr = nb_coefs;
 	for (i = 0; i < volt->base.vid_nr; i++) {
 		volt->base.vid[i].vid = i;
-		volt->base.vid[i].uv =
-			gk20a_volt_calc_voltage(&coefs[i],
-						tdev->gpu_speedo);
+		volt->base.vid[i].uv = max(
+			gk20a_volt_calc_voltage(&coefs[i], tdev->gpu_speedo),
+			vmin);
 		nvkm_debug(&volt->base.subdev, "%2d: vid=%d, uv=%d\n", i,
 			   volt->base.vid[i].vid, volt->base.vid[i].uv);
 	}
@@ -180,6 +181,6 @@ gk20a_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt)
 		return -ENOMEM;
 	*pvolt = &volt->base;
 
-	return _gk20a_volt_ctor(device, index, gk20a_cvb_coef,
-				ARRAY_SIZE(gk20a_cvb_coef), volt);
+	return gk20a_volt_ctor(device, index, gk20a_cvb_coef,
+			       ARRAY_SIZE(gk20a_cvb_coef), 0, volt);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.h b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.h
index 0fa3b502bcf8..6a6c97f9684e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk20a.h
@@ -37,13 +37,8 @@ struct gk20a_volt {
 	struct regulator *vdd;
 };
 
-int _gk20a_volt_ctor(struct nvkm_device *device, int index,
-		     const struct cvb_coef *coefs, int nb_coefs,
-		     struct gk20a_volt *volt);
-
-int gk20a_volt_calc_voltage(const struct cvb_coef *coef, int speedo);
-int gk20a_volt_vid_get(struct nvkm_volt *volt);
-int gk20a_volt_vid_set(struct nvkm_volt *volt, u8 vid);
-int gk20a_volt_set_id(struct nvkm_volt *volt, u8 id, int condition);
+int gk20a_volt_ctor(struct nvkm_device *device, int index,
+		    const struct cvb_coef *coefs, int nb_coefs,
+		    int vmin, struct gk20a_volt *volt);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c
index 49b5ecb701e4..74db4d28930f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c
@@ -41,16 +41,52 @@ const struct cvb_coef gm20b_cvb_coef[] = {
 	/* 921600 */ { 2647676, -106455, 1632 },
 };
 
+static const struct cvb_coef gm20b_na_cvb_coef[] = {
+	/* KHz,         c0,     c1,   c2,    c3,     c4,   c5 */
+	/*  76800 */ {  814294, 8144, -940, 808, -21583, 226 },
+	/* 153600 */ {  856185, 8144, -940, 808, -21583, 226 },
+	/* 230400 */ {  898077, 8144, -940, 808, -21583, 226 },
+	/* 307200 */ {  939968, 8144, -940, 808, -21583, 226 },
+	/* 384000 */ {  981860, 8144, -940, 808, -21583, 226 },
+	/* 460800 */ { 1023751, 8144, -940, 808, -21583, 226 },
+	/* 537600 */ { 1065642, 8144, -940, 808, -21583, 226 },
+	/* 614400 */ { 1107534, 8144, -940, 808, -21583, 226 },
+	/* 691200 */ { 1149425, 8144, -940, 808, -21583, 226 },
+	/* 768000 */ { 1191317, 8144, -940, 808, -21583, 226 },
+	/* 844800 */ { 1233208, 8144, -940, 808, -21583, 226 },
+	/* 921600 */ { 1275100, 8144, -940, 808, -21583, 226 },
+	/* 998400 */ { 1316991, 8144, -940, 808, -21583, 226 },
+};
+
+const u32 speedo_to_vmin[] = {
+	/*   0,      1,      2,      3,      4, */
+	950000, 840000, 818750, 840000, 810000,
+};
+
 int
 gm20b_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt)
 {
+	struct nvkm_device_tegra *tdev = device->func->tegra(device);
 	struct gk20a_volt *volt;
+	u32 vmin;
+
+	if (tdev->gpu_speedo_id >= ARRAY_SIZE(speedo_to_vmin)) {
+		nvdev_error(device, "unsupported speedo %d\n",
+			    tdev->gpu_speedo_id);
+		return -EINVAL;
+	}
 
 	volt = kzalloc(sizeof(*volt), GFP_KERNEL);
 	if (!volt)
 		return -ENOMEM;
 	*pvolt = &volt->base;
 
-	return _gk20a_volt_ctor(device, index, gm20b_cvb_coef,
-				ARRAY_SIZE(gm20b_cvb_coef), volt);
+	vmin = speedo_to_vmin[tdev->gpu_speedo_id];
+
+	if (tdev->gpu_speedo_id >= 1)
+		return gk20a_volt_ctor(device, index, gm20b_na_cvb_coef,
+				     ARRAY_SIZE(gm20b_na_cvb_coef), vmin, volt);
+	else
+		return gk20a_volt_ctor(device, index, gm20b_cvb_coef,
+					ARRAY_SIZE(gm20b_cvb_coef), vmin, volt);
 }
diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c
index 6f45e9d00b41..e1be5e795cd8 100644
--- a/drivers/gpu/drm/omapdrm/dss/dsi.c
+++ b/drivers/gpu/drm/omapdrm/dss/dsi.c
@@ -1167,7 +1167,6 @@ static int dsi_regulator_init(struct platform_device *dsidev)
 {
 	struct dsi_data *dsi = dsi_get_dsidrv_data(dsidev);
 	struct regulator *vdds_dsi;
-	int r;
 
 	if (dsi->vdds_dsi_reg != NULL)
 		return 0;
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
index 061f9bab4c9b..0c0a5139a301 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
@@ -120,7 +120,6 @@ static irqreturn_t hdmi_irq_handler(int irq, void *data)
 
 static int hdmi_init_regulator(void)
 {
-	int r;
 	struct regulator *reg;
 
 	if (hdmi.vdda_reg != NULL)
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 0738d74c8d04..d50c9679e631 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -350,11 +350,19 @@ static int qxl_bo_move(struct ttm_buffer_object *bo,
 		       struct ttm_mem_reg *new_mem)
 {
 	struct ttm_mem_reg *old_mem = &bo->mem;
+	int ret;
+
+	ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+	if (ret)
+		return ret;
+
+
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		qxl_move_null(bo, new_mem);
 		return 0;
 	}
-	return ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
+	return ttm_bo_move_memcpy(bo, evict, interruptible,
+				  no_wait_gpu, new_mem);
 }
 
 static void qxl_bo_move_notify(struct ttm_buffer_object *bo,
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index e91763d5d800..a97abc8af657 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -589,7 +589,8 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
 		if (ASIC_IS_DCE41(rdev) || ASIC_IS_DCE61(rdev) || ASIC_IS_DCE8(rdev))
 			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
 		/* use frac fb div on RS780/RS880 */
-		if ((rdev->family == CHIP_RS780) || (rdev->family == CHIP_RS880))
+		if (((rdev->family == CHIP_RS780) || (rdev->family == CHIP_RS880))
+		    && !radeon_crtc->ss_enabled)
 			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
 		if (ASIC_IS_DCE32(rdev) && mode->clock > 165000)
 			radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
@@ -626,7 +627,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
 			if (radeon_crtc->ss.refdiv) {
 				radeon_crtc->pll_flags |= RADEON_PLL_USE_REF_DIV;
 				radeon_crtc->pll_reference_div = radeon_crtc->ss.refdiv;
-				if (ASIC_IS_AVIVO(rdev))
+				if (rdev->family >= CHIP_RV770)
 					radeon_crtc->pll_flags |= RADEON_PLL_USE_FRAC_FB_DIV;
 			}
 		}
diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index ba192a35c607..0c1b9ff433af 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -53,6 +53,7 @@ MODULE_FIRMWARE("radeon/bonaire_mc.bin");
 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
+MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
@@ -72,6 +73,7 @@ MODULE_FIRMWARE("radeon/hawaii_mc.bin");
 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
+MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
@@ -1990,12 +1992,17 @@ static int cik_init_microcode(struct radeon_device *rdev)
 	int new_fw = 0;
 	int err;
 	int num_fw;
+	bool new_smc = false;
 
 	DRM_DEBUG("\n");
 
 	switch (rdev->family) {
 	case CHIP_BONAIRE:
 		chip_name = "BONAIRE";
+		if ((rdev->pdev->revision == 0x80) ||
+		    (rdev->pdev->revision == 0x81) ||
+		    (rdev->pdev->device == 0x665f))
+			new_smc = true;
 		new_chip_name = "bonaire";
 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
 		me_req_size = CIK_ME_UCODE_SIZE * 4;
@@ -2010,6 +2017,8 @@ static int cik_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_HAWAII:
 		chip_name = "HAWAII";
+		if (rdev->pdev->revision == 0x80)
+			new_smc = true;
 		new_chip_name = "hawaii";
 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
 		me_req_size = CIK_ME_UCODE_SIZE * 4;
@@ -2259,7 +2268,10 @@ static int cik_init_microcode(struct radeon_device *rdev)
 			}
 		}
 
-		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
+		if (new_smc)
+			snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
+		else
+			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
 		if (err) {
 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
@@ -8354,7 +8366,8 @@ static int cik_startup(struct radeon_device *rdev)
 		}
 	}
 	rdev->rlc.cs_data = ci_cs_data;
-	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
+	rdev->rlc.cp_table_size = ALIGN(CP_ME_TABLE_SIZE * 5 * 4, 2048); /* CP JT */
+	rdev->rlc.cp_table_size += 64 * 1024; /* GDS */
 	r = sumo_rlc_init(rdev);
 	if (r) {
 		DRM_ERROR("Failed to init rlc BOs!\n");
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 0d3f744de35a..d960d3915408 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -2209,6 +2209,12 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 		}
 		break;
 	}
+	case PACKET3_PFP_SYNC_ME:
+		if (pkt->count) {
+			DRM_ERROR("bad PFP_SYNC_ME\n");
+			return -EINVAL;
+		}
+		break;
 	case PACKET3_SURFACE_SYNC:
 		if (pkt->count != 3) {
 			DRM_ERROR("bad SURFACE_SYNC\n");
@@ -3381,6 +3387,7 @@ static int evergreen_vm_packet3_check(struct radeon_device *rdev,
 	case PACKET3_MPEG_INDEX:
 	case PACKET3_WAIT_REG_MEM:
 	case PACKET3_MEM_WRITE:
+	case PACKET3_PFP_SYNC_ME:
 	case PACKET3_SURFACE_SYNC:
 	case PACKET3_EVENT_WRITE:
 	case PACKET3_EVENT_WRITE_EOP:
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index 0b174e14e9a6..c8e3d394cde7 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -1624,6 +1624,7 @@
 		 */
 #              define PACKET3_CP_DMA_CMD_SAIC      (1 << 28)
 #              define PACKET3_CP_DMA_CMD_DAIC      (1 << 29)
+#define	PACKET3_PFP_SYNC_ME				0x42
 #define	PACKET3_SURFACE_SYNC				0x43
 #              define PACKET3_CB0_DEST_BASE_ENA    (1 << 6)
 #              define PACKET3_CB1_DEST_BASE_ENA    (1 << 7)
diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
index 95f4fea89302..86dcdf38b732 100644
--- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c
+++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c
@@ -10,6 +10,7 @@
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <linux/pci.h>
+#include <linux/delay.h>
 
 #include "radeon_acpi.h"
 
@@ -27,6 +28,7 @@ struct radeon_atpx_functions {
 struct radeon_atpx {
 	acpi_handle handle;
 	struct radeon_atpx_functions functions;
+	bool is_hybrid;
 };
 
 static struct radeon_atpx_priv {
@@ -62,6 +64,14 @@ bool radeon_has_atpx(void) {
 	return radeon_atpx_priv.atpx_detected;
 }
 
+bool radeon_has_atpx_dgpu_power_cntl(void) {
+	return radeon_atpx_priv.atpx.functions.power_cntl;
+}
+
+bool radeon_is_atpx_hybrid(void) {
+	return radeon_atpx_priv.atpx.is_hybrid;
+}
+
 /**
  * radeon_atpx_call - call an ATPX method
  *
@@ -141,18 +151,12 @@ static void radeon_atpx_parse_functions(struct radeon_atpx_functions *f, u32 mas
  */
 static int radeon_atpx_validate(struct radeon_atpx *atpx)
 {
-	/* make sure required functions are enabled */
-	/* dGPU power control is required */
-	if (atpx->functions.power_cntl == false) {
-		printk("ATPX dGPU power cntl not present, forcing\n");
-		atpx->functions.power_cntl = true;
-	}
+	u32 valid_bits = 0;
 
 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
 		size_t size;
-		u32 valid_bits;
 
 		info = radeon_atpx_call(atpx->handle, ATPX_FUNCTION_GET_PX_PARAMETERS, NULL);
 		if (!info)
@@ -171,19 +175,42 @@ static int radeon_atpx_validate(struct radeon_atpx *atpx)
 		memcpy(&output, info->buffer.pointer, size);
 
 		valid_bits = output.flags & output.valid_flags;
-		/* if separate mux flag is set, mux controls are required */
-		if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
-			atpx->functions.i2c_mux_cntl = true;
-			atpx->functions.disp_mux_cntl = true;
-		}
-		/* if any outputs are muxed, mux controls are required */
-		if (valid_bits & (ATPX_CRT1_RGB_SIGNAL_MUXED |
-				  ATPX_TV_SIGNAL_MUXED |
-				  ATPX_DFP_SIGNAL_MUXED))
-			atpx->functions.disp_mux_cntl = true;
 
 		kfree(info);
 	}
+
+	/* if separate mux flag is set, mux controls are required */
+	if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
+		atpx->functions.i2c_mux_cntl = true;
+		atpx->functions.disp_mux_cntl = true;
+	}
+	/* if any outputs are muxed, mux controls are required */
+	if (valid_bits & (ATPX_CRT1_RGB_SIGNAL_MUXED |
+			  ATPX_TV_SIGNAL_MUXED |
+			  ATPX_DFP_SIGNAL_MUXED))
+		atpx->functions.disp_mux_cntl = true;
+
+	/* some bioses set these bits rather than flagging power_cntl as supported */
+	if (valid_bits & (ATPX_DYNAMIC_PX_SUPPORTED |
+			  ATPX_DYNAMIC_DGPU_POWER_OFF_SUPPORTED))
+		atpx->functions.power_cntl = true;
+
+	atpx->is_hybrid = false;
+	if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
+		printk("ATPX Hybrid Graphics\n");
+#if 1
+		/* This is a temporary hack until the D3 cold support
+		 * makes it upstream.  The ATPX power_control method seems
+		 * to still work even if the system should be using
+		 * the new standardized hybrid D3 cold ACPI interface.
+		 */
+		atpx->functions.power_cntl = true;
+#else
+		atpx->functions.power_cntl = false;
+#endif
+		atpx->is_hybrid = true;
+	}
+
 	return 0;
 }
 
@@ -258,6 +285,10 @@ static int radeon_atpx_set_discrete_state(struct radeon_atpx *atpx, u8 state)
 		if (!info)
 			return -EIO;
 		kfree(info);
+
+		/* 200ms delay is required after off */
+		if (state == 0)
+			msleep(200);
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 81a63d7f5cd9..b79f3b002471 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -2064,7 +2064,6 @@ radeon_add_atom_connector(struct drm_device *dev,
 							   RADEON_OUTPUT_CSC_BYPASS);
 			/* no HPD on analog connectors */
 			radeon_connector->hpd.hpd = RADEON_HPD_NONE;
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 			connector->interlace_allowed = true;
 			connector->doublescan_allowed = true;
 			break;
@@ -2314,8 +2313,10 @@ radeon_add_atom_connector(struct drm_device *dev,
 	}
 
 	if (radeon_connector->hpd.hpd == RADEON_HPD_NONE) {
-		if (i2c_bus->valid)
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+		if (i2c_bus->valid) {
+			connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+			                    DRM_CONNECTOR_POLL_DISCONNECT;
+		}
 	} else
 		connector->polled = DRM_CONNECTOR_POLL_HPD;
 
@@ -2391,7 +2392,6 @@ radeon_add_legacy_connector(struct drm_device *dev,
 					      1);
 		/* no HPD on analog connectors */
 		radeon_connector->hpd.hpd = RADEON_HPD_NONE;
-		connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 		connector->interlace_allowed = true;
 		connector->doublescan_allowed = true;
 		break;
@@ -2476,10 +2476,13 @@ radeon_add_legacy_connector(struct drm_device *dev,
 	}
 
 	if (radeon_connector->hpd.hpd == RADEON_HPD_NONE) {
-		if (i2c_bus->valid)
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+		if (i2c_bus->valid) {
+			connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+			                    DRM_CONNECTOR_POLL_DISCONNECT;
+		}
 	} else
 		connector->polled = DRM_CONNECTOR_POLL_HPD;
+
 	connector->display_info.subpixel_order = subpixel_order;
 	drm_connector_register(connector);
 }
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index e0bf77843d79..a00dd2f74527 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -631,6 +631,23 @@ void radeon_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
 /*
  * GPU helpers function.
  */
+
+/**
+ * radeon_device_is_virtual - check if we are running in a virtual environment
+ *
+ * Check if the asic has been passed through to a VM (all asics).
+ * Used at driver startup.
+ * Returns true if virtual or false if not.
+ */
+static bool radeon_device_is_virtual(void)
+{
+#ifdef CONFIG_X86
+	return boot_cpu_has(X86_FEATURE_HYPERVISOR);
+#else
+	return false;
+#endif
+}
+
 /**
  * radeon_card_posted - check if the hw has already been initialized
  *
@@ -644,6 +661,10 @@ bool radeon_card_posted(struct radeon_device *rdev)
 {
 	uint32_t reg;
 
+	/* for pass through, always force asic_init */
+	if (radeon_device_is_virtual())
+		return false;
+
 	/* required for EFI mode on macbook2,1 which uses an r5xx asic */
 	if (efi_enabled(EFI_BOOT) &&
 	    (rdev->pdev->subsystem_vendor == PCI_VENDOR_ID_APPLE) &&
@@ -1635,7 +1656,7 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend,
 	radeon_agp_suspend(rdev);
 
 	pci_save_state(dev->pdev);
-	if (freeze && rdev->family >= CHIP_R600) {
+	if (freeze && rdev->family >= CHIP_CEDAR) {
 		rdev->asic->asic_reset(rdev, true);
 		pci_restore_state(dev->pdev);
 	} else if (suspend) {
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index a455dc7d4aa1..c01a7c6abb49 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -93,9 +93,10 @@
  *   2.43.0 - RADEON_INFO_GPU_RESET_COUNTER
  *   2.44.0 - SET_APPEND_CNT packet3 support
  *   2.45.0 - Allow setting shader registers using DMA/COPY packet3 on SI
+ *   2.46.0 - Add PFP_SYNC_ME support on evergreen
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	45
+#define KMS_DRIVER_MINOR	46
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
@@ -162,9 +163,13 @@ void radeon_debugfs_cleanup(struct drm_minor *minor);
 #if defined(CONFIG_VGA_SWITCHEROO)
 void radeon_register_atpx_handler(void);
 void radeon_unregister_atpx_handler(void);
+bool radeon_has_atpx_dgpu_power_cntl(void);
+bool radeon_is_atpx_hybrid(void);
 #else
 static inline void radeon_register_atpx_handler(void) {}
 static inline void radeon_unregister_atpx_handler(void) {}
+static inline bool radeon_has_atpx_dgpu_power_cntl(void) { return false; }
+static inline bool radeon_is_atpx_hybrid(void) { return false; }
 #endif
 
 int radeon_no_wb;
@@ -404,7 +409,10 @@ static int radeon_pmops_runtime_suspend(struct device *dev)
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
 	pci_ignore_hotplug(pdev);
-	pci_set_power_state(pdev, PCI_D3cold);
+	if (radeon_is_atpx_hybrid())
+		pci_set_power_state(pdev, PCI_D3cold);
+	else if (!radeon_has_atpx_dgpu_power_cntl())
+		pci_set_power_state(pdev, PCI_D3hot);
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
 
 	return 0;
@@ -421,7 +429,9 @@ static int radeon_pmops_runtime_resume(struct device *dev)
 
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
 
-	pci_set_power_state(pdev, PCI_D0);
+	if (radeon_is_atpx_hybrid() ||
+	    !radeon_has_atpx_dgpu_power_cntl())
+		pci_set_power_state(pdev, PCI_D0);
 	pci_restore_state(pdev);
 	ret = pci_enable_device(pdev);
 	if (ret)
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 590b0377fbe2..ffdad81ef964 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -300,8 +300,7 @@ static int radeon_move_blit(struct ttm_buffer_object *bo,
 	if (IS_ERR(fence))
 		return PTR_ERR(fence);
 
-	r = ttm_bo_move_accel_cleanup(bo, &fence->base,
-				      evict, no_wait_gpu, new_mem);
+	r = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, new_mem);
 	radeon_fence_unref(&fence);
 	return r;
 }
@@ -403,6 +402,10 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 	struct ttm_mem_reg *old_mem = &bo->mem;
 	int r;
 
+	r = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+	if (r)
+		return r;
+
 	/* Can't move a pinned BO */
 	rbo = container_of(bo, struct radeon_bo, tbo);
 	if (WARN_ON_ONCE(rbo->pin_count > 0))
@@ -441,7 +444,8 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 
 	if (r) {
 memcpy:
-		r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
+		r = ttm_bo_move_memcpy(bo, evict, interruptible,
+				       no_wait_gpu, new_mem);
 		if (r) {
 			return r;
 		}
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index b30e719dd56d..2523ca96c6c7 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -50,6 +50,7 @@ MODULE_FIRMWARE("radeon/tahiti_ce.bin");
 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
+MODULE_FIRMWARE("radeon/tahiti_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
@@ -65,6 +66,7 @@ MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
+MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
 MODULE_FIRMWARE("radeon/VERDE_me.bin");
@@ -80,6 +82,7 @@ MODULE_FIRMWARE("radeon/verde_ce.bin");
 MODULE_FIRMWARE("radeon/verde_mc.bin");
 MODULE_FIRMWARE("radeon/verde_rlc.bin");
 MODULE_FIRMWARE("radeon/verde_smc.bin");
+MODULE_FIRMWARE("radeon/verde_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
 MODULE_FIRMWARE("radeon/OLAND_me.bin");
@@ -95,6 +98,7 @@ MODULE_FIRMWARE("radeon/oland_ce.bin");
 MODULE_FIRMWARE("radeon/oland_mc.bin");
 MODULE_FIRMWARE("radeon/oland_rlc.bin");
 MODULE_FIRMWARE("radeon/oland_smc.bin");
+MODULE_FIRMWARE("radeon/oland_k_smc.bin");
 
 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
@@ -110,6 +114,7 @@ MODULE_FIRMWARE("radeon/hainan_ce.bin");
 MODULE_FIRMWARE("radeon/hainan_mc.bin");
 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
 MODULE_FIRMWARE("radeon/hainan_smc.bin");
+MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
 
 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
 static void si_pcie_gen3_enable(struct radeon_device *rdev);
@@ -1653,12 +1658,16 @@ static int si_init_microcode(struct radeon_device *rdev)
 	char fw_name[30];
 	int err;
 	int new_fw = 0;
+	bool new_smc = false;
 
 	DRM_DEBUG("\n");
 
 	switch (rdev->family) {
 	case CHIP_TAHITI:
 		chip_name = "TAHITI";
+		/* XXX: figure out which Tahitis need the new ucode */
+		if (0)
+			new_smc = true;
 		new_chip_name = "tahiti";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 		me_req_size = SI_PM4_UCODE_SIZE * 4;
@@ -1670,6 +1679,13 @@ static int si_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_PITCAIRN:
 		chip_name = "PITCAIRN";
+		if ((rdev->pdev->revision == 0x81) ||
+		    (rdev->pdev->device == 0x6810) ||
+		    (rdev->pdev->device == 0x6811) ||
+		    (rdev->pdev->device == 0x6816) ||
+		    (rdev->pdev->device == 0x6817) ||
+		    (rdev->pdev->device == 0x6806))
+			new_smc = true;
 		new_chip_name = "pitcairn";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 		me_req_size = SI_PM4_UCODE_SIZE * 4;
@@ -1681,6 +1697,16 @@ static int si_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_VERDE:
 		chip_name = "VERDE";
+		if ((rdev->pdev->revision == 0x81) ||
+		    (rdev->pdev->revision == 0x83) ||
+		    (rdev->pdev->revision == 0x87) ||
+		    (rdev->pdev->device == 0x6820) ||
+		    (rdev->pdev->device == 0x6821) ||
+		    (rdev->pdev->device == 0x6822) ||
+		    (rdev->pdev->device == 0x6823) ||
+		    (rdev->pdev->device == 0x682A) ||
+		    (rdev->pdev->device == 0x682B))
+			new_smc = true;
 		new_chip_name = "verde";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 		me_req_size = SI_PM4_UCODE_SIZE * 4;
@@ -1692,6 +1718,13 @@ static int si_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_OLAND:
 		chip_name = "OLAND";
+		if ((rdev->pdev->revision == 0xC7) ||
+		    (rdev->pdev->revision == 0x80) ||
+		    (rdev->pdev->revision == 0x81) ||
+		    (rdev->pdev->revision == 0x83) ||
+		    (rdev->pdev->device == 0x6604) ||
+		    (rdev->pdev->device == 0x6605))
+			new_smc = true;
 		new_chip_name = "oland";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 		me_req_size = SI_PM4_UCODE_SIZE * 4;
@@ -1702,6 +1735,13 @@ static int si_init_microcode(struct radeon_device *rdev)
 		break;
 	case CHIP_HAINAN:
 		chip_name = "HAINAN";
+		if ((rdev->pdev->revision == 0x81) ||
+		    (rdev->pdev->revision == 0x83) ||
+		    (rdev->pdev->revision == 0xC3) ||
+		    (rdev->pdev->device == 0x6664) ||
+		    (rdev->pdev->device == 0x6665) ||
+		    (rdev->pdev->device == 0x6667))
+			new_smc = true;
 		new_chip_name = "hainan";
 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
 		me_req_size = SI_PM4_UCODE_SIZE * 4;
@@ -1847,7 +1887,10 @@ static int si_init_microcode(struct radeon_device *rdev)
 		}
 	}
 
-	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
+	if (new_smc)
+		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
+	else
+		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
 	if (err) {
 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
diff --git a/drivers/gpu/drm/sti/Kconfig b/drivers/gpu/drm/sti/Kconfig
index 5ad43a1bb260..494ab257f77c 100644
--- a/drivers/gpu/drm/sti/Kconfig
+++ b/drivers/gpu/drm/sti/Kconfig
@@ -7,5 +7,6 @@ config DRM_STI
 	select DRM_KMS_CMA_HELPER
 	select DRM_PANEL
 	select FW_LOADER
+	select SND_SOC_HDMI_CODEC if SND_SOC
 	help
 	  Choose this option to enable DRM on STM stiH41x chipset
diff --git a/drivers/gpu/drm/sti/sti_awg_utils.c b/drivers/gpu/drm/sti/sti_awg_utils.c
index a516eb869f6f..2da7d6866d5d 100644
--- a/drivers/gpu/drm/sti/sti_awg_utils.c
+++ b/drivers/gpu/drm/sti/sti_awg_utils.c
@@ -6,6 +6,8 @@
 
 #include "sti_awg_utils.h"
 
+#define AWG_DELAY (-5)
+
 #define AWG_OPCODE_OFFSET 10
 #define AWG_MAX_ARG       0x3ff
 
@@ -125,7 +127,7 @@ static int awg_generate_line_signal(
 		val = timing->blanking_level;
 		ret |= awg_generate_instr(RPLSET, val, 0, 0, fwparams);
 
-		val = timing->trailing_pixels - 1;
+		val = timing->trailing_pixels - 1 + AWG_DELAY;
 		ret |= awg_generate_instr(SKIP, val, 0, 0, fwparams);
 	}
 
diff --git a/drivers/gpu/drm/sti/sti_crtc.c b/drivers/gpu/drm/sti/sti_crtc.c
index 7fab3af7473b..c7d734dc3cf4 100644
--- a/drivers/gpu/drm/sti/sti_crtc.c
+++ b/drivers/gpu/drm/sti/sti_crtc.c
@@ -23,22 +23,11 @@
 static void sti_crtc_enable(struct drm_crtc *crtc)
 {
 	struct sti_mixer *mixer = to_sti_mixer(crtc);
-	struct device *dev = mixer->dev;
-	struct sti_compositor *compo = dev_get_drvdata(dev);
 
 	DRM_DEBUG_DRIVER("\n");
 
 	mixer->status = STI_MIXER_READY;
 
-	/* Prepare and enable the compo IP clock */
-	if (mixer->id == STI_MIXER_MAIN) {
-		if (clk_prepare_enable(compo->clk_compo_main))
-			DRM_INFO("Failed to prepare/enable compo_main clk\n");
-	} else {
-		if (clk_prepare_enable(compo->clk_compo_aux))
-			DRM_INFO("Failed to prepare/enable compo_aux clk\n");
-	}
-
 	drm_crtc_vblank_on(crtc);
 }
 
@@ -57,9 +46,8 @@ sti_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode)
 	struct sti_mixer *mixer = to_sti_mixer(crtc);
 	struct device *dev = mixer->dev;
 	struct sti_compositor *compo = dev_get_drvdata(dev);
-	struct clk *clk;
+	struct clk *compo_clk, *pix_clk;
 	int rate = mode->clock * 1000;
-	int res;
 
 	DRM_DEBUG_KMS("CRTC:%d (%s) mode:%d (%s)\n",
 		      crtc->base.id, sti_mixer_to_str(mixer),
@@ -74,32 +62,46 @@ sti_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode)
 		      mode->vsync_start, mode->vsync_end,
 		      mode->vtotal, mode->type, mode->flags);
 
-	/* Set rate and prepare/enable pixel clock */
-	if (mixer->id == STI_MIXER_MAIN)
-		clk = compo->clk_pix_main;
-	else
-		clk = compo->clk_pix_aux;
+	if (mixer->id == STI_MIXER_MAIN) {
+		compo_clk = compo->clk_compo_main;
+		pix_clk = compo->clk_pix_main;
+	} else {
+		compo_clk = compo->clk_compo_aux;
+		pix_clk = compo->clk_pix_aux;
+	}
+
+	/* Prepare and enable the compo IP clock */
+	if (clk_prepare_enable(compo_clk)) {
+		DRM_INFO("Failed to prepare/enable compositor clk\n");
+		goto compo_error;
+	}
 
-	res = clk_set_rate(clk, rate);
-	if (res < 0) {
+	/* Set rate and prepare/enable pixel clock */
+	if (clk_set_rate(pix_clk, rate) < 0) {
 		DRM_ERROR("Cannot set rate (%dHz) for pix clk\n", rate);
-		return -EINVAL;
+		goto pix_error;
 	}
-	if (clk_prepare_enable(clk)) {
+	if (clk_prepare_enable(pix_clk)) {
 		DRM_ERROR("Failed to prepare/enable pix clk\n");
-		return -EINVAL;
+		goto pix_error;
 	}
 
 	sti_vtg_set_config(mixer->id == STI_MIXER_MAIN ?
 			compo->vtg_main : compo->vtg_aux, &crtc->mode);
 
-	res = sti_mixer_active_video_area(mixer, &crtc->mode);
-	if (res) {
+	if (sti_mixer_active_video_area(mixer, &crtc->mode)) {
 		DRM_ERROR("Can't set active video area\n");
-		return -EINVAL;
+		goto mixer_error;
 	}
 
-	return res;
+	return 0;
+
+mixer_error:
+	clk_disable_unprepare(pix_clk);
+pix_error:
+	clk_disable_unprepare(compo_clk);
+compo_error:
+	return -EINVAL;
 }
 
 static void sti_crtc_disable(struct drm_crtc *crtc)
@@ -130,7 +132,6 @@ static void sti_crtc_disable(struct drm_crtc *crtc)
 static void
 sti_crtc_mode_set_nofb(struct drm_crtc *crtc)
 {
-	sti_crtc_enable(crtc);
 	sti_crtc_mode_set(crtc, &crtc->state->adjusted_mode);
 }
 
@@ -221,9 +222,7 @@ static void sti_crtc_atomic_flush(struct drm_crtc *crtc,
 static const struct drm_crtc_helper_funcs sti_crtc_helper_funcs = {
 	.enable = sti_crtc_enable,
 	.disable = sti_crtc_disabling,
-	.mode_set = drm_helper_crtc_mode_set,
 	.mode_set_nofb = sti_crtc_mode_set_nofb,
-	.mode_set_base = drm_helper_crtc_mode_set_base,
 	.atomic_begin = sti_crtc_atomic_begin,
 	.atomic_flush = sti_crtc_atomic_flush,
 };
diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c
index 8d1402b245bf..fedc17f98d9b 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.c
+++ b/drivers/gpu/drm/sti/sti_hdmi.c
@@ -18,6 +18,8 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
 
+#include <sound/hdmi-codec.h>
+
 #include "sti_hdmi.h"
 #include "sti_hdmi_tx3g4c28phy.h"
 #include "sti_hdmi_tx3g0c55phy.h"
@@ -35,6 +37,8 @@
 #define HDMI_DFLT_CHL0_DAT              0x0110
 #define HDMI_DFLT_CHL1_DAT              0x0114
 #define HDMI_DFLT_CHL2_DAT              0x0118
+#define HDMI_AUDIO_CFG                  0x0200
+#define HDMI_SPDIF_FIFO_STATUS          0x0204
 #define HDMI_SW_DI_1_HEAD_WORD          0x0210
 #define HDMI_SW_DI_1_PKT_WORD0          0x0214
 #define HDMI_SW_DI_1_PKT_WORD1          0x0218
@@ -44,6 +48,9 @@
 #define HDMI_SW_DI_1_PKT_WORD5          0x0228
 #define HDMI_SW_DI_1_PKT_WORD6          0x022C
 #define HDMI_SW_DI_CFG                  0x0230
+#define HDMI_SAMPLE_FLAT_MASK           0x0244
+#define HDMI_AUDN                       0x0400
+#define HDMI_AUD_CTS                    0x0404
 #define HDMI_SW_DI_2_HEAD_WORD          0x0600
 #define HDMI_SW_DI_2_PKT_WORD0          0x0604
 #define HDMI_SW_DI_2_PKT_WORD1          0x0608
@@ -103,6 +110,7 @@
 #define HDMI_INT_DLL_LCK                BIT(5)
 #define HDMI_INT_NEW_FRAME              BIT(6)
 #define HDMI_INT_GENCTRL_PKT            BIT(7)
+#define HDMI_INT_AUDIO_FIFO_XRUN        BIT(8)
 #define HDMI_INT_SINK_TERM_PRESENT      BIT(11)
 
 #define HDMI_DEFAULT_INT (HDMI_INT_SINK_TERM_PRESENT \
@@ -111,6 +119,7 @@
 			| HDMI_INT_GLOBAL)
 
 #define HDMI_WORKING_INT (HDMI_INT_SINK_TERM_PRESENT \
+			| HDMI_INT_AUDIO_FIFO_XRUN \
 			| HDMI_INT_GENCTRL_PKT \
 			| HDMI_INT_NEW_FRAME \
 			| HDMI_INT_DLL_LCK \
@@ -121,6 +130,27 @@
 
 #define HDMI_STA_SW_RST                 BIT(1)
 
+#define HDMI_AUD_CFG_8CH		BIT(0)
+#define HDMI_AUD_CFG_SPDIF_DIV_2	BIT(1)
+#define HDMI_AUD_CFG_SPDIF_DIV_3	BIT(2)
+#define HDMI_AUD_CFG_SPDIF_CLK_DIV_4	(BIT(1) | BIT(2))
+#define HDMI_AUD_CFG_CTS_CLK_256FS	BIT(12)
+#define HDMI_AUD_CFG_DTS_INVALID	BIT(16)
+#define HDMI_AUD_CFG_ONE_BIT_INVALID	(BIT(18) | BIT(19) | BIT(20) |  BIT(21))
+#define HDMI_AUD_CFG_CH12_VALID	BIT(28)
+#define HDMI_AUD_CFG_CH34_VALID	BIT(29)
+#define HDMI_AUD_CFG_CH56_VALID	BIT(30)
+#define HDMI_AUD_CFG_CH78_VALID	BIT(31)
+
+/* sample flat mask */
+#define HDMI_SAMPLE_FLAT_NO	 0
+#define HDMI_SAMPLE_FLAT_SP0 BIT(0)
+#define HDMI_SAMPLE_FLAT_SP1 BIT(1)
+#define HDMI_SAMPLE_FLAT_SP2 BIT(2)
+#define HDMI_SAMPLE_FLAT_SP3 BIT(3)
+#define HDMI_SAMPLE_FLAT_ALL (HDMI_SAMPLE_FLAT_SP0 | HDMI_SAMPLE_FLAT_SP1 |\
+			      HDMI_SAMPLE_FLAT_SP2 | HDMI_SAMPLE_FLAT_SP3)
+
 #define HDMI_INFOFRAME_HEADER_TYPE(x)    (((x) & 0xff) <<  0)
 #define HDMI_INFOFRAME_HEADER_VERSION(x) (((x) & 0xff) <<  8)
 #define HDMI_INFOFRAME_HEADER_LEN(x)     (((x) & 0x0f) << 16)
@@ -171,6 +201,10 @@ static irqreturn_t hdmi_irq_thread(int irq, void *arg)
 		wake_up_interruptible(&hdmi->wait_event);
 	}
 
+	/* Audio FIFO underrun IRQ */
+	if (hdmi->irq_status & HDMI_INT_AUDIO_FIFO_XRUN)
+		DRM_INFO("Warning: audio FIFO underrun occurs!");
+
 	return IRQ_HANDLED;
 }
 
@@ -441,26 +475,29 @@ static int hdmi_avi_infoframe_config(struct sti_hdmi *hdmi)
  */
 static int hdmi_audio_infoframe_config(struct sti_hdmi *hdmi)
 {
-	struct hdmi_audio_infoframe infofame;
+	struct hdmi_audio_params *audio = &hdmi->audio;
 	u8 buffer[HDMI_INFOFRAME_SIZE(AUDIO)];
-	int ret;
-
-	ret = hdmi_audio_infoframe_init(&infofame);
-	if (ret < 0) {
-		DRM_ERROR("failed to setup audio infoframe: %d\n", ret);
-		return ret;
-	}
-
-	infofame.channels = 2;
-
-	ret = hdmi_audio_infoframe_pack(&infofame, buffer, sizeof(buffer));
-	if (ret < 0) {
-		DRM_ERROR("failed to pack audio infoframe: %d\n", ret);
-		return ret;
+	int ret, val;
+
+	DRM_DEBUG_DRIVER("enter %s, AIF %s\n", __func__,
+			 audio->enabled ? "enable" : "disable");
+	if (audio->enabled) {
+		/* apply the stored audio parameters */
+		ret = hdmi_audio_infoframe_pack(&audio->cea, buffer,
+						sizeof(buffer));
+		if (ret < 0) {
+			DRM_ERROR("failed to pack audio infoframe: %d\n", ret);
+			return ret;
+		}
+		hdmi_infoframe_write_infopack(hdmi, buffer, ret);
+	} else {
+		/* disable audio infoframe transmission */
+		val = hdmi_read(hdmi, HDMI_SW_DI_CFG);
+		val &= ~HDMI_IFRAME_CFG_DI_N(HDMI_IFRAME_MASK,
+					     HDMI_IFRAME_SLOT_AUDIO);
+		hdmi_write(hdmi, val, HDMI_SW_DI_CFG);
 	}
 
-	hdmi_infoframe_write_infopack(hdmi, buffer, ret);
-
 	return 0;
 }
 
@@ -650,6 +687,10 @@ static int hdmi_dbg_show(struct seq_file *s, void *data)
 	DBGFS_DUMP("", HDMI_SW_DI_CFG);
 	hdmi_dbg_sw_di_cfg(s, hdmi_read(hdmi, HDMI_SW_DI_CFG));
 
+	DBGFS_DUMP("\n", HDMI_AUDIO_CFG);
+	DBGFS_DUMP("\n", HDMI_SPDIF_FIFO_STATUS);
+	DBGFS_DUMP("\n", HDMI_AUDN);
+
 	seq_printf(s, "\n AVI Infoframe (Data Island slot N=%d):",
 		   HDMI_IFRAME_SLOT_AVI);
 	DBGFS_DUMP_DI(HDMI_SW_DI_N_HEAD_WORD, HDMI_IFRAME_SLOT_AVI);
@@ -854,6 +895,7 @@ static int sti_hdmi_connector_get_modes(struct drm_connector *connector)
 
 	count = drm_add_edid_modes(connector, edid);
 	drm_mode_connector_update_edid_property(connector, edid);
+	drm_edid_to_eld(connector, edid);
 
 	kfree(edid);
 	return count;
@@ -1036,6 +1078,207 @@ static struct drm_encoder *sti_hdmi_find_encoder(struct drm_device *dev)
 	return NULL;
 }
 
+/**
+ * sti_hdmi_audio_get_non_coherent_n() - get N parameter for non-coherent
+ * clocks. Non-coherent clocks are audio and TMDS clocks that do not share the
+ * same source (so they drift relative to each other). In this case the
+ * assumption is that CTS is automatically calculated by hardware.
+ *
+ * @audio_fs: audio frame clock frequency in Hz
+ *
+ * Values computed are based on table described in HDMI specification 1.4b
+ *
+ * Returns n value.
+ */
+static int sti_hdmi_audio_get_non_coherent_n(unsigned int audio_fs)
+{
+	unsigned int n;
+
+	switch (audio_fs) {
+	case 32000:
+		n = 4096;
+		break;
+	case 44100:
+		n = 6272;
+		break;
+	case 48000:
+		n = 6144;
+		break;
+	case 88200:
+		n = 6272 * 2;
+		break;
+	case 96000:
+		n = 6144 * 2;
+		break;
+	case 176400:
+		n = 6272 * 4;
+		break;
+	case 192000:
+		n = 6144 * 4;
+		break;
+	default:
+		/* Not pre-defined, recommended value: 128 * fs / 1000 */
+		n = (audio_fs * 128) / 1000;
+	}
+
+	return n;
+}
+
+static int hdmi_audio_configure(struct sti_hdmi *hdmi,
+				struct hdmi_audio_params *params)
+{
+	int audio_cfg, n;
+	struct hdmi_audio_infoframe *info = &params->cea;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	if (!hdmi->enabled)
+		return 0;
+
+	/* update N parameter from the requested audio sample rate */
+	n = sti_hdmi_audio_get_non_coherent_n(params->sample_rate);
+
+	DRM_DEBUG_DRIVER("Audio rate = %d Hz, TMDS clock = %d Hz, n = %d\n",
+			 params->sample_rate, hdmi->mode.clock * 1000, n);
+	hdmi_write(hdmi, n, HDMI_AUDN);
+
+	/* update HDMI registers according to configuration */
+	audio_cfg = HDMI_AUD_CFG_SPDIF_DIV_2 | HDMI_AUD_CFG_DTS_INVALID |
+		    HDMI_AUD_CFG_ONE_BIT_INVALID;
+
+	switch (info->channels) {
+	case 8:
+		audio_cfg |= HDMI_AUD_CFG_CH78_VALID;
+	case 6: /* case 8 falls through */
+		audio_cfg |= HDMI_AUD_CFG_CH56_VALID;
+	case 4: /* case 6 falls through */
+		audio_cfg |= HDMI_AUD_CFG_CH34_VALID | HDMI_AUD_CFG_8CH;
+	case 2: /* case 4 falls through */
+		audio_cfg |= HDMI_AUD_CFG_CH12_VALID;
+		break;
+	default:
+		DRM_ERROR("ERROR: Unsupported number of channels (%d)!\n",
+			  info->channels);
+		return -EINVAL;
+	}
+
+	hdmi_write(hdmi, audio_cfg, HDMI_AUDIO_CFG);
+
+	/* remember the parameters; the infoframe is packed from hdmi->audio */
+	hdmi->audio = *params;
+
+	return hdmi_audio_infoframe_config(hdmi);
+}
+
+static void hdmi_audio_shutdown(struct device *dev, void *data)
+{
+	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
+	int audio_cfg;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	/* disable audio */
+	audio_cfg = HDMI_AUD_CFG_SPDIF_DIV_2 | HDMI_AUD_CFG_DTS_INVALID |
+		    HDMI_AUD_CFG_ONE_BIT_INVALID;
+	hdmi_write(hdmi, audio_cfg, HDMI_AUDIO_CFG);
+
+	hdmi->audio.enabled = 0;
+	hdmi_audio_infoframe_config(hdmi);
+}
+
+static int hdmi_audio_hw_params(struct device *dev,
+				void *data,
+				struct hdmi_codec_daifmt *daifmt,
+				struct hdmi_codec_params *params)
+{
+	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
+	int ret;
+	struct hdmi_audio_params audio = {
+		.sample_width = params->sample_width,
+		.sample_rate = params->sample_rate,
+		.cea = params->cea,
+	};
+
+	DRM_DEBUG_DRIVER("\n");
+
+	if (!hdmi->enabled)
+		return 0;
+
+	if ((daifmt->fmt != HDMI_I2S) || daifmt->bit_clk_inv ||
+	    daifmt->frame_clk_inv || daifmt->bit_clk_master ||
+	    daifmt->frame_clk_master) {
+		dev_err(dev, "%s: Bad flags %d %d %d %d\n", __func__,
+			daifmt->bit_clk_inv, daifmt->frame_clk_inv,
+			daifmt->bit_clk_master,
+			daifmt->frame_clk_master);
+		return -EINVAL;
+	}
+
+	audio.enabled = 1;
+
+	ret = hdmi_audio_configure(hdmi, &audio);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int hdmi_audio_digital_mute(struct device *dev, void *data, bool enable)
+{
+	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
+
+	DRM_DEBUG_DRIVER("%s\n", enable ? "enable" : "disable");
+
+	if (enable)
+		hdmi_write(hdmi, HDMI_SAMPLE_FLAT_ALL, HDMI_SAMPLE_FLAT_MASK);
+	else
+		hdmi_write(hdmi, HDMI_SAMPLE_FLAT_NO, HDMI_SAMPLE_FLAT_MASK);
+
+	return 0;
+}
+
+static int hdmi_audio_get_eld(struct device *dev, void *data, uint8_t *buf, size_t len)
+{
+	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
+	struct drm_connector *connector = hdmi->drm_connector;
+
+	DRM_DEBUG_DRIVER("\n");
+	memcpy(buf, connector->eld, min(sizeof(connector->eld), len));
+
+	return 0;
+}
+
+static const struct hdmi_codec_ops audio_codec_ops = {
+	.hw_params = hdmi_audio_hw_params,
+	.audio_shutdown = hdmi_audio_shutdown,
+	.digital_mute = hdmi_audio_digital_mute,
+	.get_eld = hdmi_audio_get_eld,
+};
+
+static int sti_hdmi_register_audio_driver(struct device *dev,
+					  struct sti_hdmi *hdmi)
+{
+	struct hdmi_codec_pdata codec_data = {
+		.ops = &audio_codec_ops,
+		.max_i2s_channels = 8,
+		.i2s = 1,
+	};
+
+	DRM_DEBUG_DRIVER("\n");
+
+	hdmi->audio.enabled = 0;
+
+	hdmi->audio_pdev = platform_device_register_data(
+		dev, HDMI_CODEC_DRV_NAME, PLATFORM_DEVID_AUTO,
+		&codec_data, sizeof(codec_data));
+
+	if (IS_ERR(hdmi->audio_pdev))
+		return PTR_ERR(hdmi->audio_pdev);
+
+	DRM_INFO("%s Driver bound %s\n", HDMI_CODEC_DRV_NAME, dev_name(dev));
+
+	return 0;
+}
+
 static int sti_hdmi_bind(struct device *dev, struct device *master, void *data)
 {
 	struct sti_hdmi *hdmi = dev_get_drvdata(dev);
@@ -1082,12 +1325,27 @@ static int sti_hdmi_bind(struct device *dev, struct device *master, void *data)
 	/* initialise property */
 	sti_hdmi_connector_init_property(drm_dev, drm_connector);
 
+	hdmi->drm_connector = drm_connector;
+
 	err = drm_mode_connector_attach_encoder(drm_connector, encoder);
 	if (err) {
 		DRM_ERROR("Failed to attach a connector to a encoder\n");
 		goto err_sysfs;
 	}
 
+	err = sti_hdmi_register_audio_driver(dev, hdmi);
+	if (err) {
+		DRM_ERROR("Failed to attach an audio codec\n");
+		goto err_sysfs;
+	}
+
+	/* Initialize audio infoframe */
+	err = hdmi_audio_infoframe_init(&hdmi->audio.cea);
+	if (err) {
+		DRM_ERROR("Failed to init audio infoframe\n");
+		goto err_sysfs;
+	}
+
 	/* Enable default interrupts */
 	hdmi_write(hdmi, HDMI_DEFAULT_INT, HDMI_INT_EN);
 
@@ -1095,6 +1353,7 @@ static int sti_hdmi_bind(struct device *dev, struct device *master, void *data)
 
 err_sysfs:
 	drm_bridge_remove(bridge);
+	hdmi->drm_connector = NULL;
 	return -EINVAL;
 }
 
@@ -1244,6 +1503,8 @@ static int sti_hdmi_remove(struct platform_device *pdev)
 	struct sti_hdmi *hdmi = dev_get_drvdata(&pdev->dev);
 
 	i2c_put_adapter(hdmi->ddc_adapt);
+	if (hdmi->audio_pdev)
+		platform_device_unregister(hdmi->audio_pdev);
 	component_del(&pdev->dev, &sti_hdmi_ops);
 
 	return 0;
diff --git a/drivers/gpu/drm/sti/sti_hdmi.h b/drivers/gpu/drm/sti/sti_hdmi.h
index ef3a94583bbd..119bc3582ac7 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.h
+++ b/drivers/gpu/drm/sti/sti_hdmi.h
@@ -23,6 +23,13 @@ struct hdmi_phy_ops {
 	void (*stop)(struct sti_hdmi *hdmi);
 };
 
+struct hdmi_audio_params {
+	bool enabled;
+	unsigned int sample_width;
+	unsigned int sample_rate;
+	struct hdmi_audio_infoframe cea;
+};
+
 /* values for the framing mode property */
 enum sti_hdmi_modes {
 	HDMI_MODE_HDMI,
@@ -67,6 +74,9 @@ static const struct drm_prop_enum_list colorspace_mode_names[] = {
  * @ddc_adapt: i2c ddc adapter
  * @colorspace: current colorspace selected
  * @hdmi_mode: select framing for HDMI or DVI
+ * @audio_pdev: ASoC hdmi-codec platform device
+ * @audio: hdmi audio parameters.
+ * @drm_connector: hdmi connector
  */
 struct sti_hdmi {
 	struct device dev;
@@ -89,6 +99,9 @@ struct sti_hdmi {
 	struct i2c_adapter *ddc_adapt;
 	enum hdmi_colorspace colorspace;
 	enum sti_hdmi_modes hdmi_mode;
+	struct platform_device *audio_pdev;
+	struct hdmi_audio_params audio;
+	struct drm_connector *drm_connector;
 };
 
 u32 hdmi_read(struct sti_hdmi *hdmi, int offset);
diff --git a/drivers/gpu/drm/sti/sti_plane.c b/drivers/gpu/drm/sti/sti_plane.c
index 85cee9098439..0cf3335ef37c 100644
--- a/drivers/gpu/drm/sti/sti_plane.c
+++ b/drivers/gpu/drm/sti/sti_plane.c
@@ -45,25 +45,15 @@ const char *sti_plane_to_str(struct sti_plane *plane)
 
 #define STI_FPS_INTERVAL_MS     3000
 
-static int sti_plane_timespec_ms_diff(struct timespec lhs, struct timespec rhs)
-{
-	struct timespec tmp_ts = timespec_sub(lhs, rhs);
-	u64 tmp_ns = (u64)timespec_to_ns(&tmp_ts);
-
-	do_div(tmp_ns, NSEC_PER_MSEC);
-
-	return (u32)tmp_ns;
-}
-
 void sti_plane_update_fps(struct sti_plane *plane,
 			  bool new_frame,
 			  bool new_field)
 {
-	struct timespec now;
+	ktime_t now;
 	struct sti_fps_info *fps;
 	int fpks, fipks, ms_since_last, num_frames, num_fields;
 
-	getrawmonotonic(&now);
+	now = ktime_get();
 
 	/* Compute number of frame updates */
 	fps = &plane->fps_info;
@@ -76,7 +66,7 @@ void sti_plane_update_fps(struct sti_plane *plane,
 		return;
 
 	fps->curr_frame_counter++;
-	ms_since_last = sti_plane_timespec_ms_diff(now, fps->last_timestamp);
+	ms_since_last = ktime_to_ms(ktime_sub(now, fps->last_timestamp));
 	num_frames = fps->curr_frame_counter - fps->last_frame_counter;
 
 	if (num_frames <= 0  || ms_since_last < STI_FPS_INTERVAL_MS)
diff --git a/drivers/gpu/drm/sti/sti_plane.h b/drivers/gpu/drm/sti/sti_plane.h
index 39d39f5b7dd9..e0ea1dd3bb88 100644
--- a/drivers/gpu/drm/sti/sti_plane.h
+++ b/drivers/gpu/drm/sti/sti_plane.h
@@ -55,7 +55,7 @@ struct sti_fps_info {
 	unsigned int last_frame_counter;
 	unsigned int curr_field_counter;
 	unsigned int last_field_counter;
-	struct timespec last_timestamp;
+	ktime_t	     last_timestamp;
 	char fps_str[FPS_LENGTH];
 	char fips_str[FPS_LENGTH];
 };
diff --git a/drivers/gpu/drm/sti/sti_vtg.c b/drivers/gpu/drm/sti/sti_vtg.c
index 0100c7c668b7..0bdc385eec17 100644
--- a/drivers/gpu/drm/sti/sti_vtg.c
+++ b/drivers/gpu/drm/sti/sti_vtg.c
@@ -65,7 +65,7 @@
 #define HDMI_DELAY          (5)
 
 /* Delay introduced by the DVO in nb of pixel */
-#define DVO_DELAY           (2)
+#define DVO_DELAY           (7)
 
 /* delay introduced by the Arbitrary Waveform Generator in nb of pixels */
 #define AWG_DELAY_HD        (-9)
diff --git a/drivers/gpu/drm/sun4i/Kconfig b/drivers/gpu/drm/sun4i/Kconfig
index 99510e64e91a..a4b357db8856 100644
--- a/drivers/gpu/drm/sun4i/Kconfig
+++ b/drivers/gpu/drm/sun4i/Kconfig
@@ -1,6 +1,6 @@
 config DRM_SUN4I
 	tristate "DRM Support for Allwinner A10 Display Engine"
-	depends on DRM && ARM
+	depends on DRM && ARM && COMMON_CLK
 	depends on ARCH_SUNXI || COMPILE_TEST
 	select DRM_GEM_CMA_HELPER
 	select DRM_KMS_HELPER
diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c
index f7a15c1a93bf..3ab560450a82 100644
--- a/drivers/gpu/drm/sun4i/sun4i_backend.c
+++ b/drivers/gpu/drm/sun4i/sun4i_backend.c
@@ -190,7 +190,7 @@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend,
 	/* Get the physical address of the buffer in memory */
 	gem = drm_fb_cma_get_gem_obj(fb, 0);
 
-	DRM_DEBUG_DRIVER("Using GEM @ 0x%x\n", gem->paddr);
+	DRM_DEBUG_DRIVER("Using GEM @ %pad\n", &gem->paddr);
 
 	/* Compute the start of the displayed memory */
 	bpp = drm_format_plane_cpp(fb->pixel_format, 0);
@@ -198,7 +198,7 @@ int sun4i_backend_update_layer_buffer(struct sun4i_backend *backend,
 	paddr += (state->src_x >> 16) * bpp;
 	paddr += (state->src_y >> 16) * fb->pitches[0];
 
-	DRM_DEBUG_DRIVER("Setting buffer address to 0x%x\n", paddr);
+	DRM_DEBUG_DRIVER("Setting buffer address to %pad\n", &paddr);
 
 	/* Write the 32 lower bits of the address (in bits) */
 	lo_paddr = paddr << 3;
diff --git a/drivers/gpu/drm/sun4i/sun4i_dotclock.c b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
index 3ff668cb463c..5b3463197c48 100644
--- a/drivers/gpu/drm/sun4i/sun4i_dotclock.c
+++ b/drivers/gpu/drm/sun4i/sun4i_dotclock.c
@@ -72,14 +72,40 @@ static unsigned long sun4i_dclk_recalc_rate(struct clk_hw *hw,
 static long sun4i_dclk_round_rate(struct clk_hw *hw, unsigned long rate,
 				  unsigned long *parent_rate)
 {
-	return *parent_rate / DIV_ROUND_CLOSEST(*parent_rate, rate);
+	unsigned long best_parent = 0;
+	u8 best_div = 1;
+	int i;
+
+	for (i = 6; i < 127; i++) {
+		unsigned long ideal = rate * i;
+		unsigned long rounded;
+
+		rounded = clk_hw_round_rate(clk_hw_get_parent(hw),
+					    ideal);
+
+		if (rounded == ideal) {
+			best_parent = rounded;
+			best_div = i;
+			goto out;
+		}
+
+		if ((rounded < ideal) && (rounded > best_parent)) {
+			best_parent = rounded;
+			best_div = i;
+		}
+	}
+
+out:
+	*parent_rate = best_parent;
+
+	return best_parent / best_div;
 }
 
 static int sun4i_dclk_set_rate(struct clk_hw *hw, unsigned long rate,
 			       unsigned long parent_rate)
 {
 	struct sun4i_dclk *dclk = hw_to_dclk(hw);
-	int div = DIV_ROUND_CLOSEST(parent_rate, rate);
+	u8 div = parent_rate / rate;
 
 	return regmap_update_bits(dclk->regmap, SUN4I_TCON0_DCLK_REG,
 				  GENMASK(6, 0), div);
@@ -127,10 +153,14 @@ int sun4i_dclk_create(struct device *dev, struct sun4i_tcon *tcon)
 	const char *clk_name, *parent_name;
 	struct clk_init_data init;
 	struct sun4i_dclk *dclk;
+	int ret;
 
 	parent_name = __clk_get_name(tcon->sclk0);
-	of_property_read_string_index(dev->of_node, "clock-output-names", 0,
-				      &clk_name);
+	ret = of_property_read_string_index(dev->of_node,
+					    "clock-output-names", 0,
+					    &clk_name);
+	if (ret)
+		return ret;
 
 	dclk = devm_kzalloc(dev, sizeof(*dclk), GFP_KERNEL);
 	if (!dclk)
@@ -140,6 +170,7 @@ int sun4i_dclk_create(struct device *dev, struct sun4i_tcon *tcon)
 	init.ops = &sun4i_dclk_ops;
 	init.parent_names = &parent_name;
 	init.num_parents = 1;
+	init.flags = CLK_SET_RATE_PARENT;
 
 	dclk->regmap = tcon->regs;
 	dclk->hw.init = &init;
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index 9a67f927a53e..5b89940edcb1 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -97,6 +97,22 @@ static struct drm_driver sun4i_drv_driver = {
 	.disable_vblank		= sun4i_drv_disable_vblank,
 };
 
+static void sun4i_remove_framebuffers(void)
+{
+	struct apertures_struct *ap;
+
+	ap = alloc_apertures(1);
+	if (!ap)
+		return;
+
+	/* The framebuffer can be located anywhere in RAM */
+	ap->ranges[0].base = 0;
+	ap->ranges[0].size = ~0;
+
+	remove_conflicting_framebuffers(ap, "sun4i-drm-fb", false);
+	kfree(ap);
+}
+
 static int sun4i_drv_bind(struct device *dev)
 {
 	struct drm_device *drm;
@@ -140,6 +156,9 @@ static int sun4i_drv_bind(struct device *dev)
 	}
 	drm->irq_enabled = true;
 
+	/* Remove early framebuffers (i.e. simplefb) */
+	sun4i_remove_framebuffers();
+
 	/* Create our framebuffer */
 	drv->fbdev = sun4i_framebuffer_init(drm);
 	if (IS_ERR(drv->fbdev)) {
@@ -166,6 +185,7 @@ static void sun4i_drv_unbind(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
 
+	drm_connector_unregister_all(drm);
 	drm_dev_unregister(drm);
 	drm_kms_helper_poll_fini(drm);
 	sun4i_framebuffer_free(drm);
diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c
index 442cfe271688..f5bbac6efb4c 100644
--- a/drivers/gpu/drm/sun4i/sun4i_rgb.c
+++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c
@@ -54,8 +54,13 @@ static int sun4i_rgb_get_modes(struct drm_connector *connector)
 static int sun4i_rgb_mode_valid(struct drm_connector *connector,
 				struct drm_display_mode *mode)
 {
+	struct sun4i_rgb *rgb = drm_connector_to_sun4i_rgb(connector);
+	struct sun4i_drv *drv = rgb->drv;
+	struct sun4i_tcon *tcon = drv->tcon;
 	u32 hsync = mode->hsync_end - mode->hsync_start;
 	u32 vsync = mode->vsync_end - mode->vsync_start;
+	unsigned long rate = mode->clock * 1000;
+	long rounded_rate;
 
 	DRM_DEBUG_DRIVER("Validating modes...\n");
 
@@ -87,6 +92,15 @@ static int sun4i_rgb_mode_valid(struct drm_connector *connector,
 
 	DRM_DEBUG_DRIVER("Vertical parameters OK\n");
 
+	rounded_rate = clk_round_rate(tcon->dclk, rate);
+	if (rounded_rate < rate)
+		return MODE_CLOCK_LOW;
+
+	if (rounded_rate > rate)
+		return MODE_CLOCK_HIGH;
+
+	DRM_DEBUG_DRIVER("Clock rate OK\n");
+
 	return MODE_OK;
 }
 
@@ -193,7 +207,7 @@ int sun4i_rgb_init(struct drm_device *drm)
 	int ret;
 
 	/* If we don't have a panel, there's no point in going on */
-	if (!tcon->panel)
+	if (IS_ERR(tcon->panel))
 		return -ENODEV;
 
 	rgb = devm_kzalloc(drm->dev, sizeof(*rgb), GFP_KERNEL);
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index 9f19b0e08560..652385f09735 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -425,11 +425,11 @@ static struct drm_panel *sun4i_tcon_find_panel(struct device_node *node)
 
 	remote = of_graph_get_remote_port_parent(end_node);
 	if (!remote) {
-		DRM_DEBUG_DRIVER("Enable to parse remote node\n");
+		DRM_DEBUG_DRIVER("Unable to parse remote node\n");
 		return ERR_PTR(-EINVAL);
 	}
 
-	return of_drm_find_panel(remote);
+	return of_drm_find_panel(remote) ?: ERR_PTR(-EPROBE_DEFER);
 }
 
 static int sun4i_tcon_bind(struct device *dev, struct device *master,
@@ -490,7 +490,11 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
 		return 0;
 	}
 
-	return sun4i_rgb_init(drm);
+	ret = sun4i_rgb_init(drm);
+	if (ret < 0)
+		goto err_free_clocks;
+
+	return 0;
 
 err_free_clocks:
 	sun4i_tcon_free_clocks(tcon);
@@ -522,12 +526,13 @@ static int sun4i_tcon_probe(struct platform_device *pdev)
 	 * Defer the probe.
 	 */
 	panel = sun4i_tcon_find_panel(node);
-	if (IS_ERR(panel)) {
-		/*
-		 * If we don't have a panel endpoint, just go on
-		 */
-		if (PTR_ERR(panel) != -ENODEV)
-			return -EPROBE_DEFER;
+
+	/*
+	 * If we don't have a panel endpoint, just go on
+	 */
+	if (PTR_ERR(panel) == -EPROBE_DEFER) {
+		DRM_DEBUG_DRIVER("Still waiting for our panel. Deferring...\n");
+		return -EPROBE_DEFER;
 	}
 
 	return component_add(&pdev->dev, &sun4i_tcon_ops);
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 39386f50af87..e340d0d66429 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -147,9 +147,9 @@ static void ttm_bo_release_list(struct kref *list_kref)
 	BUG_ON(!list_empty(&bo->lru));
 	BUG_ON(!list_empty(&bo->ddestroy));
 
-	if (bo->ttm)
-		ttm_tt_destroy(bo->ttm);
+	ttm_tt_destroy(bo->ttm);
 	atomic_dec(&bo->glob->bo_count);
+	fence_put(bo->moving);
 	if (bo->resv == &bo->ttm_resv)
 		reservation_object_fini(&bo->ttm_resv);
 	mutex_destroy(&bo->wu_mutex);
@@ -360,7 +360,8 @@ static int ttm_bo_handle_move_mem(struct ttm_buffer_object *bo,
 		ret = bdev->driver->move(bo, evict, interruptible,
 					 no_wait_gpu, mem);
 	else
-		ret = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, mem);
+		ret = ttm_bo_move_memcpy(bo, evict, interruptible,
+					 no_wait_gpu, mem);
 
 	if (ret) {
 		if (bdev->driver->move_notify) {
@@ -396,8 +397,7 @@ moved:
 
 out_err:
 	new_man = &bdev->man[bo->mem.mem_type];
-	if ((new_man->flags & TTM_MEMTYPE_FLAG_FIXED) && bo->ttm) {
-		ttm_tt_unbind(bo->ttm);
+	if (new_man->flags & TTM_MEMTYPE_FLAG_FIXED) {
 		ttm_tt_destroy(bo->ttm);
 		bo->ttm = NULL;
 	}
@@ -418,11 +418,8 @@ static void ttm_bo_cleanup_memtype_use(struct ttm_buffer_object *bo)
 	if (bo->bdev->driver->move_notify)
 		bo->bdev->driver->move_notify(bo, NULL);
 
-	if (bo->ttm) {
-		ttm_tt_unbind(bo->ttm);
-		ttm_tt_destroy(bo->ttm);
-		bo->ttm = NULL;
-	}
+	ttm_tt_destroy(bo->ttm);
+	bo->ttm = NULL;
 	ttm_bo_mem_put(bo, &bo->mem);
 
 	ww_mutex_unlock (&bo->resv->lock);
@@ -688,15 +685,6 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
 	struct ttm_placement placement;
 	int ret = 0;
 
-	ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
-
-	if (unlikely(ret != 0)) {
-		if (ret != -ERESTARTSYS) {
-			pr_err("Failed to expire sync object before buffer eviction\n");
-		}
-		goto out;
-	}
-
 	lockdep_assert_held(&bo->resv->lock.base);
 
 	evict_mem = bo->mem;
@@ -720,7 +708,7 @@ static int ttm_bo_evict(struct ttm_buffer_object *bo, bool interruptible,
 
 	ret = ttm_bo_handle_move_mem(bo, &evict_mem, true, interruptible,
 				     no_wait_gpu);
-	if (ret) {
+	if (unlikely(ret)) {
 		if (ret != -ERESTARTSYS)
 			pr_err("Buffer eviction failed\n");
 		ttm_bo_mem_put(bo, &evict_mem);
@@ -800,6 +788,34 @@ void ttm_bo_mem_put(struct ttm_buffer_object *bo, struct ttm_mem_reg *mem)
 EXPORT_SYMBOL(ttm_bo_mem_put);
 
 /**
+ * Add the last move fence to the BO and reserve a new shared slot.
+ */
+static int ttm_bo_add_move_fence(struct ttm_buffer_object *bo,
+				 struct ttm_mem_type_manager *man,
+				 struct ttm_mem_reg *mem)
+{
+	struct fence *fence;
+	int ret;
+
+	spin_lock(&man->move_lock);
+	fence = fence_get(man->move);
+	spin_unlock(&man->move_lock);
+
+	if (fence) {
+		reservation_object_add_shared_fence(bo->resv, fence);
+
+		ret = reservation_object_reserve_shared(bo->resv);
+		if (unlikely(ret))
+			return ret;
+
+		fence_put(bo->moving);
+		bo->moving = fence;
+	}
+
+	return 0;
+}
+
+/**
  * Repeatedly evict memory from the LRU for @mem_type until we create enough
  * space, or we've evicted everything and there isn't enough space.
  */
@@ -825,10 +841,8 @@ static int ttm_bo_mem_force_space(struct ttm_buffer_object *bo,
 		if (unlikely(ret != 0))
 			return ret;
 	} while (1);
-	if (mem->mm_node == NULL)
-		return -ENOMEM;
 	mem->mem_type = mem_type;
-	return 0;
+	return ttm_bo_add_move_fence(bo, man, mem);
 }
 
 static uint32_t ttm_bo_select_caching(struct ttm_mem_type_manager *man,
@@ -898,6 +912,10 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 	bool has_erestartsys = false;
 	int i, ret;
 
+	ret = reservation_object_reserve_shared(bo->resv);
+	if (unlikely(ret))
+		return ret;
+
 	mem->mm_node = NULL;
 	for (i = 0; i < placement->num_placement; ++i) {
 		const struct ttm_place *place = &placement->placement[i];
@@ -931,9 +949,15 @@ int ttm_bo_mem_space(struct ttm_buffer_object *bo,
 		ret = (*man->func->get_node)(man, bo, place, mem);
 		if (unlikely(ret))
 			return ret;
-		
-		if (mem->mm_node)
+
+		if (mem->mm_node) {
+			ret = ttm_bo_add_move_fence(bo, man, mem);
+			if (unlikely(ret)) {
+				(*man->func->put_node)(man, mem);
+				return ret;
+			}
 			break;
+		}
 	}
 
 	if ((type_ok && (mem_type == TTM_PL_SYSTEM)) || mem->mm_node) {
@@ -1000,20 +1024,6 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
 
 	lockdep_assert_held(&bo->resv->lock.base);
 
-	/*
-	 * Don't wait for the BO on initial allocation. This is important when
-	 * the BO has an imported reservation object.
-	 */
-	if (bo->mem.mem_type != TTM_PL_SYSTEM || bo->ttm != NULL) {
-		/*
-		 * FIXME: It's possible to pipeline buffer moves.
-		 * Have the driver move function wait for idle when necessary,
-		 * instead of doing it here.
-		 */
-		ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
-		if (ret)
-			return ret;
-	}
 	mem.num_pages = bo->num_pages;
 	mem.size = mem.num_pages << PAGE_SHIFT;
 	mem.page_alignment = bo->mem.page_alignment;
@@ -1165,7 +1175,7 @@ int ttm_bo_init(struct ttm_bo_device *bdev,
 	bo->mem.page_alignment = page_alignment;
 	bo->mem.bus.io_reserved_vm = false;
 	bo->mem.bus.io_reserved_count = 0;
-	bo->priv_flags = 0;
+	bo->moving = NULL;
 	bo->mem.placement = (TTM_PL_FLAG_SYSTEM | TTM_PL_FLAG_CACHED);
 	bo->persistent_swap_storage = persistent_swap_storage;
 	bo->acc_size = acc_size;
@@ -1277,6 +1287,7 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 {
 	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
 	struct ttm_bo_global *glob = bdev->glob;
+	struct fence *fence;
 	int ret;
 
 	/*
@@ -1297,6 +1308,23 @@ static int ttm_bo_force_list_clean(struct ttm_bo_device *bdev,
 		spin_lock(&glob->lru_lock);
 	}
 	spin_unlock(&glob->lru_lock);
+
+	spin_lock(&man->move_lock);
+	fence = fence_get(man->move);
+	spin_unlock(&man->move_lock);
+
+	if (fence) {
+		ret = fence_wait(fence, false);
+		fence_put(fence);
+		if (ret) {
+			if (allow_errors) {
+				return ret;
+			} else {
+				pr_err("Cleanup eviction failed\n");
+			}
+		}
+	}
+
 	return 0;
 }
 
@@ -1316,6 +1344,7 @@ int ttm_bo_clean_mm(struct ttm_bo_device *bdev, unsigned mem_type)
 		       mem_type);
 		return ret;
 	}
+	fence_put(man->move);
 
 	man->use_type = false;
 	man->has_type = false;
@@ -1361,6 +1390,7 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 	man->io_reserve_fastpath = true;
 	man->use_io_reserve_lru = false;
 	mutex_init(&man->io_reserve_mutex);
+	spin_lock_init(&man->move_lock);
 	INIT_LIST_HEAD(&man->io_reserve_lru);
 
 	ret = bdev->driver->init_mem_type(bdev, type, man);
@@ -1379,6 +1409,7 @@ int ttm_bo_init_mm(struct ttm_bo_device *bdev, unsigned type,
 	man->size = p_size;
 
 	INIT_LIST_HEAD(&man->lru);
+	man->move = NULL;
 
 	return 0;
 }
@@ -1572,47 +1603,17 @@ EXPORT_SYMBOL(ttm_bo_unmap_virtual);
 int ttm_bo_wait(struct ttm_buffer_object *bo,
 		bool interruptible, bool no_wait)
 {
-	struct reservation_object_list *fobj;
-	struct reservation_object *resv;
-	struct fence *excl;
-	long timeout = 15 * HZ;
-	int i;
-
-	resv = bo->resv;
-	fobj = reservation_object_get_list(resv);
-	excl = reservation_object_get_excl(resv);
-	if (excl) {
-		if (!fence_is_signaled(excl)) {
-			if (no_wait)
-				return -EBUSY;
-
-			timeout = fence_wait_timeout(excl,
-						     interruptible, timeout);
-		}
-	}
-
-	for (i = 0; fobj && timeout > 0 && i < fobj->shared_count; ++i) {
-		struct fence *fence;
-		fence = rcu_dereference_protected(fobj->shared[i],
-						reservation_object_held(resv));
-
-		if (!fence_is_signaled(fence)) {
-			if (no_wait)
-				return -EBUSY;
-
-			timeout = fence_wait_timeout(fence,
-						     interruptible, timeout);
-		}
-	}
+	long timeout = no_wait ? 0 : 15 * HZ;
 
+	timeout = reservation_object_wait_timeout_rcu(bo->resv, true,
+						      interruptible, timeout);
 	if (timeout < 0)
 		return timeout;
 
 	if (timeout == 0)
 		return -EBUSY;
 
-	reservation_object_add_excl_fence(resv, NULL);
-	clear_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
+	reservation_object_add_excl_fence(bo->resv, NULL);
 	return 0;
 }
 EXPORT_SYMBOL(ttm_bo_wait);
@@ -1682,14 +1683,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 	ttm_bo_list_ref_sub(bo, put_count, true);
 
 	/**
-	 * Wait for GPU, then move to system cached.
+	 * Move to system cached
 	 */
 
-	ret = ttm_bo_wait(bo, false, false);
-
-	if (unlikely(ret != 0))
-		goto out;
-
 	if ((bo->mem.placement & swap_placement) != swap_placement) {
 		struct ttm_mem_reg evict_mem;
 
@@ -1704,6 +1700,14 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 			goto out;
 	}
 
+	/**
+	 * Make sure BO is idle.
+	 */
+
+	ret = ttm_bo_wait(bo, false, false);
+	if (unlikely(ret != 0))
+		goto out;
+
 	ttm_bo_unmap_virtual(bo);
 
 	/**
diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c
index d9831559706e..4da0e784f9e7 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_util.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_util.c
@@ -53,7 +53,6 @@ int ttm_bo_move_ttm(struct ttm_buffer_object *bo,
 	int ret;
 
 	if (old_mem->mem_type != TTM_PL_SYSTEM) {
-		ttm_tt_unbind(ttm);
 		ttm_bo_free_old_node(bo);
 		ttm_flag_masked(&old_mem->placement, TTM_PL_FLAG_SYSTEM,
 				TTM_PL_MASK_MEM);
@@ -321,7 +320,8 @@ static int ttm_copy_ttm_io_page(struct ttm_tt *ttm, void *dst,
 }
 
 int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
-		       bool evict, bool no_wait_gpu,
+		       bool evict, bool interruptible,
+		       bool no_wait_gpu,
 		       struct ttm_mem_reg *new_mem)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -337,6 +337,10 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
 	unsigned long add = 0;
 	int dir;
 
+	ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+	if (ret)
+		return ret;
+
 	ret = ttm_mem_reg_ioremap(bdev, old_mem, &old_iomap);
 	if (ret)
 		return ret;
@@ -401,8 +405,7 @@ out2:
 	*old_mem = *new_mem;
 	new_mem->mm_node = NULL;
 
-	if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) && (ttm != NULL)) {
-		ttm_tt_unbind(ttm);
+	if (man->flags & TTM_MEMTYPE_FLAG_FIXED) {
 		ttm_tt_destroy(ttm);
 		bo->ttm = NULL;
 	}
@@ -462,6 +465,7 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo,
 	INIT_LIST_HEAD(&fbo->lru);
 	INIT_LIST_HEAD(&fbo->swap);
 	INIT_LIST_HEAD(&fbo->io_reserve_lru);
+	fbo->moving = NULL;
 	drm_vma_node_reset(&fbo->vma_node);
 	atomic_set(&fbo->cpu_writers, 0);
 
@@ -634,7 +638,6 @@ EXPORT_SYMBOL(ttm_bo_kunmap);
 int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 			      struct fence *fence,
 			      bool evict,
-			      bool no_wait_gpu,
 			      struct ttm_mem_reg *new_mem)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -649,9 +652,7 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 		if (ret)
 			return ret;
 
-		if ((man->flags & TTM_MEMTYPE_FLAG_FIXED) &&
-		    (bo->ttm != NULL)) {
-			ttm_tt_unbind(bo->ttm);
+		if (man->flags & TTM_MEMTYPE_FLAG_FIXED) {
 			ttm_tt_destroy(bo->ttm);
 			bo->ttm = NULL;
 		}
@@ -665,7 +666,8 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 		 * operation has completed.
 		 */
 
-		set_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags);
+		fence_put(bo->moving);
+		bo->moving = fence_get(fence);
 
 		ret = ttm_buffer_object_transfer(bo, &ghost_obj);
 		if (ret)
@@ -694,3 +696,95 @@ int ttm_bo_move_accel_cleanup(struct ttm_buffer_object *bo,
 	return 0;
 }
 EXPORT_SYMBOL(ttm_bo_move_accel_cleanup);
+
+int ttm_bo_pipeline_move(struct ttm_buffer_object *bo,
+			 struct fence *fence, bool evict,
+			 struct ttm_mem_reg *new_mem)
+{
+	struct ttm_bo_device *bdev = bo->bdev;
+	struct ttm_mem_reg *old_mem = &bo->mem;
+
+	struct ttm_mem_type_manager *from = &bdev->man[old_mem->mem_type];
+	struct ttm_mem_type_manager *to = &bdev->man[new_mem->mem_type];
+
+	int ret;
+
+	reservation_object_add_excl_fence(bo->resv, fence);
+
+	if (!evict) {
+		struct ttm_buffer_object *ghost_obj;
+
+		/**
+		 * This should help pipeline ordinary buffer moves.
+		 *
+		 * Hang old buffer memory on a new buffer object,
+		 * and leave it to be released when the GPU
+		 * operation has completed.
+		 */
+
+		fence_put(bo->moving);
+		bo->moving = fence_get(fence);
+
+		ret = ttm_buffer_object_transfer(bo, &ghost_obj);
+		if (ret)
+			return ret;
+
+		reservation_object_add_excl_fence(ghost_obj->resv, fence);
+
+		/**
+		 * If we're not moving to fixed memory, the TTM object
+		 * needs to stay alive. Otherwise hang it on the ghost
+		 * bo to be unbound and destroyed.
+		 */
+
+		if (!(to->flags & TTM_MEMTYPE_FLAG_FIXED))
+			ghost_obj->ttm = NULL;
+		else
+			bo->ttm = NULL;
+
+		ttm_bo_unreserve(ghost_obj);
+		ttm_bo_unref(&ghost_obj);
+
+	} else if (from->flags & TTM_MEMTYPE_FLAG_FIXED) {
+
+		/**
+		 * BO doesn't have a TTM we need to bind/unbind. Just remember
+		 * this eviction and free up the allocation
+		 */
+
+		spin_lock(&from->move_lock);
+		if (!from->move || fence_is_later(fence, from->move)) {
+			fence_put(from->move);
+			from->move = fence_get(fence);
+		}
+		spin_unlock(&from->move_lock);
+
+		ttm_bo_free_old_node(bo);
+
+		fence_put(bo->moving);
+		bo->moving = fence_get(fence);
+
+	} else {
+		/**
+		 * Last resort, wait for the move to be completed.
+		 *
+		 * Should never happen in practice.
+		 */
+
+		ret = ttm_bo_wait(bo, false, false);
+		if (ret)
+			return ret;
+
+		if (to->flags & TTM_MEMTYPE_FLAG_FIXED) {
+			ttm_tt_destroy(bo->ttm);
+			bo->ttm = NULL;
+		}
+		ttm_bo_free_old_node(bo);
+	}
+
+	*old_mem = *new_mem;
+	new_mem->mm_node = NULL;
+
+	return 0;
+}
+EXPORT_SYMBOL(ttm_bo_pipeline_move);
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 3216878bced3..a6ed9d5e5167 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -48,15 +48,14 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 {
 	int ret = 0;
 
-	if (likely(!test_bit(TTM_BO_PRIV_FLAG_MOVING, &bo->priv_flags)))
+	if (likely(!bo->moving))
 		goto out_unlock;
 
 	/*
 	 * Quick non-stalling check for idle.
 	 */
-	ret = ttm_bo_wait(bo, false, true);
-	if (likely(ret == 0))
-		goto out_unlock;
+	if (fence_is_signaled(bo->moving))
+		goto out_clear;
 
 	/*
 	 * If possible, avoid waiting for GPU with mmap_sem
@@ -68,17 +67,23 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 			goto out_unlock;
 
 		up_read(&vma->vm_mm->mmap_sem);
-		(void) ttm_bo_wait(bo, true, false);
+		(void) fence_wait(bo->moving, true);
 		goto out_unlock;
 	}
 
 	/*
 	 * Ordinary wait.
 	 */
-	ret = ttm_bo_wait(bo, true, false);
-	if (unlikely(ret != 0))
+	ret = fence_wait(bo->moving, true);
+	if (unlikely(ret != 0)) {
 		ret = (ret != -ERESTARTSYS) ? VM_FAULT_SIGBUS :
 			VM_FAULT_NOPAGE;
+		goto out_unlock;
+	}
+
+out_clear:
+	fence_put(bo->moving);
+	bo->moving = NULL;
 
 out_unlock:
 	return ret;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 077ae9b2865d..d28d4333dcce 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -166,11 +166,15 @@ EXPORT_SYMBOL(ttm_tt_set_placement_caching);
 
 void ttm_tt_destroy(struct ttm_tt *ttm)
 {
-	if (unlikely(ttm == NULL))
+	int ret;
+
+	if (ttm == NULL)
 		return;
 
 	if (ttm->state == tt_bound) {
-		ttm_tt_unbind(ttm);
+		ret = ttm->func->unbind(ttm);
+		BUG_ON(ret);
+		ttm->state = tt_unbound;
 	}
 
 	if (ttm->state == tt_unbound)
@@ -251,17 +255,6 @@ void ttm_dma_tt_fini(struct ttm_dma_tt *ttm_dma)
 }
 EXPORT_SYMBOL(ttm_dma_tt_fini);
 
-void ttm_tt_unbind(struct ttm_tt *ttm)
-{
-	int ret;
-
-	if (ttm->state == tt_bound) {
-		ret = ttm->func->unbind(ttm);
-		BUG_ON(ret);
-		ttm->state = tt_unbound;
-	}
-}
-
 int ttm_tt_bind(struct ttm_tt *ttm, struct ttm_mem_reg *bo_mem)
 {
 	int ret = 0;
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 4c0f26a644a3..c82d468d178b 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -449,14 +449,6 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc,
 
 	WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size);
 
-	HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
-		  vc4_state->mm.start);
-
-	if (debug_dump_regs) {
-		DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc));
-		vc4_hvs_dump_state(dev);
-	}
-
 	if (crtc->state->event) {
 		unsigned long flags;
 
@@ -466,8 +458,20 @@ static void vc4_crtc_atomic_flush(struct drm_crtc *crtc,
 
 		spin_lock_irqsave(&dev->event_lock, flags);
 		vc4_crtc->event = crtc->state->event;
-		spin_unlock_irqrestore(&dev->event_lock, flags);
 		crtc->state->event = NULL;
+
+		HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
+			  vc4_state->mm.start);
+
+		spin_unlock_irqrestore(&dev->event_lock, flags);
+	} else {
+		HVS_WRITE(SCALER_DISPLISTX(vc4_crtc->channel),
+			  vc4_state->mm.start);
+	}
+
+	if (debug_dump_regs) {
+		DRM_INFO("CRTC %d HVS after:\n", drm_crtc_index(crtc));
+		vc4_hvs_dump_state(dev);
 	}
 }
 
@@ -493,12 +497,17 @@ static void vc4_crtc_handle_page_flip(struct vc4_crtc *vc4_crtc)
 {
 	struct drm_crtc *crtc = &vc4_crtc->base;
 	struct drm_device *dev = crtc->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc->state);
+	u32 chan = vc4_crtc->channel;
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev->event_lock, flags);
-	if (vc4_crtc->event) {
+	if (vc4_crtc->event &&
+	    (vc4_state->mm.start == HVS_READ(SCALER_DISPLACTX(chan)))) {
 		drm_crtc_send_vblank_event(crtc, vc4_crtc->event);
 		vc4_crtc->event = NULL;
+		drm_crtc_vblank_put(crtc);
 	}
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 }
@@ -549,6 +558,7 @@ vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
 		spin_unlock_irqrestore(&dev->event_lock, flags);
 	}
 
+	drm_crtc_vblank_put(crtc);
 	drm_framebuffer_unreference(flip_state->fb);
 	kfree(flip_state);
 
@@ -591,6 +601,8 @@ static int vc4_async_page_flip(struct drm_crtc *crtc,
 		return ret;
 	}
 
+	WARN_ON(drm_crtc_vblank_get(crtc) != 0);
+
 	/* Immediately update the plane's legacy fb pointer, so that later
 	 * modeset prep sees the state that will be present when the semaphore
 	 * is released.
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 9e88231b8906..54d0471243dd 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -66,12 +66,12 @@ static const struct file_operations vc4_drm_fops = {
 };
 
 static const struct drm_ioctl_desc vc4_drm_ioctls[] = {
-	DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, 0),
+	DRM_IOCTL_DEF_DRV(VC4_SUBMIT_CL, vc4_submit_cl_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_SEQNO, vc4_wait_seqno_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_WAIT_BO, vc4_wait_bo_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_CREATE_BO, vc4_create_bo_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_MMAP_BO, vc4_mmap_bo_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_CREATE_SHADER_BO, vc4_create_shader_bo_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(VC4_GET_HANG_STATE, vc4_get_hang_state_ioctl,
 			  DRM_ROOT_ONLY),
 };
@@ -91,7 +91,7 @@ static struct drm_driver vc4_drm_driver = {
 
 	.enable_vblank = vc4_enable_vblank,
 	.disable_vblank = vc4_disable_vblank,
-	.get_vblank_counter = drm_vblank_count,
+	.get_vblank_counter = drm_vblank_no_hw_counter,
 
 #if defined(CONFIG_DEBUG_FS)
 	.debugfs_init = vc4_debugfs_init,
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index 8f4d5ffc32be..9a217fd025f3 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -119,10 +119,18 @@ static int vc4_atomic_commit(struct drm_device *dev,
 		return -ENOMEM;
 
 	/* Make sure that any outstanding modesets have finished. */
-	ret = down_interruptible(&vc4->async_modeset);
-	if (ret) {
-		kfree(c);
-		return ret;
+	if (nonblock) {
+		ret = down_trylock(&vc4->async_modeset);
+		if (ret) {
+			kfree(c);
+			return -EBUSY;
+		}
+	} else {
+		ret = down_interruptible(&vc4->async_modeset);
+		if (ret) {
+			kfree(c);
+			return ret;
+		}
 	}
 
 	ret = drm_atomic_helper_prepare_planes(dev, state);
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index 6163b95c5411..f99eece4cc97 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -341,6 +341,10 @@
 #define SCALER_DISPLACT0                        0x00000030
 #define SCALER_DISPLACT1                        0x00000034
 #define SCALER_DISPLACT2                        0x00000038
+#define SCALER_DISPLACTX(x)			(SCALER_DISPLACT0 +	\
+						 (x) * (SCALER_DISPLACT1 - \
+							SCALER_DISPLACT0))
+
 #define SCALER_DISPCTRL0                        0x00000040
 # define SCALER_DISPCTRLX_ENABLE		BIT(31)
 # define SCALER_DISPCTRLX_RESET			BIT(30)
diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c
index a0580815629f..80482ac5f95d 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ttm.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c
@@ -375,6 +375,12 @@ static int virtio_gpu_bo_move(struct ttm_buffer_object *bo,
 			      bool no_wait_gpu,
 			      struct ttm_mem_reg *new_mem)
 {
+	int ret;
+
+	ret = ttm_bo_wait(bo, interruptible, no_wait_gpu);
+	if (ret)
+		return ret;
+
 	virtio_gpu_move_null(bo, new_mem);
 	return 0;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
index 6de283c8fa3e..f0374f9b56ca 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
@@ -28,6 +28,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/frame.h>
 #include <asm/hypervisor.h>
 #include "drmP.h"
 #include "vmwgfx_msg.h"
@@ -194,7 +195,7 @@ static int vmw_send_msg(struct rpc_channel *channel, const char *msg)
 
 	return -EINVAL;
 }
-
+STACK_FRAME_NON_STANDARD(vmw_send_msg);
 
 
 /**
@@ -304,6 +305,7 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg,
 
 	return 0;
 }
+STACK_FRAME_NON_STANDARD(vmw_recv_msg);
 
 
 /**
diff --git a/drivers/hid/hid-elo.c b/drivers/hid/hid-elo.c
index aad8c162a825..0cd4f7216239 100644
--- a/drivers/hid/hid-elo.c
+++ b/drivers/hid/hid-elo.c
@@ -261,7 +261,7 @@ static void elo_remove(struct hid_device *hdev)
 	struct elo_priv *priv = hid_get_drvdata(hdev);
 
 	hid_hw_stop(hdev);
-	flush_workqueue(wq);
+	cancel_delayed_work_sync(&priv->work);
 	kfree(priv);
 }
 
diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c
index c741f5e50a66..95b7d61d9910 100644
--- a/drivers/hid/hid-multitouch.c
+++ b/drivers/hid/hid-multitouch.c
@@ -1401,6 +1401,11 @@ static const struct hid_device_id mt_devices[] = {
 		MT_USB_DEVICE(USB_VENDOR_ID_NOVATEK,
 			USB_DEVICE_ID_NOVATEK_PCT) },
 
+	/* Ntrig Panel */
+	{ .driver_data = MT_CLS_NSMU,
+		HID_DEVICE(BUS_I2C, HID_GROUP_MULTITOUCH_WIN_8,
+			USB_VENDOR_ID_NTRIG, 0x1b05) },
+
 	/* PixArt optical touch screen */
 	{ .driver_data = MT_CLS_INRANGE_CONTACTNUMBER,
 		MT_USB_DEVICE(USB_VENDOR_ID_PIXART,
diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index 2f1ddca6f2e0..700145b15088 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -516,13 +516,13 @@ static noinline int hiddev_ioctl_usage(struct hiddev *hiddev, unsigned int cmd,
 					goto inval;
 			} else if (uref->usage_index >= field->report_count)
 				goto inval;
-
-			else if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) &&
-				 (uref_multi->num_values > HID_MAX_MULTI_USAGES ||
-				  uref->usage_index + uref_multi->num_values > field->report_count))
-				goto inval;
 		}
 
+		if ((cmd == HIDIOCGUSAGES || cmd == HIDIOCSUSAGES) &&
+		    (uref_multi->num_values > HID_MAX_MULTI_USAGES ||
+		     uref->usage_index + uref_multi->num_values > field->report_count))
+			goto inval;
+
 		switch (cmd) {
 		case HIDIOCGUSAGE:
 			uref->value = field->value[uref->usage_index];
diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c
index c43318d3416e..2ac87d553e22 100644
--- a/drivers/hwmon/dell-smm-hwmon.c
+++ b/drivers/hwmon/dell-smm-hwmon.c
@@ -35,6 +35,7 @@
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #include <linux/sched.h>
+#include <linux/ctype.h>
 
 #include <linux/i8k.h>
 
@@ -66,11 +67,13 @@
 
 static DEFINE_MUTEX(i8k_mutex);
 static char bios_version[4];
+static char bios_machineid[16];
 static struct device *i8k_hwmon_dev;
 static u32 i8k_hwmon_flags;
 static uint i8k_fan_mult = I8K_FAN_MULT;
 static uint i8k_pwm_mult;
 static uint i8k_fan_max = I8K_FAN_HIGH;
+static bool disallow_fan_type_call;
 
 #define I8K_HWMON_HAVE_TEMP1	(1 << 0)
 #define I8K_HWMON_HAVE_TEMP2	(1 << 1)
@@ -94,13 +97,13 @@ module_param(ignore_dmi, bool, 0);
 MODULE_PARM_DESC(ignore_dmi, "Continue probing hardware even if DMI data does not match");
 
 #if IS_ENABLED(CONFIG_I8K)
-static bool restricted;
+static bool restricted = true;
 module_param(restricted, bool, 0);
-MODULE_PARM_DESC(restricted, "Allow fan control if SYS_ADMIN capability set");
+MODULE_PARM_DESC(restricted, "Restrict fan control and serial number to CAP_SYS_ADMIN (default: 1)");
 
 static bool power_status;
 module_param(power_status, bool, 0600);
-MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k");
+MODULE_PARM_DESC(power_status, "Report power status in /proc/i8k (default: 0)");
 #endif
 
 static uint fan_mult;
@@ -235,14 +238,28 @@ static int i8k_get_fan_speed(int fan)
 /*
  * Read the fan type.
  */
-static int i8k_get_fan_type(int fan)
+static int _i8k_get_fan_type(int fan)
 {
 	struct smm_regs regs = { .eax = I8K_SMM_GET_FAN_TYPE, };
 
+	if (disallow_fan_type_call)
+		return -EINVAL;
+
 	regs.ebx = fan & 0xff;
 	return i8k_smm(&regs) ? : regs.eax & 0xff;
 }
 
+static int i8k_get_fan_type(int fan)
+{
+	/* I8K_SMM_GET_FAN_TYPE SMM call is expensive, so cache values */
+	static int types[2] = { INT_MIN, INT_MIN };
+
+	if (types[fan] == INT_MIN)
+		types[fan] = _i8k_get_fan_type(fan);
+
+	return types[fan];
+}
+
 /*
  * Read the fan nominal rpm for specific fan speed.
  */
@@ -387,14 +404,20 @@ i8k_ioctl_unlocked(struct file *fp, unsigned int cmd, unsigned long arg)
 
 	switch (cmd) {
 	case I8K_BIOS_VERSION:
+		if (!isdigit(bios_version[0]) || !isdigit(bios_version[1]) ||
+		    !isdigit(bios_version[2]))
+			return -EINVAL;
+
 		val = (bios_version[0] << 16) |
 				(bios_version[1] << 8) | bios_version[2];
 		break;
 
 	case I8K_MACHINE_ID:
-		memset(buff, 0, 16);
-		strlcpy(buff, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
-			sizeof(buff));
+		if (restricted && !capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		memset(buff, 0, sizeof(buff));
+		strlcpy(buff, bios_machineid, sizeof(buff));
 		break;
 
 	case I8K_FN_STATUS:
@@ -511,7 +534,7 @@ static int i8k_proc_show(struct seq_file *seq, void *offset)
 	seq_printf(seq, "%s %s %s %d %d %d %d %d %d %d\n",
 		   I8K_PROC_FMT,
 		   bios_version,
-		   i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
+		   (restricted && !capable(CAP_SYS_ADMIN)) ? "-1" : bios_machineid,
 		   cpu_temp,
 		   left_fan, right_fan, left_speed, right_speed,
 		   ac_power, fn_key);
@@ -718,6 +741,9 @@ static struct attribute *i8k_attrs[] = {
 static umode_t i8k_is_visible(struct kobject *kobj, struct attribute *attr,
 			      int index)
 {
+	if (disallow_fan_type_call &&
+	    (index == 9 || index == 12))
+		return 0;
 	if (index >= 0 && index <= 1 &&
 	    !(i8k_hwmon_flags & I8K_HWMON_HAVE_TEMP1))
 		return 0;
@@ -767,13 +793,17 @@ static int __init i8k_init_hwmon(void)
 	if (err >= 0)
 		i8k_hwmon_flags |= I8K_HWMON_HAVE_TEMP4;
 
-	/* First fan attributes, if fan type is OK */
-	err = i8k_get_fan_type(0);
+	/* First fan attributes, if fan status or type is OK */
+	err = i8k_get_fan_status(0);
+	if (err < 0)
+		err = i8k_get_fan_type(0);
 	if (err >= 0)
 		i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN1;
 
-	/* Second fan attributes, if fan type is OK */
-	err = i8k_get_fan_type(1);
+	/* Second fan attributes, if fan status or type is OK */
+	err = i8k_get_fan_status(1);
+	if (err < 0)
+		err = i8k_get_fan_type(1);
 	if (err >= 0)
 		i8k_hwmon_flags |= I8K_HWMON_HAVE_FAN2;
 
@@ -929,12 +959,14 @@ static struct dmi_system_id i8k_dmi_table[] __initdata = {
 
 MODULE_DEVICE_TABLE(dmi, i8k_dmi_table);
 
-static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
+/*
+ * On some machines, once I8K_SMM_GET_FAN_TYPE is issued the CPU fan speed
+ * randomly goes up and down due to a bug in the Dell SMM or BIOS. This is a
+ * blacklist of affected Dell machines on which that call is disallowed.
+ * See bug: https://bugzilla.kernel.org/show_bug.cgi?id=100121
+ */
+static struct dmi_system_id i8k_blacklist_fan_type_dmi_table[] __initdata = {
 	{
-		/*
-		 * CPU fan speed going up and down on Dell Studio XPS 8000
-		 * for unknown reasons.
-		 */
 		.ident = "Dell Studio XPS 8000",
 		.matches = {
 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
@@ -942,16 +974,19 @@ static struct dmi_system_id i8k_blacklist_dmi_table[] __initdata = {
 		},
 	},
 	{
-		/*
-		 * CPU fan speed going up and down on Dell Studio XPS 8100
-		 * for unknown reasons.
-		 */
 		.ident = "Dell Studio XPS 8100",
 		.matches = {
 			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
 			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Studio XPS 8100"),
 		},
 	},
+	{
+		.ident = "Dell Inspiron 580",
+		.matches = {
+			DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Inspiron 580 "),
+		},
+	},
 	{ }
 };
 
@@ -966,8 +1001,7 @@ static int __init i8k_probe(void)
 	/*
 	 * Get DMI information
 	 */
-	if (!dmi_check_system(i8k_dmi_table) ||
-	    dmi_check_system(i8k_blacklist_dmi_table)) {
+	if (!dmi_check_system(i8k_dmi_table)) {
 		if (!ignore_dmi && !force)
 			return -ENODEV;
 
@@ -978,8 +1012,13 @@ static int __init i8k_probe(void)
 			i8k_get_dmi_data(DMI_BIOS_VERSION));
 	}
 
+	if (dmi_check_system(i8k_blacklist_fan_type_dmi_table))
+		disallow_fan_type_call = true;
+
 	strlcpy(bios_version, i8k_get_dmi_data(DMI_BIOS_VERSION),
 		sizeof(bios_version));
+	strlcpy(bios_machineid, i8k_get_dmi_data(DMI_PRODUCT_SERIAL),
+		sizeof(bios_machineid));
 
 	/*
 	 * Get SMM Dell signature
diff --git a/drivers/hwmon/fam15h_power.c b/drivers/hwmon/fam15h_power.c
index eb97a9241d17..15aa49d082c4 100644
--- a/drivers/hwmon/fam15h_power.c
+++ b/drivers/hwmon/fam15h_power.c
@@ -172,9 +172,9 @@ static void do_read_registers_on_cu(void *_data)
  */
 static int read_registers(struct fam15h_power_data *data)
 {
-	int this_cpu, ret, cpu;
 	int core, this_core;
 	cpumask_var_t mask;
+	int ret, cpu;
 
 	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
 	if (!ret)
@@ -183,7 +183,6 @@ static int read_registers(struct fam15h_power_data *data)
 	memset(data->cu_on, 0, sizeof(int) * MAX_CUS);
 
 	get_online_cpus();
-	this_cpu = smp_processor_id();
 
 	/*
 	 * Choose the first online core of each compute unit, and then
@@ -205,12 +204,9 @@ static int read_registers(struct fam15h_power_data *data)
 		cpumask_set_cpu(cpumask_any(topology_sibling_cpumask(cpu)), mask);
 	}
 
-	if (cpumask_test_cpu(this_cpu, mask))
-		do_read_registers_on_cu(data);
+	on_each_cpu_mask(mask, do_read_registers_on_cu, data, true);
 
-	smp_call_function_many(mask, do_read_registers_on_cu, data, true);
 	put_online_cpus();
-
 	free_cpumask_var(mask);
 
 	return 0;
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index c9ff08dbe10c..e30a5939dc0d 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -375,7 +375,7 @@ struct lm90_data {
 	int kind;
 	u32 flags;
 
-	int update_interval;	/* in milliseconds */
+	unsigned int update_interval; /* in milliseconds */
 
 	u8 config_orig;		/* Original configuration register value */
 	u8 convrate_orig;	/* Original conversion rate register value */
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 847d1b5f2c13..688be9e060fc 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -300,13 +300,10 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata)
 	if (local_read(&drvdata->mode) == CS_MODE_SYSFS) {
 		/*
 		 * The trace run will continue with the same allocated trace
-		 * buffer. As such zero-out the buffer so that we don't end
-		 * up with stale data.
-		 *
-		 * Since the tracer is still enabled drvdata::buf
-		 * can't be NULL.
+		 * buffer. The trace buffer is cleared in tmc_etr_enable_hw(),
+		 * so we don't have to explicitly clear it. Also, since the
+		 * tracer is still enabled drvdata::buf can't be NULL.
 		 */
-		memset(drvdata->buf, 0, drvdata->size);
 		tmc_etr_enable_hw(drvdata);
 	} else {
 		/*
@@ -315,7 +312,7 @@ int tmc_read_unprepare_etr(struct tmc_drvdata *drvdata)
 		 */
 		vaddr = drvdata->vaddr;
 		paddr = drvdata->paddr;
-		drvdata->buf = NULL;
+		drvdata->buf = drvdata->vaddr = NULL;
 	}
 
 	drvdata->reading = false;
diff --git a/drivers/hwtracing/coresight/coresight.c b/drivers/hwtracing/coresight/coresight.c
index 5443d03a1eec..d08d1ab9bba5 100644
--- a/drivers/hwtracing/coresight/coresight.c
+++ b/drivers/hwtracing/coresight/coresight.c
@@ -385,7 +385,6 @@ static int _coresight_build_path(struct coresight_device *csdev,
 	int i;
 	bool found = false;
 	struct coresight_node *node;
-	struct coresight_connection *conn;
 
 	/* An activated sink has been found.  Enqueue the element */
 	if ((csdev->type == CORESIGHT_DEV_TYPE_SINK ||
@@ -394,8 +393,9 @@ static int _coresight_build_path(struct coresight_device *csdev,
 
 	/* Not a sink - recursively explore each port found on this element */
 	for (i = 0; i < csdev->nr_outport; i++) {
-		conn = &csdev->conns[i];
-		if (_coresight_build_path(conn->child_dev, path) == 0) {
+		struct coresight_device *child_dev = csdev->conns[i].child_dev;
+
+		if (child_dev && _coresight_build_path(child_dev, path) == 0) {
 			found = true;
 			break;
 		}
@@ -425,6 +425,7 @@ out:
 struct list_head *coresight_build_path(struct coresight_device *csdev)
 {
 	struct list_head *path;
+	int rc;
 
 	path = kzalloc(sizeof(struct list_head), GFP_KERNEL);
 	if (!path)
@@ -432,9 +433,10 @@ struct list_head *coresight_build_path(struct coresight_device *csdev)
 
 	INIT_LIST_HEAD(path);
 
-	if (_coresight_build_path(csdev, path)) {
+	rc = _coresight_build_path(csdev, path);
+	if (rc) {
 		kfree(path);
-		path = NULL;
+		return ERR_PTR(rc);
 	}
 
 	return path;
@@ -507,8 +509,9 @@ int coresight_enable(struct coresight_device *csdev)
 		goto out;
 
 	path = coresight_build_path(csdev);
-	if (!path) {
+	if (IS_ERR(path)) {
 		pr_err("building path(s) failed\n");
+		ret = PTR_ERR(path);
 		goto out;
 	}
 
diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c
index 64b1208bca5e..4a60ad214747 100644
--- a/drivers/i2c/busses/i2c-i801.c
+++ b/drivers/i2c/busses/i2c-i801.c
@@ -245,6 +245,13 @@ struct i801_priv {
 	struct platform_device *mux_pdev;
 #endif
 	struct platform_device *tco_pdev;
+
+	/*
+	 * If set to true the host controller registers are reserved for
+	 * ACPI AML use. Protected by acpi_lock.
+	 */
+	bool acpi_reserved;
+	struct mutex acpi_lock;
 };
 
 #define FEATURE_SMBUS_PEC	(1 << 0)
@@ -718,6 +725,12 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
 	int ret = 0, xact = 0;
 	struct i801_priv *priv = i2c_get_adapdata(adap);
 
+	mutex_lock(&priv->acpi_lock);
+	if (priv->acpi_reserved) {
+		mutex_unlock(&priv->acpi_lock);
+		return -EBUSY;
+	}
+
 	pm_runtime_get_sync(&priv->pci_dev->dev);
 
 	hwpec = (priv->features & FEATURE_SMBUS_PEC) && (flags & I2C_CLIENT_PEC)
@@ -820,6 +833,7 @@ static s32 i801_access(struct i2c_adapter *adap, u16 addr,
 out:
 	pm_runtime_mark_last_busy(&priv->pci_dev->dev);
 	pm_runtime_put_autosuspend(&priv->pci_dev->dev);
+	mutex_unlock(&priv->acpi_lock);
 	return ret;
 }
 
@@ -1257,6 +1271,83 @@ static void i801_add_tco(struct i801_priv *priv)
 	priv->tco_pdev = pdev;
 }
 
+#ifdef CONFIG_ACPI
+static acpi_status
+i801_acpi_io_handler(u32 function, acpi_physical_address address, u32 bits,
+		     u64 *value, void *handler_context, void *region_context)
+{
+	struct i801_priv *priv = handler_context;
+	struct pci_dev *pdev = priv->pci_dev;
+	acpi_status status;
+
+	/*
+	 * Once BIOS AML code touches the OpRegion we warn and inhibit any
+	 * further access from the driver itself. This device is now owned
+	 * by the system firmware.
+	 */
+	mutex_lock(&priv->acpi_lock);
+
+	if (!priv->acpi_reserved) {
+		priv->acpi_reserved = true;
+
+		dev_warn(&pdev->dev, "BIOS is accessing SMBus registers\n");
+		dev_warn(&pdev->dev, "Driver SMBus register access inhibited\n");
+
+		/*
+		 * BIOS is accessing the host controller so prevent it from
+		 * suspending automatically from now on.
+		 */
+		pm_runtime_get_sync(&pdev->dev);
+	}
+
+	if ((function & ACPI_IO_MASK) == ACPI_READ)
+		status = acpi_os_read_port(address, (u32 *)value, bits);
+	else
+		status = acpi_os_write_port(address, (u32)*value, bits);
+
+	mutex_unlock(&priv->acpi_lock);
+
+	return status;
+}
+
+static int i801_acpi_probe(struct i801_priv *priv)
+{
+	struct acpi_device *adev;
+	acpi_status status;
+
+	adev = ACPI_COMPANION(&priv->pci_dev->dev);
+	if (adev) {
+		status = acpi_install_address_space_handler(adev->handle,
+				ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler,
+				NULL, priv);
+		if (ACPI_SUCCESS(status))
+			return 0;
+	}
+
+	return acpi_check_resource_conflict(&priv->pci_dev->resource[SMBBAR]);
+}
+
+static void i801_acpi_remove(struct i801_priv *priv)
+{
+	struct acpi_device *adev;
+
+	adev = ACPI_COMPANION(&priv->pci_dev->dev);
+	if (!adev)
+		return;
+
+	acpi_remove_address_space_handler(adev->handle,
+		ACPI_ADR_SPACE_SYSTEM_IO, i801_acpi_io_handler);
+
+	mutex_lock(&priv->acpi_lock);
+	if (priv->acpi_reserved)
+		pm_runtime_put(&priv->pci_dev->dev);
+	mutex_unlock(&priv->acpi_lock);
+}
+#else
+static inline int i801_acpi_probe(struct i801_priv *priv) { return 0; }
+static inline void i801_acpi_remove(struct i801_priv *priv) { }
+#endif
+
 static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
 	unsigned char temp;
@@ -1274,6 +1365,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	priv->adapter.dev.parent = &dev->dev;
 	ACPI_COMPANION_SET(&priv->adapter.dev, ACPI_COMPANION(&dev->dev));
 	priv->adapter.retries = 3;
+	mutex_init(&priv->acpi_lock);
 
 	priv->pci_dev = dev;
 	switch (dev->device) {
@@ -1336,10 +1428,8 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 		return -ENODEV;
 	}
 
-	err = acpi_check_resource_conflict(&dev->resource[SMBBAR]);
-	if (err) {
+	if (i801_acpi_probe(priv))
 		return -ENODEV;
-	}
 
 	err = pcim_iomap_regions(dev, 1 << SMBBAR,
 				 dev_driver_string(&dev->dev));
@@ -1348,6 +1438,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 			"Failed to request SMBus region 0x%lx-0x%Lx\n",
 			priv->smba,
 			(unsigned long long)pci_resource_end(dev, SMBBAR));
+		i801_acpi_remove(priv);
 		return err;
 	}
 
@@ -1412,6 +1503,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id)
 	err = i2c_add_adapter(&priv->adapter);
 	if (err) {
 		dev_err(&dev->dev, "Failed to add SMBus adapter\n");
+		i801_acpi_remove(priv);
 		return err;
 	}
 
@@ -1438,6 +1530,7 @@ static void i801_remove(struct pci_dev *dev)
 
 	i801_del_mux(priv);
 	i2c_del_adapter(&priv->adapter);
+	i801_acpi_remove(priv);
 	pci_write_config_byte(dev, SMBHSTCFG, priv->original_hstcfg);
 
 	platform_device_unregister(priv->tco_pdev);
diff --git a/drivers/i2c/busses/i2c-octeon.c b/drivers/i2c/busses/i2c-octeon.c
index aa5f01efd826..30ae35146723 100644
--- a/drivers/i2c/busses/i2c-octeon.c
+++ b/drivers/i2c/busses/i2c-octeon.c
@@ -934,8 +934,15 @@ static int octeon_i2c_read(struct octeon_i2c *i2c, int target,
 		return result;
 
 	for (i = 0; i < length; i++) {
-		/* for the last byte TWSI_CTL_AAK must not be set */
-		if (i + 1 == length)
+		/*
+		 * For the last byte to receive TWSI_CTL_AAK must not be set.
+		 *
+		 * A special case is I2C_M_RECV_LEN where we don't know the
+		 * additional length yet. If recv_len is set we assume we're
+		 * not reading the final byte and therefore need to set
+		 * TWSI_CTL_AAK.
+		 */
+		if ((i + 1 == length) && !(recv_len && i == 0))
 			final_read = true;
 
 		/* clear iflg to allow next event */
@@ -950,12 +957,8 @@ static int octeon_i2c_read(struct octeon_i2c *i2c, int target,
 
 		data[i] = octeon_i2c_data_read(i2c);
 		if (recv_len && i == 0) {
-			if (data[i] > I2C_SMBUS_BLOCK_MAX + 1) {
-				dev_err(i2c->dev,
-					"%s: read len > I2C_SMBUS_BLOCK_MAX %d\n",
-					__func__, data[i]);
+			if (data[i] > I2C_SMBUS_BLOCK_MAX + 1)
 				return -EPROTO;
-			}
 			length += data[i];
 		}
 
diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c
index 6773cadf7c9f..26e7c5187a58 100644
--- a/drivers/i2c/muxes/i2c-mux-reg.c
+++ b/drivers/i2c/muxes/i2c-mux-reg.c
@@ -260,6 +260,7 @@ static struct platform_driver i2c_mux_reg_driver = {
 	.remove	= i2c_mux_reg_remove,
 	.driver	= {
 		.name	= "i2c-mux-reg",
+		.of_match_table = of_match_ptr(i2c_mux_reg_of_match),
 	},
 };
 
diff --git a/drivers/iio/accel/st_accel_buffer.c b/drivers/iio/accel/st_accel_buffer.c
index a1e642ee13d6..7fddc137e91e 100644
--- a/drivers/iio/accel/st_accel_buffer.c
+++ b/drivers/iio/accel/st_accel_buffer.c
@@ -91,7 +91,7 @@ static const struct iio_buffer_setup_ops st_accel_buffer_setup_ops = {
 
 int st_accel_allocate_ring(struct iio_dev *indio_dev)
 {
-	return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
+	return iio_triggered_buffer_setup(indio_dev, NULL,
 		&st_sensors_trigger_handler, &st_accel_buffer_setup_ops);
 }
 
diff --git a/drivers/iio/accel/st_accel_core.c b/drivers/iio/accel/st_accel_core.c
index dc73f2d85e6d..4d95bfc4786c 100644
--- a/drivers/iio/accel/st_accel_core.c
+++ b/drivers/iio/accel/st_accel_core.c
@@ -741,6 +741,7 @@ static const struct iio_info accel_info = {
 static const struct iio_trigger_ops st_accel_trigger_ops = {
 	.owner = THIS_MODULE,
 	.set_trigger_state = ST_ACCEL_TRIGGER_SET_STATE,
+	.validate_device = st_sensors_validate_device,
 };
 #define ST_ACCEL_TRIGGER_OPS (&st_accel_trigger_ops)
 #else
diff --git a/drivers/iio/common/st_sensors/st_sensors_buffer.c b/drivers/iio/common/st_sensors/st_sensors_buffer.c
index c55898543a47..f1693dbebb8a 100644
--- a/drivers/iio/common/st_sensors/st_sensors_buffer.c
+++ b/drivers/iio/common/st_sensors/st_sensors_buffer.c
@@ -57,31 +57,20 @@ irqreturn_t st_sensors_trigger_handler(int irq, void *p)
 	struct iio_poll_func *pf = p;
 	struct iio_dev *indio_dev = pf->indio_dev;
 	struct st_sensor_data *sdata = iio_priv(indio_dev);
+	s64 timestamp;
 
-	/* If we have a status register, check if this IRQ came from us */
-	if (sdata->sensor_settings->drdy_irq.addr_stat_drdy) {
-		u8 status;
-
-		len = sdata->tf->read_byte(&sdata->tb, sdata->dev,
-			   sdata->sensor_settings->drdy_irq.addr_stat_drdy,
-			   &status);
-		if (len < 0)
-			dev_err(sdata->dev, "could not read channel status\n");
-
-		/*
-		 * If this was not caused by any channels on this sensor,
-		 * return IRQ_NONE
-		 */
-		if (!(status & (u8)indio_dev->active_scan_mask[0]))
-			return IRQ_NONE;
-	}
+	/* If we do timestamping here, do it before reading the values */
+	if (sdata->hw_irq_trigger)
+		timestamp = sdata->hw_timestamp;
+	else
+		timestamp = iio_get_time_ns();
 
 	len = st_sensors_get_buffer_element(indio_dev, sdata->buffer_data);
 	if (len < 0)
 		goto st_sensors_get_buffer_element_error;
 
 	iio_push_to_buffers_with_timestamp(indio_dev, sdata->buffer_data,
-		pf->timestamp);
+					   timestamp);
 
 st_sensors_get_buffer_element_error:
 	iio_trigger_notify_done(indio_dev->trig);
diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index dffe00692169..9e59c90f6a8d 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -363,6 +363,11 @@ int st_sensors_init_sensor(struct iio_dev *indio_dev,
 	if (err < 0)
 		return err;
 
+	/* Disable DRDY, this might still be enabled after reboot. */
+	err = st_sensors_set_dataready_irq(indio_dev, false);
+	if (err < 0)
+		return err;
+
 	if (sdata->current_fullscale) {
 		err = st_sensors_set_fullscale(indio_dev,
 						sdata->current_fullscale->num);
@@ -424,6 +429,9 @@ int st_sensors_set_dataready_irq(struct iio_dev *indio_dev, bool enable)
 	else
 		drdy_mask = sdata->sensor_settings->drdy_irq.mask_int2;
 
+	/* Flag to the poll function that the hardware trigger is in use */
+	sdata->hw_irq_trigger = enable;
+
 	/* Enable/Disable the interrupt generator for data ready. */
 	err = st_sensors_write_data_with_mask(indio_dev,
 					sdata->sensor_settings->drdy_irq.addr,
diff --git a/drivers/iio/common/st_sensors/st_sensors_trigger.c b/drivers/iio/common/st_sensors/st_sensors_trigger.c
index da72279fcf99..296e4ff19ae8 100644
--- a/drivers/iio/common/st_sensors/st_sensors_trigger.c
+++ b/drivers/iio/common/st_sensors/st_sensors_trigger.c
@@ -17,6 +17,73 @@
 #include <linux/iio/common/st_sensors.h>
 #include "st_sensors_core.h"
 
+/**
+ * st_sensors_irq_handler() - top half of the IRQ-based triggers
+ * @irq: irq number
+ * @p: private handler data
+ */
+irqreturn_t st_sensors_irq_handler(int irq, void *p)
+{
+	struct iio_trigger *trig = p;
+	struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig);
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+
+	/* Get the time stamp as close in time as possible */
+	sdata->hw_timestamp = iio_get_time_ns();
+	return IRQ_WAKE_THREAD;
+}
+
+/**
+ * st_sensors_irq_thread() - bottom half of the IRQ-based triggers
+ * @irq: irq number
+ * @p: private handler data
+ */
+irqreturn_t st_sensors_irq_thread(int irq, void *p)
+{
+	struct iio_trigger *trig = p;
+	struct iio_dev *indio_dev = iio_trigger_get_drvdata(trig);
+	struct st_sensor_data *sdata = iio_priv(indio_dev);
+	int ret;
+
+	/*
+	 * If this trigger is backed by a hardware interrupt and we have a
+	 * status register, check if this IRQ came from us
+	 */
+	if (sdata->sensor_settings->drdy_irq.addr_stat_drdy) {
+		u8 status;
+
+		ret = sdata->tf->read_byte(&sdata->tb, sdata->dev,
+			   sdata->sensor_settings->drdy_irq.addr_stat_drdy,
+			   &status);
+		if (ret < 0) {
+			dev_err(sdata->dev, "could not read channel status\n");
+			goto out_poll;
+		}
+		/*
+		 * The lower bits of .active_scan_mask[0] are directly mapped
+		 * to the channels on the sensor: either bit 0 for
+		 * one-dimensional sensors, or e.g. x,y,z for accelerometers,
+		 * gyroscopes or magnetometers. No sensor uses more than 3
+		 * channels, so cut the other status bits here.
+		 */
+		status &= 0x07;
+
+		/*
+		 * If this was not caused by any channels on this sensor,
+		 * return IRQ_NONE
+		 */
+		if (!indio_dev->active_scan_mask)
+			return IRQ_NONE;
+		if (!(status & (u8)indio_dev->active_scan_mask[0]))
+			return IRQ_NONE;
+	}
+
+out_poll:
+	/* It's our IRQ: proceed to handle the register polling */
+	iio_trigger_poll_chained(p);
+	return IRQ_HANDLED;
+}
+
 int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 				const struct iio_trigger_ops *trigger_ops)
 {
@@ -30,6 +97,10 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 		return -ENOMEM;
 	}
 
+	iio_trigger_set_drvdata(sdata->trig, indio_dev);
+	sdata->trig->ops = trigger_ops;
+	sdata->trig->dev.parent = sdata->dev;
+
 	irq = sdata->get_irq_data_ready(indio_dev);
 	irq_trig = irqd_get_trigger_type(irq_get_irq_data(irq));
 	/*
@@ -77,9 +148,12 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 	    sdata->sensor_settings->drdy_irq.addr_stat_drdy)
 		irq_trig |= IRQF_SHARED;
 
-	err = request_threaded_irq(irq,
-			iio_trigger_generic_data_rdy_poll,
-			NULL,
+	/* Let's create an interrupt thread masking the hard IRQ here */
+	irq_trig |= IRQF_ONESHOT;
+
+	err = request_threaded_irq(sdata->get_irq_data_ready(indio_dev),
+			st_sensors_irq_handler,
+			st_sensors_irq_thread,
 			irq_trig,
 			sdata->trig->name,
 			sdata->trig);
@@ -88,10 +162,6 @@ int st_sensors_allocate_trigger(struct iio_dev *indio_dev,
 		goto iio_trigger_free;
 	}
 
-	iio_trigger_set_drvdata(sdata->trig, indio_dev);
-	sdata->trig->ops = trigger_ops;
-	sdata->trig->dev.parent = sdata->dev;
-
 	err = iio_trigger_register(sdata->trig);
 	if (err < 0) {
 		dev_err(&indio_dev->dev, "failed to register iio trigger.\n");
@@ -119,6 +189,18 @@ void st_sensors_deallocate_trigger(struct iio_dev *indio_dev)
 }
 EXPORT_SYMBOL(st_sensors_deallocate_trigger);
 
+int st_sensors_validate_device(struct iio_trigger *trig,
+			       struct iio_dev *indio_dev)
+{
+	struct iio_dev *indio = iio_trigger_get_drvdata(trig);
+
+	if (indio != indio_dev)
+		return -EINVAL;
+
+	return 0;
+}
+EXPORT_SYMBOL(st_sensors_validate_device);
+
 MODULE_AUTHOR("Denis Ciocca <denis.ciocca@st.com>");
 MODULE_DESCRIPTION("STMicroelectronics ST-sensors trigger");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/dac/Kconfig b/drivers/iio/dac/Kconfig
index e63b957c985f..f7c71da42f15 100644
--- a/drivers/iio/dac/Kconfig
+++ b/drivers/iio/dac/Kconfig
@@ -247,7 +247,7 @@ config MCP4922
 
 config STX104
 	tristate "Apex Embedded Systems STX104 DAC driver"
-	depends on X86 && ISA
+	depends on X86 && ISA_BUS_API
 	help
 	  Say yes here to build support for the 2-channel DAC on the Apex
 	  Embedded Systems STX104 integrated analog PC/104 card. The base port
diff --git a/drivers/iio/dac/ad5592r-base.c b/drivers/iio/dac/ad5592r-base.c
index 948f600e7059..69bde5909854 100644
--- a/drivers/iio/dac/ad5592r-base.c
+++ b/drivers/iio/dac/ad5592r-base.c
@@ -525,7 +525,7 @@ static int ad5592r_alloc_channels(struct ad5592r_state *st)
 
 	device_for_each_child_node(st->dev, child) {
 		ret = fwnode_property_read_u32(child, "reg", &reg);
-		if (ret || reg > ARRAY_SIZE(st->channel_modes))
+		if (ret || reg >= ARRAY_SIZE(st->channel_modes))
 			continue;
 
 		ret = fwnode_property_read_u32(child, "adi,mode", &tmp);
diff --git a/drivers/iio/gyro/st_gyro_buffer.c b/drivers/iio/gyro/st_gyro_buffer.c
index d67b17b6a7aa..a5377044e42f 100644
--- a/drivers/iio/gyro/st_gyro_buffer.c
+++ b/drivers/iio/gyro/st_gyro_buffer.c
@@ -91,7 +91,7 @@ static const struct iio_buffer_setup_ops st_gyro_buffer_setup_ops = {
 
 int st_gyro_allocate_ring(struct iio_dev *indio_dev)
 {
-	return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
+	return iio_triggered_buffer_setup(indio_dev, NULL,
 		&st_sensors_trigger_handler, &st_gyro_buffer_setup_ops);
 }
 
diff --git a/drivers/iio/gyro/st_gyro_core.c b/drivers/iio/gyro/st_gyro_core.c
index 52a3c87c375c..a8012955a1f6 100644
--- a/drivers/iio/gyro/st_gyro_core.c
+++ b/drivers/iio/gyro/st_gyro_core.c
@@ -409,6 +409,7 @@ static const struct iio_info gyro_info = {
 static const struct iio_trigger_ops st_gyro_trigger_ops = {
 	.owner = THIS_MODULE,
 	.set_trigger_state = ST_GYRO_TRIGGER_SET_STATE,
+	.validate_device = st_sensors_validate_device,
 };
 #define ST_GYRO_TRIGGER_OPS (&st_gyro_trigger_ops)
 #else
diff --git a/drivers/iio/humidity/am2315.c b/drivers/iio/humidity/am2315.c
index 3be6d209a159..11535911a5c6 100644
--- a/drivers/iio/humidity/am2315.c
+++ b/drivers/iio/humidity/am2315.c
@@ -165,10 +165,8 @@ static irqreturn_t am2315_trigger_handler(int irq, void *p)
 	struct am2315_sensor_data sensor_data;
 
 	ret = am2315_read_data(data, &sensor_data);
-	if (ret < 0) {
-		mutex_unlock(&data->lock);
+	if (ret < 0)
 		goto err;
-	}
 
 	mutex_lock(&data->lock);
 	if (*(indio_dev->active_scan_mask) == AM2315_ALL_CHANNEL_MASK) {
diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c
index fa4767613173..a03832a5fc95 100644
--- a/drivers/iio/humidity/hdc100x.c
+++ b/drivers/iio/humidity/hdc100x.c
@@ -55,7 +55,7 @@ static const struct {
 	},
 	{ /* IIO_HUMIDITYRELATIVE channel */
 		.shift = 8,
-		.mask = 2,
+		.mask = 3,
 	},
 };
 
@@ -164,14 +164,14 @@ static int hdc100x_get_measurement(struct hdc100x_data *data,
 		dev_err(&client->dev, "cannot read high byte measurement");
 		return ret;
 	}
-	val = ret << 6;
+	val = ret << 8;
 
 	ret = i2c_smbus_read_byte(client);
 	if (ret < 0) {
 		dev_err(&client->dev, "cannot read low byte measurement");
 		return ret;
 	}
-	val |= ret >> 2;
+	val |= ret;
 
 	return val;
 }
@@ -211,18 +211,18 @@ static int hdc100x_read_raw(struct iio_dev *indio_dev,
 		return IIO_VAL_INT_PLUS_MICRO;
 	case IIO_CHAN_INFO_SCALE:
 		if (chan->type == IIO_TEMP) {
-			*val = 165;
-			*val2 = 65536 >> 2;
+			*val = 165000;
+			*val2 = 65536;
 			return IIO_VAL_FRACTIONAL;
 		} else {
-			*val = 0;
-			*val2 = 10000;
-			return IIO_VAL_INT_PLUS_MICRO;
+			*val = 100;
+			*val2 = 65536;
+			return IIO_VAL_FRACTIONAL;
 		}
 		break;
 	case IIO_CHAN_INFO_OFFSET:
-		*val = -3971;
-		*val2 = 879096;
+		*val = -15887;
+		*val2 = 515151;
 		return IIO_VAL_INT_PLUS_MICRO;
 	default:
 		return -EINVAL;
diff --git a/drivers/iio/imu/bmi160/bmi160_core.c b/drivers/iio/imu/bmi160/bmi160_core.c
index 0bf92b06d7d8..b8a290ec984e 100644
--- a/drivers/iio/imu/bmi160/bmi160_core.c
+++ b/drivers/iio/imu/bmi160/bmi160_core.c
@@ -209,11 +209,11 @@ static const struct  bmi160_scale_item bmi160_scale_table[] = {
 };
 
 static const struct bmi160_odr bmi160_accel_odr[] = {
-	{0x01, 0, 78125},
-	{0x02, 1, 5625},
-	{0x03, 3, 125},
-	{0x04, 6, 25},
-	{0x05, 12, 5},
+	{0x01, 0, 781250},
+	{0x02, 1, 562500},
+	{0x03, 3, 125000},
+	{0x04, 6, 250000},
+	{0x05, 12, 500000},
 	{0x06, 25, 0},
 	{0x07, 50, 0},
 	{0x08, 100, 0},
@@ -229,7 +229,7 @@ static const struct bmi160_odr bmi160_gyro_odr[] = {
 	{0x08, 100, 0},
 	{0x09, 200, 0},
 	{0x0A, 400, 0},
-	{0x0B, 8000, 0},
+	{0x0B, 800, 0},
 	{0x0C, 1600, 0},
 	{0x0D, 3200, 0},
 };
@@ -364,8 +364,8 @@ int bmi160_set_odr(struct bmi160_data *data, enum bmi160_sensor_type t,
 
 	return regmap_update_bits(data->regmap,
 				  bmi160_regs[t].config,
-				  bmi160_odr_table[t].tbl[i].bits,
-				  bmi160_regs[t].config_odr_mask);
+				  bmi160_regs[t].config_odr_mask,
+				  bmi160_odr_table[t].tbl[i].bits);
 }
 
 static int bmi160_get_odr(struct bmi160_data *data, enum bmi160_sensor_type t,
diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c
index ae2806aafb72..0c52dfe64977 100644
--- a/drivers/iio/industrialio-trigger.c
+++ b/drivers/iio/industrialio-trigger.c
@@ -210,22 +210,35 @@ static int iio_trigger_attach_poll_func(struct iio_trigger *trig,
 
 	/* Prevent the module from being removed whilst attached to a trigger */
 	__module_get(pf->indio_dev->info->driver_module);
+
+	/* Get irq number */
 	pf->irq = iio_trigger_get_irq(trig);
+	if (pf->irq < 0)
+		goto out_put_module;
+
+	/* Request irq */
 	ret = request_threaded_irq(pf->irq, pf->h, pf->thread,
 				   pf->type, pf->name,
 				   pf);
-	if (ret < 0) {
-		module_put(pf->indio_dev->info->driver_module);
-		return ret;
-	}
+	if (ret < 0)
+		goto out_put_irq;
 
+	/* Enable trigger in driver */
 	if (trig->ops && trig->ops->set_trigger_state && notinuse) {
 		ret = trig->ops->set_trigger_state(trig, true);
 		if (ret < 0)
-			module_put(pf->indio_dev->info->driver_module);
+			goto out_free_irq;
 	}
 
 	return ret;
+
+out_free_irq:
+	free_irq(pf->irq, pf);
+out_put_irq:
+	iio_trigger_put_irq(trig, pf->irq);
+out_put_module:
+	module_put(pf->indio_dev->info->driver_module);
+	return ret;
 }
 
 static int iio_trigger_detach_poll_func(struct iio_trigger *trig,
diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c
index b4dbb3912977..651d57b8abbf 100644
--- a/drivers/iio/light/apds9960.c
+++ b/drivers/iio/light/apds9960.c
@@ -1011,6 +1011,7 @@ static int apds9960_probe(struct i2c_client *client,
 
 	iio_device_attach_buffer(indio_dev, buffer);
 
+	indio_dev->dev.parent = &client->dev;
 	indio_dev->info = &apds9960_info;
 	indio_dev->name = APDS9960_DRV_NAME;
 	indio_dev->channels = apds9960_channels;
diff --git a/drivers/iio/light/bh1780.c b/drivers/iio/light/bh1780.c
index 72b364e4aa72..b54dcba05a82 100644
--- a/drivers/iio/light/bh1780.c
+++ b/drivers/iio/light/bh1780.c
@@ -84,7 +84,7 @@ static int bh1780_debugfs_reg_access(struct iio_dev *indio_dev,
 	int ret;
 
 	if (!readval)
-		bh1780_write(bh1780, (u8)reg, (u8)writeval);
+		return bh1780_write(bh1780, (u8)reg, (u8)writeval);
 
 	ret = bh1780_read(bh1780, (u8)reg);
 	if (ret < 0)
@@ -187,7 +187,7 @@ static int bh1780_probe(struct i2c_client *client,
 
 	indio_dev->dev.parent = &client->dev;
 	indio_dev->info = &bh1780_info;
-	indio_dev->name = id->name;
+	indio_dev->name = "bh1780";
 	indio_dev->channels = bh1780_channels;
 	indio_dev->num_channels = ARRAY_SIZE(bh1780_channels);
 	indio_dev->modes = INDIO_DIRECT_MODE;
@@ -226,7 +226,8 @@ static int bh1780_remove(struct i2c_client *client)
 static int bh1780_runtime_suspend(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
-	struct bh1780_data *bh1780 = i2c_get_clientdata(client);
+	struct iio_dev *indio_dev = i2c_get_clientdata(client);
+	struct bh1780_data *bh1780 = iio_priv(indio_dev);
 	int ret;
 
 	ret = bh1780_write(bh1780, BH1780_REG_CONTROL, BH1780_POFF);
@@ -241,7 +242,8 @@ static int bh1780_runtime_suspend(struct device *dev)
 static int bh1780_runtime_resume(struct device *dev)
 {
 	struct i2c_client *client = to_i2c_client(dev);
-	struct bh1780_data *bh1780 = i2c_get_clientdata(client);
+	struct iio_dev *indio_dev = i2c_get_clientdata(client);
+	struct bh1780_data *bh1780 = iio_priv(indio_dev);
 	int ret;
 
 	ret = bh1780_write(bh1780, BH1780_REG_CONTROL, BH1780_PON);
diff --git a/drivers/iio/light/max44000.c b/drivers/iio/light/max44000.c
index e01e58a9bd14..f17cb2ea18f5 100644
--- a/drivers/iio/light/max44000.c
+++ b/drivers/iio/light/max44000.c
@@ -147,7 +147,6 @@ static const struct iio_chan_spec max44000_channels[] = {
 	{
 		.type = IIO_PROXIMITY,
 		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
-		.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE),
 		.scan_index = MAX44000_SCAN_INDEX_PRX,
 		.scan_type = {
 			.sign		= 'u',
diff --git a/drivers/iio/magnetometer/st_magn_buffer.c b/drivers/iio/magnetometer/st_magn_buffer.c
index ecd3bd0a9769..0a9e8fadfa9d 100644
--- a/drivers/iio/magnetometer/st_magn_buffer.c
+++ b/drivers/iio/magnetometer/st_magn_buffer.c
@@ -82,7 +82,7 @@ static const struct iio_buffer_setup_ops st_magn_buffer_setup_ops = {
 
 int st_magn_allocate_ring(struct iio_dev *indio_dev)
 {
-	return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
+	return iio_triggered_buffer_setup(indio_dev, NULL,
 		&st_sensors_trigger_handler, &st_magn_buffer_setup_ops);
 }
 
diff --git a/drivers/iio/magnetometer/st_magn_core.c b/drivers/iio/magnetometer/st_magn_core.c
index 62036d2a9956..8250fc322c56 100644
--- a/drivers/iio/magnetometer/st_magn_core.c
+++ b/drivers/iio/magnetometer/st_magn_core.c
@@ -572,6 +572,7 @@ static const struct iio_info magn_info = {
 static const struct iio_trigger_ops st_magn_trigger_ops = {
 	.owner = THIS_MODULE,
 	.set_trigger_state = ST_MAGN_TRIGGER_SET_STATE,
+	.validate_device = st_sensors_validate_device,
 };
 #define ST_MAGN_TRIGGER_OPS (&st_magn_trigger_ops)
 #else
diff --git a/drivers/iio/pressure/bmp280.c b/drivers/iio/pressure/bmp280.c
index 2f1498e12bb2..724452d61846 100644
--- a/drivers/iio/pressure/bmp280.c
+++ b/drivers/iio/pressure/bmp280.c
@@ -879,8 +879,8 @@ static int bmp280_probe(struct i2c_client *client,
 	if (ret < 0)
 		return ret;
 	if (chip_id != id->driver_data) {
-		dev_err(&client->dev, "bad chip id.  expected %x got %x\n",
-			BMP280_CHIP_ID, chip_id);
+		dev_err(&client->dev, "bad chip id.  expected %lx got %x\n",
+			id->driver_data, chip_id);
 		return -EINVAL;
 	}
 
diff --git a/drivers/iio/pressure/st_pressure_buffer.c b/drivers/iio/pressure/st_pressure_buffer.c
index 2ff53f222352..99468d0a64e7 100644
--- a/drivers/iio/pressure/st_pressure_buffer.c
+++ b/drivers/iio/pressure/st_pressure_buffer.c
@@ -82,7 +82,7 @@ static const struct iio_buffer_setup_ops st_press_buffer_setup_ops = {
 
 int st_press_allocate_ring(struct iio_dev *indio_dev)
 {
-	return iio_triggered_buffer_setup(indio_dev, &iio_pollfunc_store_time,
+	return iio_triggered_buffer_setup(indio_dev, NULL,
 		&st_sensors_trigger_handler, &st_press_buffer_setup_ops);
 }
 
diff --git a/drivers/iio/pressure/st_pressure_core.c b/drivers/iio/pressure/st_pressure_core.c
index 9e9b72a8f18f..92a118c3c4ac 100644
--- a/drivers/iio/pressure/st_pressure_core.c
+++ b/drivers/iio/pressure/st_pressure_core.c
@@ -28,15 +28,21 @@
 #include <linux/iio/common/st_sensors.h>
 #include "st_pressure.h"
 
+#define MCELSIUS_PER_CELSIUS			1000
+
+/* Default pressure sensitivity */
 #define ST_PRESS_LSB_PER_MBAR			4096UL
 #define ST_PRESS_KPASCAL_NANO_SCALE		(100000000UL / \
 						 ST_PRESS_LSB_PER_MBAR)
+
+/* Default temperature sensitivity */
 #define ST_PRESS_LSB_PER_CELSIUS		480UL
-#define ST_PRESS_CELSIUS_NANO_SCALE		(1000000000UL / \
-						 ST_PRESS_LSB_PER_CELSIUS)
+#define ST_PRESS_MILLI_CELSIUS_OFFSET		42500UL
+
 #define ST_PRESS_NUMBER_DATA_CHANNELS		1
 
 /* FULLSCALE */
+#define ST_PRESS_FS_AVL_1100MB			1100
 #define ST_PRESS_FS_AVL_1260MB			1260
 
 #define ST_PRESS_1_OUT_XL_ADDR			0x28
@@ -54,9 +60,6 @@
 #define ST_PRESS_LPS331AP_PW_MASK		0x80
 #define ST_PRESS_LPS331AP_FS_ADDR		0x23
 #define ST_PRESS_LPS331AP_FS_MASK		0x30
-#define ST_PRESS_LPS331AP_FS_AVL_1260_VAL	0x00
-#define ST_PRESS_LPS331AP_FS_AVL_1260_GAIN	ST_PRESS_KPASCAL_NANO_SCALE
-#define ST_PRESS_LPS331AP_FS_AVL_TEMP_GAIN	ST_PRESS_CELSIUS_NANO_SCALE
 #define ST_PRESS_LPS331AP_BDU_ADDR		0x20
 #define ST_PRESS_LPS331AP_BDU_MASK		0x04
 #define ST_PRESS_LPS331AP_DRDY_IRQ_ADDR		0x22
@@ -67,9 +70,14 @@
 #define ST_PRESS_LPS331AP_OD_IRQ_ADDR		0x22
 #define ST_PRESS_LPS331AP_OD_IRQ_MASK		0x40
 #define ST_PRESS_LPS331AP_MULTIREAD_BIT		true
-#define ST_PRESS_LPS331AP_TEMP_OFFSET		42500
 
 /* CUSTOM VALUES FOR LPS001WP SENSOR */
+
+/* LPS001WP pressure resolution */
+#define ST_PRESS_LPS001WP_LSB_PER_MBAR		16UL
+/* LPS001WP temperature resolution */
+#define ST_PRESS_LPS001WP_LSB_PER_CELSIUS	64UL
+
 #define ST_PRESS_LPS001WP_WAI_EXP		0xba
 #define ST_PRESS_LPS001WP_ODR_ADDR		0x20
 #define ST_PRESS_LPS001WP_ODR_MASK		0x30
@@ -78,6 +86,8 @@
 #define ST_PRESS_LPS001WP_ODR_AVL_13HZ_VAL	0x03
 #define ST_PRESS_LPS001WP_PW_ADDR		0x20
 #define ST_PRESS_LPS001WP_PW_MASK		0x40
+#define ST_PRESS_LPS001WP_FS_AVL_PRESS_GAIN \
+	(100000000UL / ST_PRESS_LPS001WP_LSB_PER_MBAR)
 #define ST_PRESS_LPS001WP_BDU_ADDR		0x20
 #define ST_PRESS_LPS001WP_BDU_MASK		0x04
 #define ST_PRESS_LPS001WP_MULTIREAD_BIT		true
@@ -94,11 +104,6 @@
 #define ST_PRESS_LPS25H_ODR_AVL_25HZ_VAL	0x04
 #define ST_PRESS_LPS25H_PW_ADDR			0x20
 #define ST_PRESS_LPS25H_PW_MASK			0x80
-#define ST_PRESS_LPS25H_FS_ADDR			0x00
-#define ST_PRESS_LPS25H_FS_MASK			0x00
-#define ST_PRESS_LPS25H_FS_AVL_1260_VAL		0x00
-#define ST_PRESS_LPS25H_FS_AVL_1260_GAIN	ST_PRESS_KPASCAL_NANO_SCALE
-#define ST_PRESS_LPS25H_FS_AVL_TEMP_GAIN	ST_PRESS_CELSIUS_NANO_SCALE
 #define ST_PRESS_LPS25H_BDU_ADDR		0x20
 #define ST_PRESS_LPS25H_BDU_MASK		0x04
 #define ST_PRESS_LPS25H_DRDY_IRQ_ADDR		0x23
@@ -109,7 +114,6 @@
 #define ST_PRESS_LPS25H_OD_IRQ_ADDR		0x22
 #define ST_PRESS_LPS25H_OD_IRQ_MASK		0x40
 #define ST_PRESS_LPS25H_MULTIREAD_BIT		true
-#define ST_PRESS_LPS25H_TEMP_OFFSET		42500
 #define ST_PRESS_LPS25H_OUT_XL_ADDR		0x28
 #define ST_TEMP_LPS25H_OUT_L_ADDR		0x2b
 
@@ -161,7 +165,9 @@ static const struct iio_chan_spec st_press_lps001wp_channels[] = {
 			.storagebits = 16,
 			.endianness = IIO_LE,
 		},
-		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),
+		.info_mask_separate =
+			BIT(IIO_CHAN_INFO_RAW) |
+			BIT(IIO_CHAN_INFO_SCALE),
 		.modified = 0,
 	},
 	{
@@ -177,7 +183,7 @@ static const struct iio_chan_spec st_press_lps001wp_channels[] = {
 		},
 		.info_mask_separate =
 			BIT(IIO_CHAN_INFO_RAW) |
-			BIT(IIO_CHAN_INFO_OFFSET),
+			BIT(IIO_CHAN_INFO_SCALE),
 		.modified = 0,
 	},
 	IIO_CHAN_SOFT_TIMESTAMP(1)
@@ -212,11 +218,14 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
 			.addr = ST_PRESS_LPS331AP_FS_ADDR,
 			.mask = ST_PRESS_LPS331AP_FS_MASK,
 			.fs_avl = {
+				/*
+				 * Pressure and temperature sensitivity values
+				 * as defined in table 3 of LPS331AP datasheet.
+				 */
 				[0] = {
 					.num = ST_PRESS_FS_AVL_1260MB,
-					.value = ST_PRESS_LPS331AP_FS_AVL_1260_VAL,
-					.gain = ST_PRESS_LPS331AP_FS_AVL_1260_GAIN,
-					.gain2 = ST_PRESS_LPS331AP_FS_AVL_TEMP_GAIN,
+					.gain = ST_PRESS_KPASCAL_NANO_SCALE,
+					.gain2 = ST_PRESS_LSB_PER_CELSIUS,
 				},
 			},
 		},
@@ -261,7 +270,17 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
 			.value_off = ST_SENSORS_DEFAULT_POWER_OFF_VALUE,
 		},
 		.fs = {
-			.addr = 0,
+			.fs_avl = {
+				/*
+				 * Pressure and temperature resolution values
+				 * as defined in table 3 of LPS001WP datasheet.
+				 */
+				[0] = {
+					.num = ST_PRESS_FS_AVL_1100MB,
+					.gain = ST_PRESS_LPS001WP_FS_AVL_PRESS_GAIN,
+					.gain2 = ST_PRESS_LPS001WP_LSB_PER_CELSIUS,
+				},
+			},
 		},
 		.bdu = {
 			.addr = ST_PRESS_LPS001WP_BDU_ADDR,
@@ -298,14 +317,15 @@ static const struct st_sensor_settings st_press_sensors_settings[] = {
 			.value_off = ST_SENSORS_DEFAULT_POWER_OFF_VALUE,
 		},
 		.fs = {
-			.addr = ST_PRESS_LPS25H_FS_ADDR,
-			.mask = ST_PRESS_LPS25H_FS_MASK,
 			.fs_avl = {
+				/*
+				 * Pressure and temperature sensitivity values
+				 * as defined in table 3 of LPS25H datasheet.
+				 */
 				[0] = {
 					.num = ST_PRESS_FS_AVL_1260MB,
-					.value = ST_PRESS_LPS25H_FS_AVL_1260_VAL,
-					.gain = ST_PRESS_LPS25H_FS_AVL_1260_GAIN,
-					.gain2 = ST_PRESS_LPS25H_FS_AVL_TEMP_GAIN,
+					.gain = ST_PRESS_KPASCAL_NANO_SCALE,
+					.gain2 = ST_PRESS_LSB_PER_CELSIUS,
 				},
 			},
 		},
@@ -364,26 +384,26 @@ static int st_press_read_raw(struct iio_dev *indio_dev,
 
 		return IIO_VAL_INT;
 	case IIO_CHAN_INFO_SCALE:
-		*val = 0;
-
 		switch (ch->type) {
 		case IIO_PRESSURE:
+			*val = 0;
 			*val2 = press_data->current_fullscale->gain;
-			break;
+			return IIO_VAL_INT_PLUS_NANO;
 		case IIO_TEMP:
+			*val = MCELSIUS_PER_CELSIUS;
 			*val2 = press_data->current_fullscale->gain2;
-			break;
+			return IIO_VAL_FRACTIONAL;
 		default:
 			err = -EINVAL;
 			goto read_error;
 		}
 
-		return IIO_VAL_INT_PLUS_NANO;
 	case IIO_CHAN_INFO_OFFSET:
 		switch (ch->type) {
 		case IIO_TEMP:
-			*val = 425;
-			*val2 = 10;
+			*val = ST_PRESS_MILLI_CELSIUS_OFFSET *
+			       press_data->current_fullscale->gain2;
+			*val2 = MCELSIUS_PER_CELSIUS;
 			break;
 		default:
 			err = -EINVAL;
@@ -425,6 +445,7 @@ static const struct iio_info press_info = {
 static const struct iio_trigger_ops st_press_trigger_ops = {
 	.owner = THIS_MODULE,
 	.set_trigger_state = ST_PRESS_TRIGGER_SET_STATE,
+	.validate_device = st_sensors_validate_device,
 };
 #define ST_PRESS_TRIGGER_OPS (&st_press_trigger_ops)
 #else
diff --git a/drivers/iio/proximity/as3935.c b/drivers/iio/proximity/as3935.c
index f4d29d5dbd5f..e2f926cdcad2 100644
--- a/drivers/iio/proximity/as3935.c
+++ b/drivers/iio/proximity/as3935.c
@@ -64,6 +64,7 @@ struct as3935_state {
 	struct delayed_work work;
 
 	u32 tune_cap;
+	u8 buffer[16]; /* 8-bit data + 56-bit padding + 64-bit timestamp */
 	u8 buf[2] ____cacheline_aligned;
 };
 
@@ -72,7 +73,8 @@ static const struct iio_chan_spec as3935_channels[] = {
 		.type           = IIO_PROXIMITY,
 		.info_mask_separate =
 			BIT(IIO_CHAN_INFO_RAW) |
-			BIT(IIO_CHAN_INFO_PROCESSED),
+			BIT(IIO_CHAN_INFO_PROCESSED) |
+			BIT(IIO_CHAN_INFO_SCALE),
 		.scan_index     = 0,
 		.scan_type = {
 			.sign           = 'u',
@@ -181,7 +183,12 @@ static int as3935_read_raw(struct iio_dev *indio_dev,
 		/* storm out of range */
 		if (*val == AS3935_DATA_MASK)
 			return -EINVAL;
-		*val *= 1000;
+
+		if (m == IIO_CHAN_INFO_PROCESSED)
+			*val *= 1000;
+		break;
+	case IIO_CHAN_INFO_SCALE:
+		*val = 1000;
 		break;
 	default:
 		return -EINVAL;
@@ -206,10 +213,10 @@ static irqreturn_t as3935_trigger_handler(int irq, void *private)
 	ret = as3935_read(st, AS3935_DATA, &val);
 	if (ret)
 		goto err_read;
-	val &= AS3935_DATA_MASK;
-	val *= 1000;
 
-	iio_push_to_buffers_with_timestamp(indio_dev, &val, pf->timestamp);
+	st->buffer[0] = val & AS3935_DATA_MASK;
+	iio_push_to_buffers_with_timestamp(indio_dev, &st->buffer,
+					   pf->timestamp);
 err_read:
 	iio_trigger_notify_done(indio_dev->trig);
 
diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index c2e257d97eff..1a2984c28b95 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -178,6 +178,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
 {
 	int ret = 0;
 	struct net_device *old_net_dev;
+	enum ib_gid_type old_gid_type;
 
 	/* in rdma_cap_roce_gid_table, this funciton should be protected by a
 	 * sleep-able lock.
@@ -199,6 +200,7 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
 	}
 
 	old_net_dev = table->data_vec[ix].attr.ndev;
+	old_gid_type = table->data_vec[ix].attr.gid_type;
 	if (old_net_dev && old_net_dev != attr->ndev)
 		dev_put(old_net_dev);
 	/* if modify_gid failed, just delete the old gid */
@@ -207,10 +209,14 @@ static int write_gid(struct ib_device *ib_dev, u8 port,
 		attr = &zattr;
 		table->data_vec[ix].context = NULL;
 	}
-	if (default_gid)
-		table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
+
 	memcpy(&table->data_vec[ix].gid, gid, sizeof(*gid));
 	memcpy(&table->data_vec[ix].attr, attr, sizeof(*attr));
+	if (default_gid) {
+		table->data_vec[ix].props |= GID_TABLE_ENTRY_DEFAULT;
+		if (action == GID_TABLE_WRITE_ACTION_DEL)
+			table->data_vec[ix].attr.gid_type = old_gid_type;
+	}
 	if (table->data_vec[ix].attr.ndev &&
 	    table->data_vec[ix].attr.ndev != old_net_dev)
 		dev_hold(table->data_vec[ix].attr.ndev);
@@ -405,7 +411,9 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port,
 
 	for (ix = 0; ix < table->sz; ix++)
 		if (table->data_vec[ix].attr.ndev == ndev)
-			if (!del_gid(ib_dev, port, table, ix, false))
+			if (!del_gid(ib_dev, port, table, ix,
+				     !!(table->data_vec[ix].props &
+					GID_TABLE_ENTRY_DEFAULT)))
 				deleted = true;
 
 	write_unlock_irq(&table->rwlock);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 1d92e091e22e..c99525512b34 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3452,14 +3452,14 @@ static int cm_establish(struct ib_cm_id *cm_id)
 	work->cm_event.event = IB_CM_USER_ESTABLISHED;
 
 	/* Check if the device started its remove_one */
-	spin_lock_irq(&cm.lock);
+	spin_lock_irqsave(&cm.lock, flags);
 	if (!cm_dev->going_down) {
 		queue_delayed_work(cm.wq, &work->work, 0);
 	} else {
 		kfree(work);
 		ret = -ENODEV;
 	}
-	spin_unlock_irq(&cm.lock);
+	spin_unlock_irqrestore(&cm.lock, flags);
 
 out:
 	return ret;
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index f0c91ba3178a..ad1b1adcf6f0 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -708,17 +708,6 @@ static void cma_deref_id(struct rdma_id_private *id_priv)
 		complete(&id_priv->comp);
 }
 
-static int cma_disable_callback(struct rdma_id_private *id_priv,
-				enum rdma_cm_state state)
-{
-	mutex_lock(&id_priv->handler_mutex);
-	if (id_priv->state != state) {
-		mutex_unlock(&id_priv->handler_mutex);
-		return -EINVAL;
-	}
-	return 0;
-}
-
 struct rdma_cm_id *rdma_create_id(struct net *net,
 				  rdma_cm_event_handler event_handler,
 				  void *context, enum rdma_port_space ps,
@@ -1671,11 +1660,12 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	struct rdma_cm_event event;
 	int ret = 0;
 
+	mutex_lock(&id_priv->handler_mutex);
 	if ((ib_event->event != IB_CM_TIMEWAIT_EXIT &&
-		cma_disable_callback(id_priv, RDMA_CM_CONNECT)) ||
+	     id_priv->state != RDMA_CM_CONNECT) ||
 	    (ib_event->event == IB_CM_TIMEWAIT_EXIT &&
-		cma_disable_callback(id_priv, RDMA_CM_DISCONNECT)))
-		return 0;
+	     id_priv->state != RDMA_CM_DISCONNECT))
+		goto out;
 
 	memset(&event, 0, sizeof event);
 	switch (ib_event->event) {
@@ -1870,7 +1860,7 @@ static int cma_check_req_qp_type(struct rdma_cm_id *id, struct ib_cm_event *ib_e
 
 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 {
-	struct rdma_id_private *listen_id, *conn_id;
+	struct rdma_id_private *listen_id, *conn_id = NULL;
 	struct rdma_cm_event event;
 	struct net_device *net_dev;
 	int offset, ret;
@@ -1884,9 +1874,10 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		goto net_dev_put;
 	}
 
-	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN)) {
+	mutex_lock(&listen_id->handler_mutex);
+	if (listen_id->state != RDMA_CM_LISTEN) {
 		ret = -ECONNABORTED;
-		goto net_dev_put;
+		goto err1;
 	}
 
 	memset(&event, 0, sizeof event);
@@ -1976,8 +1967,9 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 	struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
 	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
 
-	if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
-		return 0;
+	mutex_lock(&id_priv->handler_mutex);
+	if (id_priv->state != RDMA_CM_CONNECT)
+		goto out;
 
 	memset(&event, 0, sizeof event);
 	switch (iw_event->event) {
@@ -2029,6 +2021,7 @@ static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
 		return ret;
 	}
 
+out:
 	mutex_unlock(&id_priv->handler_mutex);
 	return ret;
 }
@@ -2039,13 +2032,15 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	struct rdma_cm_id *new_cm_id;
 	struct rdma_id_private *listen_id, *conn_id;
 	struct rdma_cm_event event;
-	int ret;
+	int ret = -ECONNABORTED;
 	struct sockaddr *laddr = (struct sockaddr *)&iw_event->local_addr;
 	struct sockaddr *raddr = (struct sockaddr *)&iw_event->remote_addr;
 
 	listen_id = cm_id->context;
-	if (cma_disable_callback(listen_id, RDMA_CM_LISTEN))
-		return -ECONNABORTED;
+
+	mutex_lock(&listen_id->handler_mutex);
+	if (listen_id->state != RDMA_CM_LISTEN)
+		goto out;
 
 	/* Create a new RDMA id for the new IW CM ID */
 	new_cm_id = rdma_create_id(listen_id->id.route.addr.dev_addr.net,
@@ -3216,8 +3211,9 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 	struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
 	int ret = 0;
 
-	if (cma_disable_callback(id_priv, RDMA_CM_CONNECT))
-		return 0;
+	mutex_lock(&id_priv->handler_mutex);
+	if (id_priv->state != RDMA_CM_CONNECT)
+		goto out;
 
 	memset(&event, 0, sizeof event);
 	switch (ib_event->event) {
@@ -3673,12 +3669,13 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 	struct rdma_id_private *id_priv;
 	struct cma_multicast *mc = multicast->context;
 	struct rdma_cm_event event;
-	int ret;
+	int ret = 0;
 
 	id_priv = mc->id_priv;
-	if (cma_disable_callback(id_priv, RDMA_CM_ADDR_BOUND) &&
-	    cma_disable_callback(id_priv, RDMA_CM_ADDR_RESOLVED))
-		return 0;
+	mutex_lock(&id_priv->handler_mutex);
+	if (id_priv->state != RDMA_CM_ADDR_BOUND &&
+	    id_priv->state != RDMA_CM_ADDR_RESOLVED)
+		goto out;
 
 	if (!status)
 		status = cma_set_qkey(id_priv, be32_to_cpu(multicast->rec.qkey));
@@ -3720,6 +3717,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
 		return 0;
 	}
 
+out:
 	mutex_unlock(&id_priv->handler_mutex);
 	return 0;
 }
@@ -3878,12 +3876,12 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 	gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num -
 		   rdma_start_port(id_priv->cma_dev->device)];
 	if (addr->sa_family == AF_INET) {
-		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) {
+			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
 			err = cma_igmp_send(ndev, &mc->multicast.ib->rec.mgid,
 					    true);
-		if (!err) {
-			mc->igmp_joined = true;
-			mc->multicast.ib->rec.hop_limit = IPV6_DEFAULT_HOPLIMIT;
+			if (!err)
+				mc->igmp_joined = true;
 		}
 	} else {
 		if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 5516fb070344..5c155fa91eec 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -661,6 +661,9 @@ int ib_query_port(struct ib_device *device,
 	if (err || port_attr->subnet_prefix)
 		return err;
 
+	if (rdma_port_get_link_layer(device, port_num) != IB_LINK_LAYER_INFINIBAND)
+		return 0;
+
 	err = ib_query_gid(device, port_num, 0, &gid, NULL);
 	if (err)
 		return err;
@@ -1024,7 +1027,8 @@ static int __init ib_core_init(void)
 		goto err_mad;
 	}
 
-	if (ib_add_ibnl_clients()) {
+	ret = ib_add_ibnl_clients();
+	if (ret) {
 		pr_warn("Couldn't register ibnl clients\n");
 		goto err_sa;
 	}
diff --git a/drivers/infiniband/core/iwpm_msg.c b/drivers/infiniband/core/iwpm_msg.c
index 43e3fa27102b..1c41b95cefec 100644
--- a/drivers/infiniband/core/iwpm_msg.c
+++ b/drivers/infiniband/core/iwpm_msg.c
@@ -506,7 +506,7 @@ int iwpm_add_and_query_mapping_cb(struct sk_buff *skb,
 	if (!nlmsg_request) {
 		pr_info("%s: Could not find a matching request (seq = %u)\n",
 				 __func__, msg_seq);
-			return -EINVAL;
+		return -EINVAL;
 	}
 	pm_msg = nlmsg_request->req_buffer;
 	local_sockaddr = (struct sockaddr_storage *)
diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 82fb511112da..2d49228f28b2 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -1638,9 +1638,9 @@ static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
 		/* Now, check to see if there are any methods still in use */
 		if (!check_method_table(method)) {
 			/* If not, release management method table */
-			 kfree(method);
-			 class->method_table[mgmt_class] = NULL;
-			 /* Any management classes left ? */
+			kfree(method);
+			class->method_table[mgmt_class] = NULL;
+			/* Any management classes left ? */
 			if (!check_class_table(class)) {
 				/* If not, release management class table */
 				kfree(class);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 5e573bb18660..a5793c8f1590 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -889,9 +889,9 @@ static struct attribute *alloc_hsa_lifespan(char *name, u8 port_num)
 static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
 			   u8 port_num)
 {
-	struct attribute_group *hsag = NULL;
+	struct attribute_group *hsag;
 	struct rdma_hw_stats *stats;
-	int i = 0, ret;
+	int i, ret;
 
 	stats = device->alloc_hw_stats(device, port_num);
 
@@ -899,19 +899,22 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
 		return;
 
 	if (!stats->names || stats->num_counters <= 0)
-		goto err;
+		goto err_free_stats;
 
+	/*
+	 * Two extra attribute elements here, one for the lifespan entry and
+	 * one to NULL terminate the list for the sysfs core code
+	 */
 	hsag = kzalloc(sizeof(*hsag) +
-		       // 1 extra for the lifespan config entry
-		       sizeof(void *) * (stats->num_counters + 1),
+		       sizeof(void *) * (stats->num_counters + 2),
 		       GFP_KERNEL);
 	if (!hsag)
-		return;
+		goto err_free_stats;
 
 	ret = device->get_hw_stats(device, stats, port_num,
 				   stats->num_counters);
 	if (ret != stats->num_counters)
-		goto err;
+		goto err_free_hsag;
 
 	stats->timestamp = jiffies;
 
@@ -922,10 +925,13 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
 		hsag->attrs[i] = alloc_hsa(i, port_num, stats->names[i]);
 		if (!hsag->attrs[i])
 			goto err;
+		sysfs_attr_init(hsag->attrs[i]);
 	}
 
 	/* treat an error here as non-fatal */
 	hsag->attrs[i] = alloc_hsa_lifespan("lifespan", port_num);
+	if (hsag->attrs[i])
+		sysfs_attr_init(hsag->attrs[i]);
 
 	if (port) {
 		struct kobject *kobj = &port->kobj;
@@ -946,10 +952,12 @@ static void setup_hw_stats(struct ib_device *device, struct ib_port *port,
 	return;
 
 err:
-	kfree(stats);
 	for (; i >= 0; i--)
 		kfree(hsag->attrs[i]);
+err_free_hsag:
 	kfree(hsag);
+err_free_stats:
+	kfree(stats);
 	return;
 }
 
diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 1a8babb8ee3c..825021d1008b 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -1747,7 +1747,7 @@ static int create_qp(struct ib_uverbs_file *file,
 	struct ib_srq			*srq = NULL;
 	struct ib_qp			*qp;
 	char				*buf;
-	struct ib_qp_init_attr		attr;
+	struct ib_qp_init_attr		attr = {};
 	struct ib_uverbs_ex_create_qp_resp resp;
 	int				ret;
 
diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index 1d7d4cf442e3..6298f54b4137 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c
@@ -511,12 +511,16 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
 		ah_attr->grh.dgid = sgid;
 
 		if (!rdma_cap_eth_ah(device, port_num)) {
-			ret = ib_find_cached_gid_by_port(device, &dgid,
-							 IB_GID_TYPE_IB,
-							 port_num, NULL,
-							 &gid_index);
-			if (ret)
-				return ret;
+			if (dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
+				ret = ib_find_cached_gid_by_port(device, &dgid,
+								 IB_GID_TYPE_IB,
+								 port_num, NULL,
+								 &gid_index);
+				if (ret)
+					return ret;
+			} else {
+				gid_index = 0;
+			}
 		}
 
 		ah_attr->grh.sgid_index = (u8) gid_index;
diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 6e7050ab9e16..14d7eeb09be6 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -300,16 +300,15 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
 	const struct cpumask *node_mask,
 		*proc_mask = tsk_cpus_allowed(current);
 	struct cpu_mask_set *set = &dd->affinity->proc;
-	char buf[1024];
 
 	/*
 	 * check whether process/context affinity has already
 	 * been set
 	 */
 	if (cpumask_weight(proc_mask) == 1) {
-		scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask));
-		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %s",
-			  current->pid, current->comm, buf);
+		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
+			  current->pid, current->comm,
+			  cpumask_pr_args(proc_mask));
 		/*
 		 * Mark the pre-set CPU as used. This is atomic so we don't
 		 * need the lock
@@ -318,9 +317,9 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
 		cpumask_set_cpu(cpu, &set->used);
 		goto done;
 	} else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
-		scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(proc_mask));
-		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %s",
-			  current->pid, current->comm, buf);
+		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
+			  current->pid, current->comm,
+			  cpumask_pr_args(proc_mask));
 		goto done;
 	}
 
@@ -356,8 +355,8 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
 	cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ?
 				  &dd->affinity->rcv_intr.mask :
 				  &dd->affinity->rcv_intr.used));
-	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(intrs));
-	hfi1_cdbg(PROC, "CPUs used by interrupts: %s", buf);
+	hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
+		  cpumask_pr_args(intrs));
 
 	/*
 	 * If we don't have a NUMA node requested, preference is towards
@@ -366,18 +365,16 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
 	if (node == -1)
 		node = dd->node;
 	node_mask = cpumask_of_node(node);
-	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(node_mask));
-	hfi1_cdbg(PROC, "device on NUMA %u, CPUs %s", node, buf);
+	hfi1_cdbg(PROC, "device on NUMA %u, CPUs %*pbl", node,
+		  cpumask_pr_args(node_mask));
 
 	/* diff will hold all unused cpus */
 	cpumask_andnot(diff, &set->mask, &set->used);
-	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(diff));
-	hfi1_cdbg(PROC, "unused CPUs (all) %s", buf);
+	hfi1_cdbg(PROC, "unused CPUs (all) %*pbl", cpumask_pr_args(diff));
 
 	/* get cpumask of available CPUs on preferred NUMA */
 	cpumask_and(mask, diff, node_mask);
-	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask));
-	hfi1_cdbg(PROC, "available cpus on NUMA %s", buf);
+	hfi1_cdbg(PROC, "available cpus on NUMA %*pbl", cpumask_pr_args(mask));
 
 	/*
 	 * At first, we don't want to place processes on the same
@@ -395,8 +392,8 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
 		cpumask_andnot(diff, &set->mask, &set->used);
 		cpumask_andnot(mask, diff, node_mask);
 	}
-	scnprintf(buf, 1024, "%*pbl", cpumask_pr_args(mask));
-	hfi1_cdbg(PROC, "possible CPUs for process %s", buf);
+	hfi1_cdbg(PROC, "possible CPUs for process %*pbl",
+		  cpumask_pr_args(mask));
 
 	cpu = cpumask_first(mask);
 	if (cpu >= nr_cpu_ids) /* empty */
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 3b876da745a1..f5de85178055 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -1037,7 +1037,7 @@ static void dc_shutdown(struct hfi1_devdata *);
 static void dc_start(struct hfi1_devdata *);
 static int qos_rmt_entries(struct hfi1_devdata *dd, unsigned int *mp,
 			   unsigned int *np);
-static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd);
+static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd);
 
 /*
  * Error interrupt table entry.  This is used as input to the interrupt
@@ -6962,8 +6962,6 @@ void handle_link_down(struct work_struct *work)
 	}
 
 	reset_neighbor_info(ppd);
-	if (ppd->mgmt_allowed)
-		remove_full_mgmt_pkey(ppd);
 
 	/* disable the port */
 	clear_rcvctrl(ppd->dd, RCV_CTRL_RCV_PORT_ENABLE_SMASK);
@@ -7070,12 +7068,16 @@ static void add_full_mgmt_pkey(struct hfi1_pportdata *ppd)
 			    __func__, ppd->pkeys[2], FULL_MGMT_P_KEY);
 	ppd->pkeys[2] = FULL_MGMT_P_KEY;
 	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
+	hfi1_event_pkey_change(ppd->dd, ppd->port);
 }
 
-static void remove_full_mgmt_pkey(struct hfi1_pportdata *ppd)
+static void clear_full_mgmt_pkey(struct hfi1_pportdata *ppd)
 {
-	ppd->pkeys[2] = 0;
-	(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
+	if (ppd->pkeys[2] != 0) {
+		ppd->pkeys[2] = 0;
+		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
+		hfi1_event_pkey_change(ppd->dd, ppd->port);
+	}
 }
 
 /*
@@ -7832,8 +7834,8 @@ static void handle_dcc_err(struct hfi1_devdata *dd, u32 unused, u64 reg)
 			 * save first 2 flits in the packet that caused
 			 * the error
 			 */
-			 dd->err_info_rcvport.packet_flit1 = hdr0;
-			 dd->err_info_rcvport.packet_flit2 = hdr1;
+			dd->err_info_rcvport.packet_flit1 = hdr0;
+			dd->err_info_rcvport.packet_flit2 = hdr1;
 		}
 		switch (info) {
 		case 1:
@@ -9168,6 +9170,13 @@ int start_link(struct hfi1_pportdata *ppd)
 		return 0;
 	}
 
+	/*
+	 * FULL_MGMT_P_KEY is cleared from the pkey table, so that the
+	 * pkey table can be configured properly if the HFI unit is connected
+	 * to a switch port with MgmtAllowed=NO
+	 */
+	clear_full_mgmt_pkey(ppd);
+
 	return set_link_state(ppd, HLS_DN_POLL);
 }
 
@@ -9777,7 +9786,7 @@ static void set_send_length(struct hfi1_pportdata *ppd)
 	u64 len1 = 0, len2 = (((dd->vld[15].mtu + max_hb) >> 2)
 			      & SEND_LEN_CHECK1_LEN_VL15_MASK) <<
 		SEND_LEN_CHECK1_LEN_VL15_SHIFT;
-	int i;
+	int i, j;
 	u32 thres;
 
 	for (i = 0; i < ppd->vls_supported; i++) {
@@ -9801,7 +9810,10 @@ static void set_send_length(struct hfi1_pportdata *ppd)
 			    sc_mtu_to_threshold(dd->vld[i].sc,
 						dd->vld[i].mtu,
 						dd->rcd[0]->rcvhdrqentsize));
-		sc_set_cr_threshold(dd->vld[i].sc, thres);
+		for (j = 0; j < INIT_SC_PER_VL; j++)
+			sc_set_cr_threshold(
+					pio_select_send_context_vl(dd, j, i),
+					    thres);
 	}
 	thres = min(sc_percent_to_threshold(dd->vld[15].sc, 50),
 		    sc_mtu_to_threshold(dd->vld[15].sc,
@@ -11906,7 +11918,7 @@ static void update_synth_timer(unsigned long opaque)
 		hfi1_cdbg(CNTR, "[%d] No update necessary", dd->unit);
 	}
 
-mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
+	mod_timer(&dd->synth_stats_timer, jiffies + HZ * SYNTH_CNT_TIME);
 }
 
 #define C_MAX_NAME 13 /* 12 chars + one for /0 */
diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c
index 7a5b0e676cc7..c702a009608f 100644
--- a/drivers/infiniband/hw/hfi1/file_ops.c
+++ b/drivers/infiniband/hw/hfi1/file_ops.c
@@ -203,6 +203,9 @@ static long hfi1_file_ioctl(struct file *fp, unsigned int cmd,
 
 	switch (cmd) {
 	case HFI1_IOCTL_ASSIGN_CTXT:
+		if (uctxt)
+			return -EINVAL;
+
 		if (copy_from_user(&uinfo,
 				   (struct hfi1_user_info __user *)arg,
 				   sizeof(uinfo)))
diff --git a/drivers/infiniband/hw/hfi1/init.c b/drivers/infiniband/hw/hfi1/init.c
index 5cc492e5776d..eed971ccd2a1 100644
--- a/drivers/infiniband/hw/hfi1/init.c
+++ b/drivers/infiniband/hw/hfi1/init.c
@@ -1337,7 +1337,7 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
 		dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
 				  (void *)dd->rcvhdrtail_dummy_kvaddr,
 				  dd->rcvhdrtail_dummy_physaddr);
-				  dd->rcvhdrtail_dummy_kvaddr = NULL;
+		dd->rcvhdrtail_dummy_kvaddr = NULL;
 	}
 
 	for (ctxt = 0; tmp && ctxt < dd->num_rcv_contexts; ctxt++) {
@@ -1383,7 +1383,7 @@ static void postinit_cleanup(struct hfi1_devdata *dd)
 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	int ret = 0, j, pidx, initfail;
-	struct hfi1_devdata *dd = NULL;
+	struct hfi1_devdata *dd = ERR_PTR(-EINVAL);
 	struct hfi1_pportdata *ppd;
 
 	/* First, lock the non-writable module parameters */
diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 219029576ba0..fca07a1d6c28 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -78,6 +78,16 @@ static inline void clear_opa_smp_data(struct opa_smp *smp)
 	memset(data, 0, size);
 }
 
+void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port)
+{
+	struct ib_event event;
+
+	event.event = IB_EVENT_PKEY_CHANGE;
+	event.device = &dd->verbs_dev.rdi.ibdev;
+	event.element.port_num = port;
+	ib_dispatch_event(&event);
+}
+
 static void send_trap(struct hfi1_ibport *ibp, void *data, unsigned len)
 {
 	struct ib_mad_send_buf *send_buf;
@@ -1418,15 +1428,10 @@ static int set_pkeys(struct hfi1_devdata *dd, u8 port, u16 *pkeys)
 	}
 
 	if (changed) {
-		struct ib_event event;
-
 		(void)hfi1_set_ib_cfg(ppd, HFI1_IB_CFG_PKEYS, 0);
-
-		event.event = IB_EVENT_PKEY_CHANGE;
-		event.device = &dd->verbs_dev.rdi.ibdev;
-		event.element.port_num = port;
-		ib_dispatch_event(&event);
+		hfi1_event_pkey_change(dd, port);
 	}
+
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/mad.h b/drivers/infiniband/hw/hfi1/mad.h
index 55ee08675333..8b734aaae88a 100644
--- a/drivers/infiniband/hw/hfi1/mad.h
+++ b/drivers/infiniband/hw/hfi1/mad.h
@@ -434,4 +434,6 @@ struct sc2vlnt {
 		    COUNTER_MASK(1, 3) | \
 		    COUNTER_MASK(1, 4))
 
+void hfi1_event_pkey_change(struct hfi1_devdata *dd, u8 port);
+
 #endif				/* _HFI1_MAD_H */
diff --git a/drivers/infiniband/hw/hfi1/pio.c b/drivers/infiniband/hw/hfi1/pio.c
index d5edb1afbb8f..d4022450b73f 100644
--- a/drivers/infiniband/hw/hfi1/pio.c
+++ b/drivers/infiniband/hw/hfi1/pio.c
@@ -995,7 +995,7 @@ static void sc_wait_for_packet_egress(struct send_context *sc, int pause)
 		/* counter is reset if occupancy count changes */
 		if (reg != reg_prev)
 			loop = 0;
-		if (loop > 500) {
+		if (loop > 50000) {
 			/* timed out - bounce the link */
 			dd_dev_err(dd,
 				   "%s: context %u(%u) timeout waiting for packets to egress, remaining count %u, bouncing link\n",
@@ -1798,6 +1798,21 @@ static void pio_map_rcu_callback(struct rcu_head *list)
 }
 
 /*
+ * Set credit return threshold for the kernel send context
+ */
+static void set_threshold(struct hfi1_devdata *dd, int scontext, int i)
+{
+	u32 thres;
+
+	thres = min(sc_percent_to_threshold(dd->kernel_send_context[scontext],
+					    50),
+		    sc_mtu_to_threshold(dd->kernel_send_context[scontext],
+					dd->vld[i].mtu,
+					dd->rcd[0]->rcvhdrqentsize));
+	sc_set_cr_threshold(dd->kernel_send_context[scontext], thres);
+}
+
+/*
  * pio_map_init - called when #vls change
  * @dd: hfi1_devdata
  * @port: port number
@@ -1872,11 +1887,16 @@ int pio_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_scontexts)
 			if (!newmap->map[i])
 				goto bail;
 			newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
-			/* assign send contexts */
+			/*
+			 * assign send contexts and
+			 * adjust credit return threshold
+			 */
 			for (j = 0; j < sz; j++) {
-				if (dd->kernel_send_context[scontext])
+				if (dd->kernel_send_context[scontext]) {
 					newmap->map[i]->ksc[j] =
 					dd->kernel_send_context[scontext];
+					set_threshold(dd, scontext, i);
+				}
 				if (++scontext >= first_scontext +
 						  vl_scontexts[i])
 					/* wrap back to first send context */
diff --git a/drivers/infiniband/hw/hfi1/qsfp.c b/drivers/infiniband/hw/hfi1/qsfp.c
index 2441669f0817..9fb561682c66 100644
--- a/drivers/infiniband/hw/hfi1/qsfp.c
+++ b/drivers/infiniband/hw/hfi1/qsfp.c
@@ -579,7 +579,8 @@ int qsfp_dump(struct hfi1_pportdata *ppd, char *buf, int len)
 
 	if (ppd->qsfp_info.cache_valid) {
 		if (QSFP_IS_CU(cache[QSFP_MOD_TECH_OFFS]))
-			sprintf(lenstr, "%dM ", cache[QSFP_MOD_LEN_OFFS]);
+			snprintf(lenstr, sizeof(lenstr), "%dM ",
+				 cache[QSFP_MOD_LEN_OFFS]);
 
 		power_byte = cache[QSFP_MOD_PWR_OFFS];
 		sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n",
diff --git a/drivers/infiniband/hw/hfi1/trace.c b/drivers/infiniband/hw/hfi1/trace.c
index 79b2952c0dfb..4cfb13771897 100644
--- a/drivers/infiniband/hw/hfi1/trace.c
+++ b/drivers/infiniband/hw/hfi1/trace.c
@@ -214,19 +214,6 @@ const char *print_u32_array(
 	return ret;
 }
 
-const char *print_u64_array(
-	struct trace_seq *p,
-	u64 *arr, int len)
-{
-	int i;
-	const char *ret = trace_seq_buffer_ptr(p);
-
-	for (i = 0; i < len; i++)
-		trace_seq_printf(p, "%s0x%016llx", i == 0 ? "" : " ", arr[i]);
-	trace_seq_putc(p, 0);
-	return ret;
-}
-
 __hfi1_trace_fn(PKT);
 __hfi1_trace_fn(PROC);
 __hfi1_trace_fn(SDMA);
diff --git a/drivers/infiniband/hw/hfi1/user_sdma.c b/drivers/infiniband/hw/hfi1/user_sdma.c
index 29f4795f866c..47ffd273ecbd 100644
--- a/drivers/infiniband/hw/hfi1/user_sdma.c
+++ b/drivers/infiniband/hw/hfi1/user_sdma.c
@@ -183,7 +183,7 @@ struct user_sdma_iovec {
 	struct sdma_mmu_node *node;
 };
 
-#define SDMA_CACHE_NODE_EVICT BIT(0)
+#define SDMA_CACHE_NODE_EVICT 0
 
 struct sdma_mmu_node {
 	struct mmu_rb_node rb;
@@ -1355,11 +1355,11 @@ static int set_txreq_header(struct user_sdma_request *req,
 		 */
 		SDMA_DBG(req, "TID offset %ubytes %uunits om%u",
 			 req->tidoffset, req->tidoffset / req->omfactor,
-			 !!(req->omfactor - KDETH_OM_SMALL));
+			 req->omfactor != KDETH_OM_SMALL);
 		KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
 			  req->tidoffset / req->omfactor);
 		KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
-			  !!(req->omfactor - KDETH_OM_SMALL));
+			  req->omfactor != KDETH_OM_SMALL);
 	}
 done:
 	trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.c b/drivers/infiniband/hw/hfi1/verbs_txreq.c
index bc95c4112c61..d8fb056526f8 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.c
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.c
@@ -92,11 +92,10 @@ void hfi1_put_txreq(struct verbs_txreq *tx)
 
 struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 				struct rvt_qp *qp)
+	__must_hold(&qp->s_lock)
 {
 	struct verbs_txreq *tx = ERR_PTR(-EBUSY);
-	unsigned long flags;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
 	write_seqlock(&dev->iowait_lock);
 	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		struct hfi1_qp_priv *priv;
@@ -116,7 +115,6 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 	}
 out:
 	write_sequnlock(&dev->iowait_lock);
-	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return tx;
 }
 
diff --git a/drivers/infiniband/hw/hfi1/verbs_txreq.h b/drivers/infiniband/hw/hfi1/verbs_txreq.h
index 1cf69b2fe4a5..a1d6e0807f97 100644
--- a/drivers/infiniband/hw/hfi1/verbs_txreq.h
+++ b/drivers/infiniband/hw/hfi1/verbs_txreq.h
@@ -73,6 +73,7 @@ struct verbs_txreq *__get_txreq(struct hfi1_ibdev *dev,
 
 static inline struct verbs_txreq *get_txreq(struct hfi1_ibdev *dev,
 					    struct rvt_qp *qp)
+	__must_hold(&qp->slock)
 {
 	struct verbs_txreq *tx;
 	struct hfi1_qp_priv *priv = qp->priv;
diff --git a/drivers/infiniband/hw/i40iw/i40iw.h b/drivers/infiniband/hw/i40iw/i40iw.h
index 8b9532034558..b738acdb9b02 100644
--- a/drivers/infiniband/hw/i40iw/i40iw.h
+++ b/drivers/infiniband/hw/i40iw/i40iw.h
@@ -113,6 +113,8 @@
 
 #define IW_HMC_OBJ_TYPE_NUM ARRAY_SIZE(iw_hmc_obj_types)
 #define IW_CFG_FPM_QP_COUNT		32768
+#define I40IW_MAX_PAGES_PER_FMR		512
+#define I40IW_MIN_PAGES_PER_FMR		1
 
 #define I40IW_MTU_TO_MSS		40
 #define I40IW_DEFAULT_MSS		1460
diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
index 02a735b64208..33959ed14563 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c
@@ -79,6 +79,7 @@ static int i40iw_query_device(struct ib_device *ibdev,
 	props->max_qp_init_rd_atom = props->max_qp_rd_atom;
 	props->atomic_cap = IB_ATOMIC_NONE;
 	props->max_map_per_fmr = 1;
+	props->max_fast_reg_page_list_len = I40IW_MAX_PAGES_PER_FMR;
 	return 0;
 }
 
@@ -1527,7 +1528,7 @@ static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd,
 	mutex_lock(&iwdev->pbl_mutex);
 	status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
 	mutex_unlock(&iwdev->pbl_mutex);
-	if (!status)
+	if (status)
 		goto err1;
 
 	if (palloc->level != I40IW_LEVEL_1)
@@ -2149,6 +2150,7 @@ static int i40iw_post_send(struct ib_qp *ibqp,
 			struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
 			struct i40iw_fast_reg_stag_info info;
 
+			memset(&info, 0, sizeof(info));
 			info.access_rights = I40IW_ACCESS_FLAGS_LOCALREAD;
 			info.access_rights |= i40iw_get_user_access(flags);
 			info.stag_key = reg_wr(ib_wr)->key & 0xff;
@@ -2158,10 +2160,14 @@ static int i40iw_post_send(struct ib_qp *ibqp,
 			info.addr_type = I40IW_ADDR_TYPE_VA_BASED;
 			info.va = (void *)(uintptr_t)iwmr->ibmr.iova;
 			info.total_len = iwmr->ibmr.length;
+			info.reg_addr_pa = *(u64 *)palloc->level1.addr;
 			info.first_pm_pbl_index = palloc->level1.idx;
 			info.local_fence = ib_wr->send_flags & IB_SEND_FENCE;
 			info.signaled = ib_wr->send_flags & IB_SEND_SIGNALED;
 
+			if (iwmr->npages > I40IW_MIN_PAGES_PER_FMR)
+				info.chunk_size = 1;
+
 			if (page_shift == 21)
 				info.page_size = 1; /* 2M page */
 
@@ -2327,13 +2333,16 @@ static int i40iw_req_notify_cq(struct ib_cq *ibcq,
 {
 	struct i40iw_cq *iwcq;
 	struct i40iw_cq_uk *ukcq;
-	enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_SOLICITED;
+	unsigned long flags;
+	enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_EVENT;
 
 	iwcq = (struct i40iw_cq *)ibcq;
 	ukcq = &iwcq->sc_cq.cq_uk;
-	if (notify_flags == IB_CQ_NEXT_COMP)
-		cq_notify = IW_CQ_COMPL_EVENT;
+	if (notify_flags == IB_CQ_SOLICITED)
+		cq_notify = IW_CQ_COMPL_SOLICITED;
+	spin_lock_irqsave(&iwcq->lock, flags);
 	ukcq->ops.iw_cq_request_notification(ukcq, cq_notify);
+	spin_unlock_irqrestore(&iwcq->lock, flags);
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c
index 105246fba2e7..5fc623362731 100644
--- a/drivers/infiniband/hw/mlx4/ah.c
+++ b/drivers/infiniband/hw/mlx4/ah.c
@@ -47,6 +47,7 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
 
 	ah->av.ib.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));
 	ah->av.ib.g_slid  = ah_attr->src_path_bits;
+	ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		ah->av.ib.g_slid   |= 0x80;
 		ah->av.ib.gid_index = ah_attr->grh.sgid_index;
@@ -64,7 +65,6 @@ static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
 		       !(1 << ah->av.ib.stat_rate & dev->caps.stat_rate_support))
 			--ah->av.ib.stat_rate;
 	}
-	ah->av.ib.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 28);
 
 	return &ah->ibah;
 }
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index d68f506c1922..9c2e53d28f98 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -527,7 +527,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 		tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
 	spin_unlock(&tun_qp->tx_lock);
 	if (ret)
-		goto out;
+		goto end;
 
 	tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
 	if (tun_qp->tx_ring[tun_tx_ix].ah)
@@ -596,9 +596,15 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
 	wr.wr.send_flags = IB_SEND_SIGNALED;
 
 	ret = ib_post_send(src_qp, &wr.wr, &bad_wr);
-out:
-	if (ret)
-		ib_destroy_ah(ah);
+	if (!ret)
+		return 0;
+ out:
+	spin_lock(&tun_qp->tx_lock);
+	tun_qp->tx_ix_tail++;
+	spin_unlock(&tun_qp->tx_lock);
+	tun_qp->tx_ring[tun_tx_ix].ah = NULL;
+end:
+	ib_destroy_ah(ah);
 	return ret;
 }
 
@@ -1326,9 +1332,15 @@ int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
 
 
 	ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
+	if (!ret)
+		return 0;
+
+	spin_lock(&sqp->tx_lock);
+	sqp->tx_ix_tail++;
+	spin_unlock(&sqp->tx_lock);
+	sqp->tx_ring[wire_tx_ix].ah = NULL;
 out:
-	if (ret)
-		ib_destroy_ah(ah);
+	ib_destroy_ah(ah);
 	return ret;
 }
 
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index b01ef6eee6e8..42a46078d7d5 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -505,9 +505,9 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 			props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
 		else
 			props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
-	if (dev->steering_support ==  MLX4_STEERING_MODE_DEVICE_MANAGED)
-		props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
 	}
+	if (dev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
+		props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
 
 	props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
 
@@ -1704,6 +1704,9 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
 	struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
 	int is_bonded = mlx4_is_bonded(dev);
 
+	if (flow_attr->port < 1 || flow_attr->port > qp->device->phys_port_cnt)
+		return ERR_PTR(-EINVAL);
+
 	if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
 	    (flow_attr->type != IB_FLOW_ATTR_NORMAL))
 		return ERR_PTR(-EOPNOTSUPP);
diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h
index 6c5ac5d8f32f..29acda249612 100644
--- a/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -139,7 +139,7 @@ struct mlx4_ib_mr {
 	u32			max_pages;
 	struct mlx4_mr		mmr;
 	struct ib_umem	       *umem;
-	void			*pages_alloc;
+	size_t			page_map_size;
 };
 
 struct mlx4_ib_mw {
diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 631272172a0b..5d73989d9771 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c
@@ -277,20 +277,23 @@ mlx4_alloc_priv_pages(struct ib_device *device,
 		      struct mlx4_ib_mr *mr,
 		      int max_pages)
 {
-	int size = max_pages * sizeof(u64);
-	int add_size;
 	int ret;
 
-	add_size = max_t(int, MLX4_MR_PAGES_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
+	/* Ensure that size is aligned to DMA cacheline
+	 * requirements.
+	 * max_pages is limited to MLX4_MAX_FAST_REG_PAGES
+	 * so page_map_size will never cross PAGE_SIZE.
+	 */
+	mr->page_map_size = roundup(max_pages * sizeof(u64),
+				    MLX4_MR_PAGES_ALIGN);
 
-	mr->pages_alloc = kzalloc(size + add_size, GFP_KERNEL);
-	if (!mr->pages_alloc)
+	/* Prevent cross page boundary allocation. */
+	mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL);
+	if (!mr->pages)
 		return -ENOMEM;
 
-	mr->pages = PTR_ALIGN(mr->pages_alloc, MLX4_MR_PAGES_ALIGN);
-
 	mr->page_map = dma_map_single(device->dma_device, mr->pages,
-				      size, DMA_TO_DEVICE);
+				      mr->page_map_size, DMA_TO_DEVICE);
 
 	if (dma_mapping_error(device->dma_device, mr->page_map)) {
 		ret = -ENOMEM;
@@ -298,9 +301,9 @@ mlx4_alloc_priv_pages(struct ib_device *device,
 	}
 
 	return 0;
-err:
-	kfree(mr->pages_alloc);
 
+err:
+	free_page((unsigned long)mr->pages);
 	return ret;
 }
 
@@ -309,11 +312,10 @@ mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
 {
 	if (mr->pages) {
 		struct ib_device *device = mr->ibmr.device;
-		int size = mr->max_pages * sizeof(u64);
 
 		dma_unmap_single(device->dma_device, mr->page_map,
-				 size, DMA_TO_DEVICE);
-		kfree(mr->pages_alloc);
+				 mr->page_map_size, DMA_TO_DEVICE);
+		free_page((unsigned long)mr->pages);
 		mr->pages = NULL;
 	}
 }
@@ -537,14 +539,12 @@ int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
 	mr->npages = 0;
 
 	ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
-				   sizeof(u64) * mr->max_pages,
-				   DMA_TO_DEVICE);
+				   mr->page_map_size, DMA_TO_DEVICE);
 
 	rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);
 
 	ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
-				      sizeof(u64) * mr->max_pages,
-				      DMA_TO_DEVICE);
+				      mr->page_map_size, DMA_TO_DEVICE);
 
 	return rc;
 }
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 81b0e1fbec1d..8db8405c1e99 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -362,7 +362,7 @@ static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags)
 			sizeof (struct mlx4_wqe_raddr_seg);
 	case MLX4_IB_QPT_RC:
 		return sizeof (struct mlx4_wqe_ctrl_seg) +
-			sizeof (struct mlx4_wqe_atomic_seg) +
+			sizeof (struct mlx4_wqe_masked_atomic_seg) +
 			sizeof (struct mlx4_wqe_raddr_seg);
 	case MLX4_IB_QPT_SMI:
 	case MLX4_IB_QPT_GSI:
@@ -1191,8 +1191,10 @@ static struct ib_qp *_mlx4_ib_create_qp(struct ib_pd *pd,
 	{
 		err = create_qp_common(to_mdev(pd->device), pd, init_attr,
 				       udata, 0, &qp, gfp);
-		if (err)
+		if (err) {
+			kfree(qp);
 			return ERR_PTR(err);
+		}
 
 		qp->ibqp.qp_num = qp->mqp.qpn;
 		qp->xrcdn = xrcdn;
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index dabcc65bd65e..9c0e67bd2ba7 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -822,7 +822,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
 	int eqn;
 	int err;
 
-	if (entries < 0)
+	if (entries < 0 ||
+	    (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))))
 		return ERR_PTR(-EINVAL);
 
 	if (check_cq_create_flags(attr->flags))
@@ -1168,11 +1169,16 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 		return -ENOSYS;
 	}
 
-	if (entries < 1)
+	if (entries < 1 ||
+	    entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz))) {
+		mlx5_ib_warn(dev, "wrong entries number %d, max %d\n",
+			     entries,
+			     1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz));
 		return -EINVAL;
+	}
 
 	entries = roundup_pow_of_two(entries + 1);
-	if (entries >  (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1)
+	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1)
 		return -EINVAL;
 
 	if (entries == ibcq->cqe + 1)
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 1534af113058..364aab9f3c9e 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -121,7 +121,7 @@ static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext,
 	pma_cnt_ext->port_xmit_data =
 		cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.octets,
 					 transmitted_ib_multicast.octets) >> 2);
-	pma_cnt_ext->port_xmit_data =
+	pma_cnt_ext->port_rcv_data =
 		cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.octets,
 					 received_ib_multicast.octets) >> 2);
 	pma_cnt_ext->port_xmit_packets =
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index c72797cd9e4f..b48ad85315dc 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -524,6 +524,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	    MLX5_CAP_ETH(dev->mdev, scatter_fcs))
 		props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
 
+	if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
+		props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
+
 	props->vendor_part_id	   = mdev->pdev->device;
 	props->hw_ver		   = mdev->pdev->revision;
 
@@ -915,7 +918,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
 	gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
 	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
-	resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
+	if (mlx5_core_is_pf(dev->mdev) && MLX5_CAP_GEN(dev->mdev, bf))
+		resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
 	resp.cache_line_size = L1_CACHE_BYTES;
 	resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
 	resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
@@ -988,7 +992,14 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	if (field_avail(typeof(resp), cqe_version, udata->outlen))
 		resp.response_length += sizeof(resp.cqe_version);
 
-	if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
+	/*
+	 * We don't want to expose information from the PCI bar that is located
+	 * after 4096 bytes, so if the arch only supports larger pages, let's
+	 * pretend we don't support reading the HCA's core clock. This is also
+	 * forced by mmap function.
+	 */
+	if (PAGE_SIZE <= 4096 &&
+	    field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
 		resp.comp_mask |=
 			MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
 		resp.hca_core_clock_offset =
@@ -1798,7 +1809,7 @@ static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
 {
 	struct mlx5_ib_dev *dev =
 		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
-	return sprintf(buf, "%d.%d.%d\n", fw_rev_maj(dev->mdev),
+	return sprintf(buf, "%d.%d.%04d\n", fw_rev_maj(dev->mdev),
 		       fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev));
 }
 
@@ -1866,14 +1877,11 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
 		break;
 
 	case MLX5_DEV_EVENT_PORT_DOWN:
+	case MLX5_DEV_EVENT_PORT_INITIALIZED:
 		ibev.event = IB_EVENT_PORT_ERR;
 		port = (u8)param;
 		break;
 
-	case MLX5_DEV_EVENT_PORT_INITIALIZED:
-		/* not used by ULPs */
-		return;
-
 	case MLX5_DEV_EVENT_LID_CHANGE:
 		ibev.event = IB_EVENT_LID_CHANGE;
 		port = (u8)param;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 504117657d41..ce0a7ab35a22 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -235,6 +235,8 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
 		qp->rq.max_gs = 0;
 		qp->rq.wqe_cnt = 0;
 		qp->rq.wqe_shift = 0;
+		cap->max_recv_wr = 0;
+		cap->max_recv_sge = 0;
 	} else {
 		if (ucmd) {
 			qp->rq.wqe_cnt = ucmd->rq_wqe_count;
@@ -1851,13 +1853,15 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
 static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 			 const struct ib_ah_attr *ah,
 			 struct mlx5_qp_path *path, u8 port, int attr_mask,
-			 u32 path_flags, const struct ib_qp_attr *attr)
+			 u32 path_flags, const struct ib_qp_attr *attr,
+			 bool alt)
 {
 	enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
 	int err;
 
 	if (attr_mask & IB_QP_PKEY_INDEX)
-		path->pkey_index = attr->pkey_index;
+		path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index :
+						     attr->pkey_index);
 
 	if (ah->ah_flags & IB_AH_GRH) {
 		if (ah->grh.sgid_index >=
@@ -1877,9 +1881,9 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 							  ah->grh.sgid_index);
 		path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
 	} else {
-		path->fl = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
-		path->free_ar = (path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x80 :
-									0;
+		path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
+		path->fl_free_ar |=
+			(path_flags & MLX5_PATH_FLAG_FREE_AR) ? 0x40 : 0;
 		path->rlid = cpu_to_be16(ah->dlid);
 		path->grh_mlid = ah->src_path_bits & 0x7f;
 		if (ah->ah_flags & IB_AH_GRH)
@@ -1903,7 +1907,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	path->port = port;
 
 	if (attr_mask & IB_QP_TIMEOUT)
-		path->ackto_lt = attr->timeout << 3;
+		path->ackto_lt = (alt ? attr->alt_timeout : attr->timeout) << 3;
 
 	if ((qp->ibqp.qp_type == IB_QPT_RAW_PACKET) && qp->sq.wqe_cnt)
 		return modify_raw_packet_eth_prio(dev->mdev,
@@ -2264,7 +2268,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 		context->log_pg_sz_remote_qpn = cpu_to_be32(attr->dest_qp_num);
 
 	if (attr_mask & IB_QP_PKEY_INDEX)
-		context->pri_path.pkey_index = attr->pkey_index;
+		context->pri_path.pkey_index = cpu_to_be16(attr->pkey_index);
 
 	/* todo implement counter_index functionality */
 
@@ -2277,7 +2281,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 	if (attr_mask & IB_QP_AV) {
 		err = mlx5_set_path(dev, qp, &attr->ah_attr, &context->pri_path,
 				    attr_mask & IB_QP_PORT ? attr->port_num : qp->port,
-				    attr_mask, 0, attr);
+				    attr_mask, 0, attr, false);
 		if (err)
 			goto out;
 	}
@@ -2288,7 +2292,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 	if (attr_mask & IB_QP_ALT_PATH) {
 		err = mlx5_set_path(dev, qp, &attr->alt_ah_attr,
 				    &context->alt_path,
-				    attr->alt_port_num, attr_mask, 0, attr);
+				    attr->alt_port_num,
+				    attr_mask | IB_QP_PKEY_INDEX | IB_QP_TIMEOUT,
+				    0, attr, true);
 		if (err)
 			goto out;
 	}
@@ -3326,10 +3332,11 @@ static u8 get_fence(u8 fence, struct ib_send_wr *wr)
 			return MLX5_FENCE_MODE_SMALL_AND_FENCE;
 		else
 			return fence;
-
-	} else {
-		return 0;
+	} else if (unlikely(wr->send_flags & IB_SEND_FENCE)) {
+		return MLX5_FENCE_MODE_FENCE;
 	}
+
+	return 0;
 }
 
 static int begin_wqe(struct mlx5_ib_qp *qp, void **seg,
@@ -4013,11 +4020,12 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
 		to_ib_ah_attr(dev, &qp_attr->ah_attr, &context->pri_path);
 		to_ib_ah_attr(dev, &qp_attr->alt_ah_attr, &context->alt_path);
-		qp_attr->alt_pkey_index = context->alt_path.pkey_index & 0x7f;
+		qp_attr->alt_pkey_index =
+			be16_to_cpu(context->alt_path.pkey_index);
 		qp_attr->alt_port_num	= qp_attr->alt_ah_attr.port_num;
 	}
 
-	qp_attr->pkey_index = context->pri_path.pkey_index & 0x7f;
+	qp_attr->pkey_index = be16_to_cpu(context->pri_path.pkey_index);
 	qp_attr->port_num = context->pri_path.port;
 
 	/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
@@ -4079,17 +4087,19 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 	qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
 
 	if (!ibqp->uobject) {
-		qp_attr->cap.max_send_wr  = qp->sq.wqe_cnt;
+		qp_attr->cap.max_send_wr  = qp->sq.max_post;
 		qp_attr->cap.max_send_sge = qp->sq.max_gs;
+		qp_init_attr->qp_context = ibqp->qp_context;
 	} else {
 		qp_attr->cap.max_send_wr  = 0;
 		qp_attr->cap.max_send_sge = 0;
 	}
 
-	/* We don't support inline sends for kernel QPs (yet), and we
-	 * don't know what userspace's value should be.
-	 */
-	qp_attr->cap.max_inline_data = 0;
+	qp_init_attr->qp_type = ibqp->qp_type;
+	qp_init_attr->recv_cq = ibqp->recv_cq;
+	qp_init_attr->send_cq = ibqp->send_cq;
+	qp_init_attr->srq = ibqp->srq;
+	qp_attr->cap.max_inline_data = qp->max_inline_data;
 
 	qp_init_attr->cap	     = qp_attr->cap;
 
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index ff946d5f59e4..382466a90da7 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -2178,6 +2178,11 @@ static ssize_t qib_write(struct file *fp, const char __user *data,
 
 	switch (cmd.type) {
 	case QIB_CMD_ASSIGN_CTXT:
+		if (rcd) {
+			ret = -EINVAL;
+			goto bail;
+		}
+
 		ret = qib_assign_ctxt(fp, &cmd.cmd.user_info);
 		if (ret)
 			goto bail;
diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 7209fbc03ccb..a0b6ebee4d8a 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -36,7 +36,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/sched.h>
 #include <linux/hugetlb.h>
-#include <linux/dma-attrs.h>
 #include <linux/iommu.h>
 #include <linux/workqueue.h>
 #include <linux/list.h>
@@ -112,10 +111,6 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 	int i;
 	int flags;
 	dma_addr_t pa;
-	DEFINE_DMA_ATTRS(attrs);
-
-	if (dmasync)
-		dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
 
 	if (!can_do_mlock())
 		return -EPERM;
diff --git a/drivers/infiniband/sw/rdmavt/qp.c b/drivers/infiniband/sw/rdmavt/qp.c
index 5fa4d4d81ee0..41ba7e9cadaa 100644
--- a/drivers/infiniband/sw/rdmavt/qp.c
+++ b/drivers/infiniband/sw/rdmavt/qp.c
@@ -369,8 +369,8 @@ static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
 			/* wrap to first map page, invert bit 0 */
 			offset = qpt->incr | ((offset & 1) ^ 1);
 		}
-		/* there can be no bits at shift and below */
-		WARN_ON(offset & (rdi->dparms.qos_shift - 1));
+		/* there can be no set bits in low-order QoS bits */
+		WARN_ON(offset & (BIT(rdi->dparms.qos_shift) - 1));
 		qpn = mk_qpn(qpt, map, offset);
 	}
 
@@ -502,6 +502,12 @@ static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
  */
 static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 		  enum ib_qp_type type)
+	__releases(&qp->s_lock)
+	__releases(&qp->s_hlock)
+	__releases(&qp->r_lock)
+	__acquires(&qp->r_lock)
+	__acquires(&qp->s_hlock)
+	__acquires(&qp->s_lock)
 {
 	if (qp->state != IB_QPS_RESET) {
 		qp->state = IB_QPS_RESET;
@@ -570,12 +576,6 @@ static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
 	qp->s_ssn = 1;
 	qp->s_lsn = 0;
 	qp->s_mig_state = IB_MIG_MIGRATED;
-	if (qp->s_ack_queue)
-		memset(
-			qp->s_ack_queue,
-			0,
-			rvt_max_atomic(rdi) *
-				sizeof(*qp->s_ack_queue));
 	qp->r_head_ack_queue = 0;
 	qp->s_tail_ack_queue = 0;
 	qp->s_num_rd_atomic = 0;
@@ -699,8 +699,10 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 		 * initialization that is needed.
 		 */
 		priv = rdi->driver_f.qp_priv_alloc(rdi, qp, gfp);
-		if (!priv)
+		if (IS_ERR(priv)) {
+			ret = priv;
 			goto bail_qp;
+		}
 		qp->priv = priv;
 		qp->timeout_jiffies =
 			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
diff --git a/drivers/infiniband/sw/rdmavt/vt.c b/drivers/infiniband/sw/rdmavt/vt.c
index e1cc2cc42f25..30c4fda7a05a 100644
--- a/drivers/infiniband/sw/rdmavt/vt.c
+++ b/drivers/infiniband/sw/rdmavt/vt.c
@@ -501,9 +501,7 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
 			    !rdi->driver_f.quiesce_qp ||
 			    !rdi->driver_f.notify_error_qp ||
 			    !rdi->driver_f.mtu_from_qp ||
-			    !rdi->driver_f.mtu_to_path_mtu ||
-			    !rdi->driver_f.shut_down_port ||
-			    !rdi->driver_f.cap_mask_chg)
+			    !rdi->driver_f.mtu_to_path_mtu)
 				return -EINVAL;
 		break;
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index bab7db6fa9ab..4f7d9b48df64 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -94,6 +94,7 @@ enum {
 	IPOIB_NEIGH_TBL_FLUSH	  = 12,
 	IPOIB_FLAG_DEV_ADDR_SET	  = 13,
 	IPOIB_FLAG_DEV_ADDR_CTRL  = 14,
+	IPOIB_FLAG_GOING_DOWN	  = 15,
 
 	IPOIB_MAX_BACKOFF_SECONDS = 16,
 
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index b2f42835d76d..951d9abcca8b 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -1486,6 +1486,10 @@ static ssize_t set_mode(struct device *d, struct device_attribute *attr,
 {
 	struct net_device *dev = to_net_dev(d);
 	int ret;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	if (test_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags))
+		return -EPERM;
 
 	if (!rtnl_trylock())
 		return restart_syscall();
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 45c40a17d6a6..dc6d241b9406 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -1015,7 +1015,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
 	if (ib_query_gid(priv->ca, priv->port, 0, &gid0, NULL))
 		return false;
 
-	netif_addr_lock(priv->dev);
+	netif_addr_lock_bh(priv->dev);
 
 	/* The subnet prefix may have changed, update it now so we won't have
 	 * to do it later
@@ -1026,12 +1026,12 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
 
 	search_gid.global.interface_id = priv->local_gid.global.interface_id;
 
-	netif_addr_unlock(priv->dev);
+	netif_addr_unlock_bh(priv->dev);
 
 	err = ib_find_gid(priv->ca, &search_gid, IB_GID_TYPE_IB,
 			  priv->dev, &port, &index);
 
-	netif_addr_lock(priv->dev);
+	netif_addr_lock_bh(priv->dev);
 
 	if (search_gid.global.interface_id !=
 	    priv->local_gid.global.interface_id)
@@ -1092,7 +1092,7 @@ static bool ipoib_dev_addr_changed_valid(struct ipoib_dev_priv *priv)
 	}
 
 out:
-	netif_addr_unlock(priv->dev);
+	netif_addr_unlock_bh(priv->dev);
 
 	return ret;
 }
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 2d7c16346648..5f58c41ef787 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1206,7 +1206,9 @@ struct ipoib_neigh *ipoib_neigh_get(struct net_device *dev, u8 *daddr)
 				neigh = NULL;
 				goto out_unlock;
 			}
-			neigh->alive = jiffies;
+
+			if (likely(skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE))
+				neigh->alive = jiffies;
 			goto out_unlock;
 		}
 	}
@@ -1851,7 +1853,7 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid)
 	struct ipoib_dev_priv *child_priv;
 	struct net_device *netdev = priv->dev;
 
-	netif_addr_lock(netdev);
+	netif_addr_lock_bh(netdev);
 
 	memcpy(&priv->local_gid.global.interface_id,
 	       &gid->global.interface_id,
@@ -1859,7 +1861,7 @@ static void set_base_guid(struct ipoib_dev_priv *priv, union ib_gid *gid)
 	memcpy(netdev->dev_addr + 4, &priv->local_gid, sizeof(priv->local_gid));
 	clear_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags);
 
-	netif_addr_unlock(netdev);
+	netif_addr_unlock_bh(netdev);
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
 		down_read(&priv->vlan_rwsem);
@@ -1875,7 +1877,7 @@ static int ipoib_check_lladdr(struct net_device *dev,
 	union ib_gid *gid = (union ib_gid *)(ss->__data + 4);
 	int ret = 0;
 
-	netif_addr_lock(dev);
+	netif_addr_lock_bh(dev);
 
 	/* Make sure the QPN, reserved and subnet prefix match the current
 	 * lladdr, it also makes sure the lladdr is unicast.
@@ -1885,7 +1887,7 @@ static int ipoib_check_lladdr(struct net_device *dev,
 	    gid->global.interface_id == 0)
 		ret = -EINVAL;
 
-	netif_addr_unlock(dev);
+	netif_addr_unlock_bh(dev);
 
 	return ret;
 }
@@ -2141,6 +2143,9 @@ static void ipoib_remove_one(struct ib_device *device, void *client_data)
 		ib_unregister_event_handler(&priv->event_handler);
 		flush_workqueue(ipoib_workqueue);
 
+		/* mark interface in the middle of destruction */
+		set_bit(IPOIB_FLAG_GOING_DOWN, &priv->flags);
+
 		rtnl_lock();
 		dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
 		rtnl_unlock();
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 82fbc9442608..d3394b6add24 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -582,13 +582,13 @@ void ipoib_mcast_join_task(struct work_struct *work)
 		return;
 	}
 	priv->local_lid = port_attr.lid;
-	netif_addr_lock(dev);
+	netif_addr_lock_bh(dev);
 
 	if (!test_bit(IPOIB_FLAG_DEV_ADDR_SET, &priv->flags)) {
-		netif_addr_unlock(dev);
+		netif_addr_unlock_bh(dev);
 		return;
 	}
-	netif_addr_unlock(dev);
+	netif_addr_unlock_bh(dev);
 
 	spin_lock_irq(&priv->lock);
 	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags))
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
index 64a35595eab8..a2f9f29c6ab5 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c
@@ -131,6 +131,9 @@ int ipoib_vlan_add(struct net_device *pdev, unsigned short pkey)
 
 	ppriv = netdev_priv(pdev);
 
+	if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
+		return -EPERM;
+
 	snprintf(intf_name, sizeof intf_name, "%s.%04x",
 		 ppriv->dev->name, pkey);
 	priv = ipoib_intf_alloc(intf_name);
@@ -183,6 +186,9 @@ int ipoib_vlan_delete(struct net_device *pdev, unsigned short pkey)
 
 	ppriv = netdev_priv(pdev);
 
+	if (test_bit(IPOIB_FLAG_GOING_DOWN, &ppriv->flags))
+		return -EPERM;
+
 	if (!rtnl_trylock())
 		return restart_syscall();
 
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 646de170ec12..3322ed750172 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -1457,7 +1457,6 @@ static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
 {
 	unsigned int sg_offset = 0;
 
-	state->desc = req->indirect_desc;
 	state->fr.next = req->fr_list;
 	state->fr.end = req->fr_list + ch->target->mr_per_cmd;
 	state->sg = scat;
@@ -1489,7 +1488,6 @@ static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
 	struct scatterlist *sg;
 	int i;
 
-	state->desc = req->indirect_desc;
 	for_each_sg(scat, sg, count, i) {
 		srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
 			     ib_sg_dma_len(dev->dev, sg),
@@ -1655,6 +1653,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
 				   target->indirect_size, DMA_TO_DEVICE);
 
 	memset(&state, 0, sizeof(state));
+	state.desc = req->indirect_desc;
 	if (dev->use_fast_reg)
 		ret = srp_map_sg_fr(&state, ch, req, scat, count);
 	else if (dev->use_fmr)
@@ -3526,7 +3525,7 @@ static void srp_add_one(struct ib_device *device)
 	int mr_page_shift, p;
 	u64 max_pages_per_mr;
 
-	srp_dev = kmalloc(sizeof *srp_dev, GFP_KERNEL);
+	srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
 	if (!srp_dev)
 		return;
 
@@ -3586,8 +3585,6 @@ static void srp_add_one(struct ib_device *device)
 						   IB_ACCESS_REMOTE_WRITE);
 		if (IS_ERR(srp_dev->global_mr))
 			goto err_pd;
-	} else {
-		srp_dev->global_mr = NULL;
 	}
 
 	for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index e68b20cba70b..4a4155640d51 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1638,8 +1638,7 @@ retry:
 	 */
 	qp_init->cap.max_send_wr = srp_sq_size / 2;
 	qp_init->cap.max_rdma_ctxs = srp_sq_size / 2;
-	qp_init->cap.max_send_sge = max(sdev->device->attrs.max_sge_rd,
-					sdev->device->attrs.max_sge);
+	qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
 	qp_init->port_num = ch->sport->port;
 
 	ch->qp = ib_create_qp(sdev->pd, qp_init);
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.h b/drivers/infiniband/ulp/srpt/ib_srpt.h
index fee6bfd7ca21..389030487da7 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.h
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.h
@@ -106,6 +106,7 @@ enum {
 	SRP_LOGIN_RSP_MULTICHAN_MAINTAINED = 0x2,
 
 	SRPT_DEF_SG_TABLESIZE = 128,
+	SRPT_DEF_SG_PER_WQE = 16,
 
 	MIN_SRPT_SQ_SIZE = 16,
 	DEF_SRPT_SQ_SIZE = 4096,
diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c
index 94b68213c50d..5f6b3bcab078 100644
--- a/drivers/iommu/arm-smmu-v3.c
+++ b/drivers/iommu/arm-smmu-v3.c
@@ -1941,6 +1941,7 @@ static struct iommu_ops arm_smmu_ops = {
 	.attach_dev		= arm_smmu_attach_dev,
 	.map			= arm_smmu_map,
 	.unmap			= arm_smmu_unmap,
+	.map_sg			= default_iommu_map_sg,
 	.iova_to_phys		= arm_smmu_iova_to_phys,
 	.add_device		= arm_smmu_add_device,
 	.remove_device		= arm_smmu_remove_device,
diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c
index a644d0cec2d8..10700945994e 100644
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -3222,11 +3222,6 @@ static int __init init_dmars(void)
 			}
 		}
 
-		iommu_flush_write_buffer(iommu);
-		iommu_set_root_entry(iommu);
-		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
-		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
-
 		if (!ecap_pass_through(iommu->ecap))
 			hw_pass_through = 0;
 #ifdef CONFIG_INTEL_IOMMU_SVM
@@ -3235,6 +3230,18 @@ static int __init init_dmars(void)
 #endif
 	}
 
+	/*
+	 * Now that qi is enabled on all iommus, set the root entry and flush
+	 * caches. This is required on some Intel X58 chipsets, otherwise the
+	 * flush_context function will loop forever and the boot hangs.
+	 */
+	for_each_active_iommu(iommu, drhd) {
+		iommu_flush_write_buffer(iommu);
+		iommu_set_root_entry(iommu);
+		iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
+		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
+	}
+
 	if (iommu_pass_through)
 		iommu_identity_mapping |= IDENTMAP_ALL;
 
diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index c7d6156ff536..25b4627cb57f 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -815,7 +815,7 @@ static int rk_iommu_attach_device(struct iommu_domain *domain,
 	dte_addr = virt_to_phys(rk_domain->dt);
 	for (i = 0; i < iommu->num_mmu; i++) {
 		rk_iommu_write(iommu->bases[i], RK_MMU_DTE_ADDR, dte_addr);
-		rk_iommu_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
+		rk_iommu_base_command(iommu->bases[i], RK_MMU_CMD_ZAP_CACHE);
 		rk_iommu_write(iommu->bases[i], RK_MMU_INT_MASK, RK_MMU_IRQ_MASK);
 	}
 
diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c
index 3b5e10aa48ab..8a4adbeb2b8c 100644
--- a/drivers/irqchip/irq-mips-gic.c
+++ b/drivers/irqchip/irq-mips-gic.c
@@ -746,6 +746,12 @@ static int gic_irq_domain_alloc(struct irq_domain *d, unsigned int virq,
 		/* verify that it doesn't conflict with an IPI irq */
 		if (test_bit(spec->hwirq, ipi_resrv))
 			return -EBUSY;
+
+		hwirq = GIC_SHARED_TO_HWIRQ(spec->hwirq);
+
+		return irq_domain_set_hwirq_and_chip(d, virq, hwirq,
+						     &gic_level_irq_controller,
+						     NULL);
 	} else {
 		base_hwirq = find_first_bit(ipi_resrv, gic_shared_intrs);
 		if (base_hwirq == gic_shared_intrs) {
@@ -867,10 +873,14 @@ static int gic_dev_domain_alloc(struct irq_domain *d, unsigned int virq,
 						    &gic_level_irq_controller,
 						    NULL);
 		if (ret)
-			return ret;
+			goto error;
 	}
 
 	return 0;
+
+error:
+	irq_domain_free_irqs_parent(d, virq, nr_irqs);
+	return ret;
 }
 
 void gic_dev_domain_free(struct irq_domain *d, unsigned int virq,
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index 3495d5d6547f..3bce44893021 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -53,11 +53,12 @@ static void led_timer_function(unsigned long data)
 
 	if (!led_cdev->blink_delay_on || !led_cdev->blink_delay_off) {
 		led_set_brightness_nosleep(led_cdev, LED_OFF);
+		led_cdev->flags &= ~LED_BLINK_SW;
 		return;
 	}
 
 	if (led_cdev->flags & LED_BLINK_ONESHOT_STOP) {
-		led_cdev->flags &= ~LED_BLINK_ONESHOT_STOP;
+		led_cdev->flags &=  ~(LED_BLINK_ONESHOT_STOP | LED_BLINK_SW);
 		return;
 	}
 
@@ -151,6 +152,7 @@ static void led_set_software_blink(struct led_classdev *led_cdev,
 		return;
 	}
 
+	led_cdev->flags |= LED_BLINK_SW;
 	mod_timer(&led_cdev->blink_timer, jiffies + 1);
 }
 
@@ -219,6 +221,7 @@ void led_stop_software_blink(struct led_classdev *led_cdev)
 	del_timer_sync(&led_cdev->blink_timer);
 	led_cdev->blink_delay_on = 0;
 	led_cdev->blink_delay_off = 0;
+	led_cdev->flags &= ~LED_BLINK_SW;
 }
 EXPORT_SYMBOL_GPL(led_stop_software_blink);
 
@@ -226,10 +229,10 @@ void led_set_brightness(struct led_classdev *led_cdev,
 			enum led_brightness brightness)
 {
 	/*
-	 * In case blinking is on delay brightness setting
+	 * If software blink is active, delay brightness setting
 	 * until the next timer tick.
 	 */
-	if (led_cdev->blink_delay_on || led_cdev->blink_delay_off) {
+	if (led_cdev->flags & LED_BLINK_SW) {
 		/*
 		 * If we need to disable soft blinking delegate this to the
 		 * work queue task to avoid problems in case we are called
diff --git a/drivers/leds/trigger/ledtrig-heartbeat.c b/drivers/leds/trigger/ledtrig-heartbeat.c
index 410c39c62dc7..c9f386213e9e 100644
--- a/drivers/leds/trigger/ledtrig-heartbeat.c
+++ b/drivers/leds/trigger/ledtrig-heartbeat.c
@@ -19,6 +19,7 @@
 #include <linux/sched.h>
 #include <linux/leds.h>
 #include <linux/reboot.h>
+#include <linux/suspend.h>
 #include "../leds.h"
 
 static int panic_heartbeats;
@@ -154,6 +155,30 @@ static struct led_trigger heartbeat_led_trigger = {
 	.deactivate = heartbeat_trig_deactivate,
 };
 
+static int heartbeat_pm_notifier(struct notifier_block *nb,
+				 unsigned long pm_event, void *unused)
+{
+	int rc;
+
+	switch (pm_event) {
+	case PM_SUSPEND_PREPARE:
+	case PM_HIBERNATION_PREPARE:
+	case PM_RESTORE_PREPARE:
+		led_trigger_unregister(&heartbeat_led_trigger);
+		break;
+	case PM_POST_SUSPEND:
+	case PM_POST_HIBERNATION:
+	case PM_POST_RESTORE:
+		rc = led_trigger_register(&heartbeat_led_trigger);
+		if (rc)
+			pr_err("could not re-register heartbeat trigger\n");
+		break;
+	default:
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
 static int heartbeat_reboot_notifier(struct notifier_block *nb,
 				     unsigned long code, void *unused)
 {
@@ -168,6 +193,10 @@ static int heartbeat_panic_notifier(struct notifier_block *nb,
 	return NOTIFY_DONE;
 }
 
+static struct notifier_block heartbeat_pm_nb = {
+	.notifier_call = heartbeat_pm_notifier,
+};
+
 static struct notifier_block heartbeat_reboot_nb = {
 	.notifier_call = heartbeat_reboot_notifier,
 };
@@ -184,12 +213,14 @@ static int __init heartbeat_trig_init(void)
 		atomic_notifier_chain_register(&panic_notifier_list,
 					       &heartbeat_panic_nb);
 		register_reboot_notifier(&heartbeat_reboot_nb);
+		register_pm_notifier(&heartbeat_pm_nb);
 	}
 	return rc;
 }
 
 static void __exit heartbeat_trig_exit(void)
 {
+	unregister_pm_notifier(&heartbeat_pm_nb);
 	unregister_reboot_notifier(&heartbeat_reboot_nb);
 	atomic_notifier_chain_unregister(&panic_notifier_list,
 					 &heartbeat_panic_nb);
diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c
index b73c6e7d28e4..6f2c8522e14a 100644
--- a/drivers/mcb/mcb-core.c
+++ b/drivers/mcb/mcb-core.c
@@ -61,21 +61,39 @@ static int mcb_probe(struct device *dev)
 	struct mcb_driver *mdrv = to_mcb_driver(dev->driver);
 	struct mcb_device *mdev = to_mcb_device(dev);
 	const struct mcb_device_id *found_id;
+	struct module *carrier_mod;
+	int ret;
 
 	found_id = mcb_match_id(mdrv->id_table, mdev);
 	if (!found_id)
 		return -ENODEV;
 
-	return mdrv->probe(mdev, found_id);
+	carrier_mod = mdev->dev.parent->driver->owner;
+	if (!try_module_get(carrier_mod))
+		return -EINVAL;
+
+	get_device(dev);
+	ret = mdrv->probe(mdev, found_id);
+	if (ret) {
+		/* Probe failed: mcb_remove() will not run, so undo both refs. */
+		module_put(carrier_mod);
+		put_device(dev);
+	}
+
+	return ret;
 }
 
 static int mcb_remove(struct device *dev)
 {
 	struct mcb_driver *mdrv = to_mcb_driver(dev->driver);
 	struct mcb_device *mdev = to_mcb_device(dev);
+	struct module *carrier_mod;
 
 	mdrv->remove(mdev);
 
+	carrier_mod = mdev->dev.parent->driver->owner;
+	module_put(carrier_mod);
+
 	put_device(&mdev->dev);
 
 	return 0;
diff --git a/drivers/media/usb/uvc/uvc_v4l2.c b/drivers/media/usb/uvc/uvc_v4l2.c
index d7723ce772b3..c04bc6afb965 100644
--- a/drivers/media/usb/uvc/uvc_v4l2.c
+++ b/drivers/media/usb/uvc/uvc_v4l2.c
@@ -1274,8 +1274,6 @@ struct uvc_xu_control_mapping32 {
 static int uvc_v4l2_get_xu_mapping(struct uvc_xu_control_mapping *kp,
 			const struct uvc_xu_control_mapping32 __user *up)
 {
-	struct uvc_menu_info __user *umenus;
-	struct uvc_menu_info __user *kmenus;
 	compat_caddr_t p;
 
 	if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
@@ -1292,17 +1290,7 @@ static int uvc_v4l2_get_xu_mapping(struct uvc_xu_control_mapping *kp,
 
 	if (__get_user(p, &up->menu_info))
 		return -EFAULT;
-	umenus = compat_ptr(p);
-	if (!access_ok(VERIFY_READ, umenus, kp->menu_count * sizeof(*umenus)))
-		return -EFAULT;
-
-	kmenus = compat_alloc_user_space(kp->menu_count * sizeof(*kmenus));
-	if (kmenus == NULL)
-		return -EFAULT;
-	kp->menu_info = kmenus;
-
-	if (copy_in_user(kmenus, umenus, kp->menu_count * sizeof(*umenus)))
-		return -EFAULT;
+	kp->menu_info = compat_ptr(p);
 
 	return 0;
 }
@@ -1310,10 +1298,6 @@ static int uvc_v4l2_get_xu_mapping(struct uvc_xu_control_mapping *kp,
 static int uvc_v4l2_put_xu_mapping(const struct uvc_xu_control_mapping *kp,
 			struct uvc_xu_control_mapping32 __user *up)
 {
-	struct uvc_menu_info __user *umenus;
-	struct uvc_menu_info __user *kmenus = kp->menu_info;
-	compat_caddr_t p;
-
 	if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) ||
 	    __copy_to_user(up, kp, offsetof(typeof(*up), menu_info)) ||
 	    __put_user(kp->menu_count, &up->menu_count))
@@ -1322,16 +1306,6 @@ static int uvc_v4l2_put_xu_mapping(const struct uvc_xu_control_mapping *kp,
 	if (__clear_user(up->reserved, sizeof(up->reserved)))
 		return -EFAULT;
 
-	if (kp->menu_count == 0)
-		return 0;
-
-	if (get_user(p, &up->menu_info))
-		return -EFAULT;
-	umenus = compat_ptr(p);
-
-	if (copy_in_user(umenus, kmenus, kp->menu_count * sizeof(*umenus)))
-		return -EFAULT;
-
 	return 0;
 }
 
@@ -1346,8 +1320,6 @@ struct uvc_xu_control_query32 {
 static int uvc_v4l2_get_xu_query(struct uvc_xu_control_query *kp,
 			const struct uvc_xu_control_query32 __user *up)
 {
-	u8 __user *udata;
-	u8 __user *kdata;
 	compat_caddr_t p;
 
 	if (!access_ok(VERIFY_READ, up, sizeof(*up)) ||
@@ -1361,17 +1333,7 @@ static int uvc_v4l2_get_xu_query(struct uvc_xu_control_query *kp,
 
 	if (__get_user(p, &up->data))
 		return -EFAULT;
-	udata = compat_ptr(p);
-	if (!access_ok(VERIFY_READ, udata, kp->size))
-		return -EFAULT;
-
-	kdata = compat_alloc_user_space(kp->size);
-	if (kdata == NULL)
-		return -EFAULT;
-	kp->data = kdata;
-
-	if (copy_in_user(kdata, udata, kp->size))
-		return -EFAULT;
+	kp->data = compat_ptr(p);
 
 	return 0;
 }
@@ -1379,26 +1341,10 @@ static int uvc_v4l2_get_xu_query(struct uvc_xu_control_query *kp,
 static int uvc_v4l2_put_xu_query(const struct uvc_xu_control_query *kp,
 			struct uvc_xu_control_query32 __user *up)
 {
-	u8 __user *udata;
-	u8 __user *kdata = kp->data;
-	compat_caddr_t p;
-
 	if (!access_ok(VERIFY_WRITE, up, sizeof(*up)) ||
 	    __copy_to_user(up, kp, offsetof(typeof(*up), data)))
 		return -EFAULT;
 
-	if (kp->size == 0)
-		return 0;
-
-	if (get_user(p, &up->data))
-		return -EFAULT;
-	udata = compat_ptr(p);
-	if (!access_ok(VERIFY_READ, udata, kp->size))
-		return -EFAULT;
-
-	if (copy_in_user(udata, kdata, kp->size))
-		return -EFAULT;
-
 	return 0;
 }
 
@@ -1408,47 +1354,44 @@ static int uvc_v4l2_put_xu_query(const struct uvc_xu_control_query *kp,
 static long uvc_v4l2_compat_ioctl32(struct file *file,
 		     unsigned int cmd, unsigned long arg)
 {
+	struct uvc_fh *handle = file->private_data;
 	union {
 		struct uvc_xu_control_mapping xmap;
 		struct uvc_xu_control_query xqry;
 	} karg;
 	void __user *up = compat_ptr(arg);
-	mm_segment_t old_fs;
 	long ret;
 
 	switch (cmd) {
 	case UVCIOC_CTRL_MAP32:
-		cmd = UVCIOC_CTRL_MAP;
 		ret = uvc_v4l2_get_xu_mapping(&karg.xmap, up);
+		if (ret)
+			return ret;
+		ret = uvc_ioctl_ctrl_map(handle->chain, &karg.xmap);
+		if (ret)
+			return ret;
+		ret = uvc_v4l2_put_xu_mapping(&karg.xmap, up);
+		if (ret)
+			return ret;
+
 		break;
 
 	case UVCIOC_CTRL_QUERY32:
-		cmd = UVCIOC_CTRL_QUERY;
 		ret = uvc_v4l2_get_xu_query(&karg.xqry, up);
+		if (ret)
+			return ret;
+		ret = uvc_xu_ctrl_query(handle->chain, &karg.xqry);
+		if (ret)
+			return ret;
+		ret = uvc_v4l2_put_xu_query(&karg.xqry, up);
+		if (ret)
+			return ret;
 		break;
 
 	default:
 		return -ENOIOCTLCMD;
 	}
 
-	old_fs = get_fs();
-	set_fs(KERNEL_DS);
-	ret = video_ioctl2(file, cmd, (unsigned long)&karg);
-	set_fs(old_fs);
-
-	if (ret < 0)
-		return ret;
-
-	switch (cmd) {
-	case UVCIOC_CTRL_MAP:
-		ret = uvc_v4l2_put_xu_mapping(&karg.xmap, up);
-		break;
-
-	case UVCIOC_CTRL_QUERY:
-		ret = uvc_v4l2_put_xu_query(&karg.xqry, up);
-		break;
-	}
-
 	return ret;
 }
 #endif
diff --git a/drivers/media/v4l2-core/v4l2-mc.c b/drivers/media/v4l2-core/v4l2-mc.c
index ca94bded3386..8bef4331bd51 100644
--- a/drivers/media/v4l2-core/v4l2-mc.c
+++ b/drivers/media/v4l2-core/v4l2-mc.c
@@ -1,7 +1,7 @@
 /*
  * Media Controller ancillary functions
  *
- * Copyright (c) 2016 Mauro Carvalho Chehab <mchehab@osg.samsung.com>
+ * Copyright (c) 2016 Mauro Carvalho Chehab <mchehab@kernel.org>
  * Copyright (C) 2016 Shuah Khan <shuahkh@osg.samsung.com>
  * Copyright (C) 2006-2010 Nokia Corporation
  * Copyright (c) 2016 Intel Corporation.
diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c
index af4884ba6b7c..15508df24e5d 100644
--- a/drivers/memory/omap-gpmc.c
+++ b/drivers/memory/omap-gpmc.c
@@ -398,7 +398,7 @@ static void gpmc_cs_bool_timings(int cs, const struct gpmc_bool_timings *p)
 	gpmc_cs_modify_reg(cs, GPMC_CS_CONFIG4,
 			   GPMC_CONFIG4_OEEXTRADELAY, p->oe_extra_delay);
 	gpmc_cs_modify_reg(cs, GPMC_CS_CONFIG4,
-			   GPMC_CONFIG4_OEEXTRADELAY, p->we_extra_delay);
+			   GPMC_CONFIG4_WEEXTRADELAY, p->we_extra_delay);
 	gpmc_cs_modify_reg(cs, GPMC_CS_CONFIG6,
 			   GPMC_CONFIG6_CYCLE2CYCLESAMECSEN,
 			   p->cycle2cyclesamecsen);
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index eed254da63a8..641c1a566687 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -730,7 +730,7 @@ static void mei_cl_wake_all(struct mei_cl *cl)
 	/* synchronized under device mutex */
 	if (waitqueue_active(&cl->wait)) {
 		cl_dbg(dev, cl, "Waking up ctrl write clients!\n");
-		wake_up_interruptible(&cl->wait);
+		wake_up(&cl->wait);
 	}
 }
 
diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
index 16baeb51b2bd..ef3618299494 100644
--- a/drivers/mtd/ubi/build.c
+++ b/drivers/mtd/ubi/build.c
@@ -1147,11 +1147,17 @@ int ubi_detach_mtd_dev(int ubi_num, int anyway)
  */
 static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev)
 {
-	struct kstat stat;
 	int err, minor;
+	struct path path;
+	struct kstat stat;
 
 	/* Probably this is an MTD character device node path */
-	err = vfs_stat(mtd_dev, &stat);
+	err = kern_path(mtd_dev, LOOKUP_FOLLOW, &path);
+	if (err)
+		return ERR_PTR(err);
+
+	err = vfs_getattr(&path, &stat);
+	path_put(&path);
 	if (err)
 		return ERR_PTR(err);
 
@@ -1160,6 +1166,7 @@ static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev)
 		return ERR_PTR(-EINVAL);
 
 	minor = MINOR(stat.rdev);
+
 	if (minor & 1)
 		/*
 		 * Just do not think the "/dev/mtdrX" devices support is need,
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 5780dd1ba79d..ebf517271d29 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -575,6 +575,7 @@ static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum,
 	int err, idx = vol_id2idx(ubi, vol_id), new_pnum, data_size, tries = 0;
 	struct ubi_volume *vol = ubi->volumes[idx];
 	struct ubi_vid_hdr *vid_hdr;
+	uint32_t crc;
 
 	vid_hdr = ubi_zalloc_vid_hdr(ubi, GFP_NOFS);
 	if (!vid_hdr)
@@ -599,14 +600,8 @@ retry:
 		goto out_put;
 	}
 
-	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
-	err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr);
-	if (err) {
-		up_read(&ubi->fm_eba_sem);
-		goto write_error;
-	}
+	ubi_assert(vid_hdr->vol_type == UBI_VID_DYNAMIC);
 
-	data_size = offset + len;
 	mutex_lock(&ubi->buf_mutex);
 	memset(ubi->peb_buf + offset, 0xFF, len);
 
@@ -621,6 +616,19 @@ retry:
 
 	memcpy(ubi->peb_buf + offset, buf, len);
 
+	data_size = offset + len;
+	crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size);
+	vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
+	vid_hdr->copy_flag = 1;
+	vid_hdr->data_size = cpu_to_be32(data_size);
+	vid_hdr->data_crc = cpu_to_be32(crc);
+	err = ubi_io_write_vid_hdr(ubi, new_pnum, vid_hdr);
+	if (err) {
+		mutex_unlock(&ubi->buf_mutex);
+		up_read(&ubi->fm_eba_sem);
+		goto write_error;
+	}
+
 	err = ubi_io_write_data(ubi, ubi->peb_buf, new_pnum, 0, data_size);
 	if (err) {
 		mutex_unlock(&ubi->buf_mutex);
diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c
index 348dbbcbedc8..a9e2cef7c95c 100644
--- a/drivers/mtd/ubi/kapi.c
+++ b/drivers/mtd/ubi/kapi.c
@@ -302,6 +302,7 @@ EXPORT_SYMBOL_GPL(ubi_open_volume_nm);
 struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode)
 {
 	int error, ubi_num, vol_id;
+	struct path path;
 	struct kstat stat;
 
 	dbg_gen("open volume %s, mode %d", pathname, mode);
@@ -309,7 +310,12 @@ struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode)
 	if (!pathname || !*pathname)
 		return ERR_PTR(-EINVAL);
 
-	error = vfs_stat(pathname, &stat);
+	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
+	if (error)
+		return ERR_PTR(error);
+
+	error = vfs_getattr(&path, &stat);
+	path_put(&path);
 	if (error)
 		return ERR_PTR(error);
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index c5fe915870ad..a59d55e25d5f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -12895,52 +12895,71 @@ static int __bnx2x_vlan_configure_vid(struct bnx2x *bp, u16 vid, bool add)
 	return rc;
 }
 
-int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp)
+static int bnx2x_vlan_configure_vid_list(struct bnx2x *bp)
 {
 	struct bnx2x_vlan_entry *vlan;
 	int rc = 0;
 
-	if (!bp->vlan_cnt) {
-		DP(NETIF_MSG_IFUP, "No need to re-configure vlan filters\n");
-		return 0;
-	}
-
+	/* Configure all non-configured entries */
 	list_for_each_entry(vlan, &bp->vlan_reg, link) {
-		/* Prepare for cleanup in case of errors */
-		if (rc) {
-			vlan->hw = false;
-			continue;
-		}
-
-		if (!vlan->hw)
+		if (vlan->hw)
 			continue;
 
-		DP(NETIF_MSG_IFUP, "Re-configuring vlan 0x%04x\n", vlan->vid);
+		if (bp->vlan_cnt >= bp->vlan_credit)
+			return -ENOBUFS;
 
 		rc = __bnx2x_vlan_configure_vid(bp, vlan->vid, true);
 		if (rc) {
-			BNX2X_ERR("Unable to configure VLAN %d\n", vlan->vid);
-			vlan->hw = false;
-			rc = -EINVAL;
-			continue;
+			BNX2X_ERR("Unable to config VLAN %d\n", vlan->vid);
+			return rc;
 		}
+
+		DP(NETIF_MSG_IFUP, "HW configured for VLAN %d\n", vlan->vid);
+		vlan->hw = true;
+		bp->vlan_cnt++;
 	}
 
-	return rc;
+	return 0;
+}
+
+static void bnx2x_vlan_configure(struct bnx2x *bp, bool set_rx_mode)
+{
+	bool need_accept_any_vlan;
+
+	need_accept_any_vlan = !!bnx2x_vlan_configure_vid_list(bp);
+
+	if (bp->accept_any_vlan != need_accept_any_vlan) {
+		bp->accept_any_vlan = need_accept_any_vlan;
+		DP(NETIF_MSG_IFUP, "Accept all VLAN %s\n",
+		   bp->accept_any_vlan ? "raised" : "cleared");
+		if (set_rx_mode) {
+			if (IS_PF(bp))
+				bnx2x_set_rx_mode_inner(bp);
+			else
+				bnx2x_vfpf_storm_rx_mode(bp);
+		}
+	}
+}
+
+int bnx2x_vlan_reconfigure_vid(struct bnx2x *bp)
+{
+	struct bnx2x_vlan_entry *vlan;
+
+	/* The hw forgot all entries after reload */
+	list_for_each_entry(vlan, &bp->vlan_reg, link)
+		vlan->hw = false;
+	bp->vlan_cnt = 0;
+
+	/* Don't set rx mode here. Our caller will do it. */
+	bnx2x_vlan_configure(bp, false);
+
+	return 0;
 }
 
 static int bnx2x_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	struct bnx2x_vlan_entry *vlan;
-	bool hw = false;
-	int rc = 0;
-
-	if (!netif_running(bp->dev)) {
-		DP(NETIF_MSG_IFUP,
-		   "Ignoring VLAN configuration the interface is down\n");
-		return -EFAULT;
-	}
 
 	DP(NETIF_MSG_IFUP, "Adding VLAN %d\n", vid);
 
@@ -12948,93 +12967,47 @@ static int bnx2x_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 	if (!vlan)
 		return -ENOMEM;
 
-	bp->vlan_cnt++;
-	if (bp->vlan_cnt > bp->vlan_credit && !bp->accept_any_vlan) {
-		DP(NETIF_MSG_IFUP, "Accept all VLAN raised\n");
-		bp->accept_any_vlan = true;
-		if (IS_PF(bp))
-			bnx2x_set_rx_mode_inner(bp);
-		else
-			bnx2x_vfpf_storm_rx_mode(bp);
-	} else if (bp->vlan_cnt <= bp->vlan_credit) {
-		rc = __bnx2x_vlan_configure_vid(bp, vid, true);
-		hw = true;
-	}
-
 	vlan->vid = vid;
-	vlan->hw = hw;
+	vlan->hw = false;
+	list_add_tail(&vlan->link, &bp->vlan_reg);
 
-	if (!rc) {
-		list_add(&vlan->link, &bp->vlan_reg);
-	} else {
-		bp->vlan_cnt--;
-		kfree(vlan);
-	}
-
-	DP(NETIF_MSG_IFUP, "Adding VLAN result %d\n", rc);
+	if (netif_running(dev))
+		bnx2x_vlan_configure(bp, true);
 
-	return rc;
+	return 0;
 }
 
 static int bnx2x_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	struct bnx2x_vlan_entry *vlan;
+	bool found = false;
 	int rc = 0;
 
-	if (!netif_running(bp->dev)) {
-		DP(NETIF_MSG_IFUP,
-		   "Ignoring VLAN configuration the interface is down\n");
-		return -EFAULT;
-	}
-
 	DP(NETIF_MSG_IFUP, "Removing VLAN %d\n", vid);
 
-	if (!bp->vlan_cnt) {
-		BNX2X_ERR("Unable to kill VLAN %d\n", vid);
-		return -EINVAL;
-	}
-
 	list_for_each_entry(vlan, &bp->vlan_reg, link)
-		if (vlan->vid == vid)
+		if (vlan->vid == vid) {
+			found = true;
 			break;
+		}
 
-	if (vlan->vid != vid) {
+	if (!found) {
 		BNX2X_ERR("Unable to kill VLAN %d - not found\n", vid);
 		return -EINVAL;
 	}
 
-	if (vlan->hw)
+	if (netif_running(dev) && vlan->hw) {
 		rc = __bnx2x_vlan_configure_vid(bp, vid, false);
+		DP(NETIF_MSG_IFUP, "HW deconfigured for VLAN %d\n", vid);
+		bp->vlan_cnt--;
+	}
 
 	list_del(&vlan->link);
 	kfree(vlan);
 
-	bp->vlan_cnt--;
-
-	if (bp->vlan_cnt <= bp->vlan_credit && bp->accept_any_vlan) {
-		/* Configure all non-configured entries */
-		list_for_each_entry(vlan, &bp->vlan_reg, link) {
-			if (vlan->hw)
-				continue;
-
-			rc = __bnx2x_vlan_configure_vid(bp, vlan->vid, true);
-			if (rc) {
-				BNX2X_ERR("Unable to config VLAN %d\n",
-					  vlan->vid);
-				continue;
-			}
-			DP(NETIF_MSG_IFUP, "HW configured for VLAN %d\n",
-			   vlan->vid);
-			vlan->hw = true;
-		}
-		DP(NETIF_MSG_IFUP, "Accept all VLAN Removed\n");
-		bp->accept_any_vlan = false;
-		if (IS_PF(bp))
-			bnx2x_set_rx_mode_inner(bp);
-		else
-			bnx2x_vfpf_storm_rx_mode(bp);
-	}
+	if (netif_running(dev))
+		bnx2x_vlan_configure(bp, true);
 
 	DP(NETIF_MSG_IFUP, "Removing VLAN result %d\n", rc);
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 72a2efff8e49..c777cde85ce4 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -286,7 +286,9 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			cpu_to_le32(DB_KEY_TX_PUSH | DB_LONG_TX_PUSH | prod);
 		txr->tx_prod = prod;
 
+		tx_buf->is_push = 1;
 		netdev_tx_sent_queue(txq, skb->len);
+		wmb();	/* Sync is_push and byte queue before pushing data */
 
 		push_len = (length + sizeof(*tx_push) + 7) / 8;
 		if (push_len > 16) {
@@ -298,7 +300,6 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
 					 push_len);
 		}
 
-		tx_buf->is_push = 1;
 		goto tx_done;
 	}
 
@@ -1112,19 +1113,13 @@ static inline struct sk_buff *bnxt_tpa_end(struct bnxt *bp,
 	if (tpa_info->hash_type != PKT_HASH_TYPE_NONE)
 		skb_set_hash(skb, tpa_info->rss_hash, tpa_info->hash_type);
 
-	if (tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) {
-		netdev_features_t features = skb->dev->features;
+	if ((tpa_info->flags2 & RX_CMP_FLAGS2_META_FORMAT_VLAN) &&
+	    (skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
 		u16 vlan_proto = tpa_info->metadata >>
 			RX_CMP_FLAGS2_METADATA_TPID_SFT;
+		u16 vtag = tpa_info->metadata & RX_CMP_FLAGS2_METADATA_VID_MASK;
 
-		if (((features & NETIF_F_HW_VLAN_CTAG_RX) &&
-		     vlan_proto == ETH_P_8021Q) ||
-		    ((features & NETIF_F_HW_VLAN_STAG_RX) &&
-		     vlan_proto == ETH_P_8021AD)) {
-			__vlan_hwaccel_put_tag(skb, htons(vlan_proto),
-					       tpa_info->metadata &
-					       RX_CMP_FLAGS2_METADATA_VID_MASK);
-		}
+		__vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag);
 	}
 
 	skb_checksum_none_assert(skb);
@@ -1277,19 +1272,14 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_napi *bnapi, u32 *raw_cons,
 
 	skb->protocol = eth_type_trans(skb, dev);
 
-	if (rxcmp1->rx_cmp_flags2 &
-	    cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) {
-		netdev_features_t features = skb->dev->features;
+	if ((rxcmp1->rx_cmp_flags2 &
+	     cpu_to_le32(RX_CMP_FLAGS2_META_FORMAT_VLAN)) &&
+	    (skb->dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
 		u32 meta_data = le32_to_cpu(rxcmp1->rx_cmp_meta_data);
+		u16 vtag = meta_data & RX_CMP_FLAGS2_METADATA_VID_MASK;
 		u16 vlan_proto = meta_data >> RX_CMP_FLAGS2_METADATA_TPID_SFT;
 
-		if (((features & NETIF_F_HW_VLAN_CTAG_RX) &&
-		     vlan_proto == ETH_P_8021Q) ||
-		    ((features & NETIF_F_HW_VLAN_STAG_RX) &&
-		     vlan_proto == ETH_P_8021AD))
-			__vlan_hwaccel_put_tag(skb, htons(vlan_proto),
-					       meta_data &
-					       RX_CMP_FLAGS2_METADATA_VID_MASK);
+		__vlan_hwaccel_put_tag(skb, htons(vlan_proto), vtag);
 	}
 
 	skb_checksum_none_assert(skb);
@@ -5466,6 +5456,20 @@ static netdev_features_t bnxt_fix_features(struct net_device *dev,
 
 	if (!bnxt_rfs_capable(bp))
 		features &= ~NETIF_F_NTUPLE;
+
+	/* Both CTAG and STAG VLAN acceleration on the RX side have to be
+	 * turned on or off together.
+	 */
+	if ((features & (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX)) !=
+	    (NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_STAG_RX)) {
+		if (dev->features & NETIF_F_HW_VLAN_CTAG_RX)
+			features &= ~(NETIF_F_HW_VLAN_CTAG_RX |
+				      NETIF_F_HW_VLAN_STAG_RX);
+		else
+			features |= NETIF_F_HW_VLAN_CTAG_RX |
+				    NETIF_F_HW_VLAN_STAG_RX;
+	}
+
 	return features;
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
index a2cdfc1261dc..50812a1d67bd 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
@@ -144,6 +144,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
 	CH_PCI_ID_TABLE_FENTRY(0x5015),	/* T502-bt */
 	CH_PCI_ID_TABLE_FENTRY(0x5016),	/* T580-OCP-SO */
 	CH_PCI_ID_TABLE_FENTRY(0x5017),	/* T520-OCP-SO */
+	CH_PCI_ID_TABLE_FENTRY(0x5018),	/* T540-BT */
 	CH_PCI_ID_TABLE_FENTRY(0x5080),	/* Custom T540-cr */
 	CH_PCI_ID_TABLE_FENTRY(0x5081),	/* Custom T540-LL-cr */
 	CH_PCI_ID_TABLE_FENTRY(0x5082),	/* Custom T504-cr */
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index 41b010645100..4edb98c3c6c7 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -1195,7 +1195,7 @@ static int ethoc_probe(struct platform_device *pdev)
 	priv->mdio = mdiobus_alloc();
 	if (!priv->mdio) {
 		ret = -ENOMEM;
-		goto free;
+		goto free2;
 	}
 
 	priv->mdio->name = "ethoc-mdio";
@@ -1208,7 +1208,7 @@ static int ethoc_probe(struct platform_device *pdev)
 	ret = mdiobus_register(priv->mdio);
 	if (ret) {
 		dev_err(&netdev->dev, "failed to register MDIO bus\n");
-		goto free;
+		goto free2;
 	}
 
 	ret = ethoc_mdio_probe(netdev);
@@ -1241,9 +1241,10 @@ error2:
 error:
 	mdiobus_unregister(priv->mdio);
 	mdiobus_free(priv->mdio);
-free:
+free2:
 	if (priv->clk)
 		clk_disable_unprepare(priv->clk);
+free:
 	free_netdev(netdev);
 out:
 	return ret;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 3c0255e98535..fea0f330ddbd 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2416,24 +2416,24 @@ fec_enet_set_coalesce(struct net_device *ndev, struct ethtool_coalesce *ec)
 		return -EOPNOTSUPP;
 
 	if (ec->rx_max_coalesced_frames > 255) {
-		pr_err("Rx coalesced frames exceed hardware limiation");
+		pr_err("Rx coalesced frames exceed hardware limitation\n");
 		return -EINVAL;
 	}
 
 	if (ec->tx_max_coalesced_frames > 255) {
-		pr_err("Tx coalesced frame exceed hardware limiation");
+		pr_err("Tx coalesced frame exceed hardware limitation\n");
 		return -EINVAL;
 	}
 
 	cycle = fec_enet_us_to_itr_clock(ndev, fep->rx_time_itr);
 	if (cycle > 0xFFFF) {
-		pr_err("Rx coalesed usec exceeed hardware limiation");
+		pr_err("Rx coalesced usec exceed hardware limitation\n");
 		return -EINVAL;
 	}
 
 	cycle = fec_enet_us_to_itr_clock(ndev, fep->tx_time_itr);
 	if (cycle > 0xFFFF) {
-		pr_err("Rx coalesed usec exceeed hardware limiation");
+		pr_err("Tx coalesced usec exceed hardware limitation\n");
 		return -EINVAL;
 	}
 
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 7615e0668acb..2e6785b6e8be 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -2440,7 +2440,8 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
 						 tx_queue->tx_ring_size);
 
 	if (likely(!nr_frags)) {
-		lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
+		if (likely(!do_tstamp))
+			lstatus |= BD_LFLAG(TXBD_LAST | TXBD_INTERRUPT);
 	} else {
 		u32 lstatus_start = lstatus;
 
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index c984462fad2a..4763252bbf85 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -133,6 +133,8 @@ static int mtk_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg)
 static void mtk_phy_link_adjust(struct net_device *dev)
 {
 	struct mtk_mac *mac = netdev_priv(dev);
+	u16 lcl_adv = 0, rmt_adv = 0;
+	u8 flowctrl;
 	u32 mcr = MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG |
 		  MAC_MCR_FORCE_MODE | MAC_MCR_TX_EN |
 		  MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN |
@@ -150,11 +152,30 @@ static void mtk_phy_link_adjust(struct net_device *dev)
 	if (mac->phy_dev->link)
 		mcr |= MAC_MCR_FORCE_LINK;
 
-	if (mac->phy_dev->duplex)
+	if (mac->phy_dev->duplex) {
 		mcr |= MAC_MCR_FORCE_DPX;
 
-	if (mac->phy_dev->pause)
-		mcr |= MAC_MCR_FORCE_RX_FC | MAC_MCR_FORCE_TX_FC;
+		if (mac->phy_dev->pause)
+			rmt_adv = LPA_PAUSE_CAP;
+		if (mac->phy_dev->asym_pause)
+			rmt_adv |= LPA_PAUSE_ASYM;
+
+		if (mac->phy_dev->advertising & ADVERTISED_Pause)
+			lcl_adv |= ADVERTISE_PAUSE_CAP;
+		if (mac->phy_dev->advertising & ADVERTISED_Asym_Pause)
+			lcl_adv |= ADVERTISE_PAUSE_ASYM;
+
+		flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv);
+
+		if (flowctrl & FLOW_CTRL_TX)
+			mcr |= MAC_MCR_FORCE_TX_FC;
+		if (flowctrl & FLOW_CTRL_RX)
+			mcr |= MAC_MCR_FORCE_RX_FC;
+
+		netif_dbg(mac->hw, link, dev, "rx pause %s, tx pause %s\n",
+			  flowctrl & FLOW_CTRL_RX ? "enabled" : "disabled",
+			  flowctrl & FLOW_CTRL_TX ? "enabled" : "disabled");
+	}
 
 	mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id));
 
@@ -208,10 +229,16 @@ static int mtk_phy_connect(struct mtk_mac *mac)
 	u32 val, ge_mode;
 
 	np = of_parse_phandle(mac->of_node, "phy-handle", 0);
+	if (!np && of_phy_is_fixed_link(mac->of_node))
+		if (!of_phy_register_fixed_link(mac->of_node))
+			np = of_node_get(mac->of_node);
 	if (!np)
 		return -ENODEV;
 
 	switch (of_get_phy_mode(np)) {
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_ID:
 	case PHY_INTERFACE_MODE_RGMII:
 		ge_mode = 0;
 		break;
@@ -236,7 +263,8 @@ static int mtk_phy_connect(struct mtk_mac *mac)
 	mac->phy_dev->autoneg = AUTONEG_ENABLE;
 	mac->phy_dev->speed = 0;
 	mac->phy_dev->duplex = 0;
-	mac->phy_dev->supported &= PHY_BASIC_FEATURES;
+	mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause |
+				   SUPPORTED_Asym_Pause;
 	mac->phy_dev->advertising = mac->phy_dev->supported |
 				    ADVERTISED_Autoneg;
 	phy_start_aneg(mac->phy_dev);
@@ -280,7 +308,7 @@ static int mtk_mdio_init(struct mtk_eth *eth)
 	return 0;
 
 err_free_bus:
-	kfree(eth->mii_bus);
+	mdiobus_free(eth->mii_bus);
 
 err_put_node:
 	of_node_put(mii_np);
@@ -295,7 +323,7 @@ static void mtk_mdio_cleanup(struct mtk_eth *eth)
 
 	mdiobus_unregister(eth->mii_bus);
 	of_node_put(eth->mii_bus->dev.of_node);
-	kfree(eth->mii_bus);
+	mdiobus_free(eth->mii_bus);
 }
 
 static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index fd4392999eee..f5c8d5db25a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3192,10 +3192,7 @@ static void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, void *vpriv)
 	flush_workqueue(priv->wq);
 	if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) {
 		netif_device_detach(netdev);
-		mutex_lock(&priv->state_lock);
-		if (test_bit(MLX5E_STATE_OPENED, &priv->state))
-			mlx5e_close_locked(netdev);
-		mutex_unlock(&priv->state_lock);
+		mlx5e_close(netdev);
 	} else {
 		unregister_netdev(netdev);
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 229ab16fb8d3..b000ddc29553 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -317,7 +317,8 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 	while ((sq->pc & wq->sz_m1) > sq->edge)
 		mlx5e_send_nop(sq, false);
 
-	sq->bf_budget = bf ? sq->bf_budget - 1 : 0;
+	if (bf)
+		sq->bf_budget--;
 
 	sq->stats.packets++;
 	sq->stats.bytes += num_bytes;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index b84a6918a700..aebbd6ccb9fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -383,7 +383,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 				   match_v,
 				   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
 				   0, &dest);
-	if (IS_ERR_OR_NULL(flow_rule)) {
+	if (IS_ERR(flow_rule)) {
 		pr_warn(
 			"FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
 			 dmac_v, dmac_c, vport, PTR_ERR(flow_rule));
@@ -457,7 +457,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports)
 
 	table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
 	fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0);
-	if (IS_ERR_OR_NULL(fdb)) {
+	if (IS_ERR(fdb)) {
 		err = PTR_ERR(fdb);
 		esw_warn(dev, "Failed to create FDB Table err %d\n", err);
 		goto out;
@@ -474,7 +474,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 3);
 	eth_broadcast_addr(dmac);
 	g = mlx5_create_flow_group(fdb, flow_group_in);
-	if (IS_ERR_OR_NULL(g)) {
+	if (IS_ERR(g)) {
 		err = PTR_ERR(g);
 		esw_warn(dev, "Failed to create flow group err(%d)\n", err);
 		goto out;
@@ -489,7 +489,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	eth_zero_addr(dmac);
 	dmac[0] = 0x01;
 	g = mlx5_create_flow_group(fdb, flow_group_in);
-	if (IS_ERR_OR_NULL(g)) {
+	if (IS_ERR(g)) {
 		err = PTR_ERR(g);
 		esw_warn(dev, "Failed to create allmulti flow group err(%d)\n", err);
 		goto out;
@@ -506,7 +506,7 @@ static int esw_create_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, table_size - 1);
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1);
 	g = mlx5_create_flow_group(fdb, flow_group_in);
-	if (IS_ERR_OR_NULL(g)) {
+	if (IS_ERR(g)) {
 		err = PTR_ERR(g);
 		esw_warn(dev, "Failed to create promisc flow group err(%d)\n", err);
 		goto out;
@@ -529,7 +529,7 @@ out:
 		}
 	}
 
-	kfree(flow_group_in);
+	kvfree(flow_group_in);
 	return err;
 }
 
@@ -651,6 +651,7 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw,
 					esw_fdb_set_vport_rule(esw,
 							       mac,
 							       vport_idx);
+			iter_vaddr->mc_promisc = true;
 			break;
 		case MLX5_ACTION_DEL:
 			if (!iter_vaddr)
@@ -1060,7 +1061,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
 		return;
 
 	acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport);
-	if (IS_ERR_OR_NULL(acl)) {
+	if (IS_ERR(acl)) {
 		err = PTR_ERR(acl);
 		esw_warn(dev, "Failed to create E-Switch vport[%d] egress flow Table, err(%d)\n",
 			 vport->vport, err);
@@ -1075,7 +1076,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
 
 	vlan_grp = mlx5_create_flow_group(acl, flow_group_in);
-	if (IS_ERR_OR_NULL(vlan_grp)) {
+	if (IS_ERR(vlan_grp)) {
 		err = PTR_ERR(vlan_grp);
 		esw_warn(dev, "Failed to create E-Switch vport[%d] egress allowed vlans flow group, err(%d)\n",
 			 vport->vport, err);
@@ -1086,7 +1087,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
 	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
 	drop_grp = mlx5_create_flow_group(acl, flow_group_in);
-	if (IS_ERR_OR_NULL(drop_grp)) {
+	if (IS_ERR(drop_grp)) {
 		err = PTR_ERR(drop_grp);
 		esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n",
 			 vport->vport, err);
@@ -1097,7 +1098,7 @@ static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
 	vport->egress.drop_grp = drop_grp;
 	vport->egress.allowed_vlans_grp = vlan_grp;
 out:
-	kfree(flow_group_in);
+	kvfree(flow_group_in);
 	if (err && !IS_ERR_OR_NULL(vlan_grp))
 		mlx5_destroy_flow_group(vlan_grp);
 	if (err && !IS_ERR_OR_NULL(acl))
@@ -1174,7 +1175,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
 		return;
 
 	acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport);
-	if (IS_ERR_OR_NULL(acl)) {
+	if (IS_ERR(acl)) {
 		err = PTR_ERR(acl);
 		esw_warn(dev, "Failed to create E-Switch vport[%d] ingress flow Table, err(%d)\n",
 			 vport->vport, err);
@@ -1192,7 +1193,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
 
 	g = mlx5_create_flow_group(acl, flow_group_in);
-	if (IS_ERR_OR_NULL(g)) {
+	if (IS_ERR(g)) {
 		err = PTR_ERR(g);
 		esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged spoofchk flow group, err(%d)\n",
 			 vport->vport, err);
@@ -1207,7 +1208,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
 
 	g = mlx5_create_flow_group(acl, flow_group_in);
-	if (IS_ERR_OR_NULL(g)) {
+	if (IS_ERR(g)) {
 		err = PTR_ERR(g);
 		esw_warn(dev, "Failed to create E-Switch vport[%d] ingress untagged flow group, err(%d)\n",
 			 vport->vport, err);
@@ -1223,7 +1224,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2);
 
 	g = mlx5_create_flow_group(acl, flow_group_in);
-	if (IS_ERR_OR_NULL(g)) {
+	if (IS_ERR(g)) {
 		err = PTR_ERR(g);
 		esw_warn(dev, "Failed to create E-Switch vport[%d] ingress spoofchk flow group, err(%d)\n",
 			 vport->vport, err);
@@ -1236,7 +1237,7 @@ static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
 	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3);
 
 	g = mlx5_create_flow_group(acl, flow_group_in);
-	if (IS_ERR_OR_NULL(g)) {
+	if (IS_ERR(g)) {
 		err = PTR_ERR(g);
 		esw_warn(dev, "Failed to create E-Switch vport[%d] ingress drop flow group, err(%d)\n",
 			 vport->vport, err);
@@ -1259,7 +1260,7 @@ out:
 			mlx5_destroy_flow_table(vport->ingress.acl);
 	}
 
-	kfree(flow_group_in);
+	kvfree(flow_group_in);
 }
 
 static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
@@ -1363,7 +1364,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 				   match_v,
 				   MLX5_FLOW_CONTEXT_ACTION_ALLOW,
 				   0, NULL);
-	if (IS_ERR_OR_NULL(vport->ingress.allow_rule)) {
+	if (IS_ERR(vport->ingress.allow_rule)) {
 		err = PTR_ERR(vport->ingress.allow_rule);
 		pr_warn("vport[%d] configure ingress allow rule, err(%d)\n",
 			vport->vport, err);
@@ -1380,7 +1381,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 				   match_v,
 				   MLX5_FLOW_CONTEXT_ACTION_DROP,
 				   0, NULL);
-	if (IS_ERR_OR_NULL(vport->ingress.drop_rule)) {
+	if (IS_ERR(vport->ingress.drop_rule)) {
 		err = PTR_ERR(vport->ingress.drop_rule);
 		pr_warn("vport[%d] configure ingress drop rule, err(%d)\n",
 			vport->vport, err);
@@ -1439,7 +1440,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 				   match_v,
 				   MLX5_FLOW_CONTEXT_ACTION_ALLOW,
 				   0, NULL);
-	if (IS_ERR_OR_NULL(vport->egress.allowed_vlan)) {
+	if (IS_ERR(vport->egress.allowed_vlan)) {
 		err = PTR_ERR(vport->egress.allowed_vlan);
 		pr_warn("vport[%d] configure egress allowed vlan rule failed, err(%d)\n",
 			vport->vport, err);
@@ -1457,7 +1458,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 				   match_v,
 				   MLX5_FLOW_CONTEXT_ACTION_DROP,
 				   0, NULL);
-	if (IS_ERR_OR_NULL(vport->egress.drop_rule)) {
+	if (IS_ERR(vport->egress.drop_rule)) {
 		err = PTR_ERR(vport->egress.drop_rule);
 		pr_warn("vport[%d] configure egress drop rule failed, err(%d)\n",
 			vport->vport, err);
@@ -1491,14 +1492,11 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
 
 	/* Sync with current vport context */
 	vport->enabled_events = enable_events;
-	esw_vport_change_handle_locked(vport);
-
 	vport->enabled = true;
 
 	/* only PF is trusted by default */
 	vport->trusted = (vport_num) ? false : true;
-
-	arm_vport_context_events_cmd(esw->dev, vport_num, enable_events);
+	esw_vport_change_handle_locked(vport);
 
 	esw->enabled_vports++;
 	esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num);
@@ -1728,11 +1726,24 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
 	(esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev))
 #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
 
+static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
+{	/* fills all 8 bytes of *node_guid, addressed by memory offset */
+	((u8 *)node_guid)[7] = mac[0];	/* mac[0..2] go to bytes 7..5 */
+	((u8 *)node_guid)[6] = mac[1];
+	((u8 *)node_guid)[5] = mac[2];
+	((u8 *)node_guid)[4] = 0xff;	/* constant 0xff, 0xfe in bytes 4..3 */
+	((u8 *)node_guid)[3] = 0xfe;
+	((u8 *)node_guid)[2] = mac[3];	/* mac[3..5] go to bytes 2..0 */
+	((u8 *)node_guid)[1] = mac[4];
+	((u8 *)node_guid)[0] = mac[5];
+}
+
 int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
 			       int vport, u8 mac[ETH_ALEN])
 {
-	int err = 0;
 	struct mlx5_vport *evport;
+	u64 node_guid;
+	int err = 0;
 
 	if (!ESW_ALLOWED(esw))
 		return -EPERM;
@@ -1756,11 +1767,17 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
 		return err;
 	}
 
+	node_guid_gen_from_mac(&node_guid, mac);
+	err = mlx5_modify_nic_vport_node_guid(esw->dev, vport, node_guid);
+	if (err)
+		mlx5_core_warn(esw->dev,
+			       "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n",
+			       vport, err);
+
 	mutex_lock(&esw->state_lock);
 	if (evport->enabled)
 		err = esw_vport_ingress_config(esw, evport);
 	mutex_unlock(&esw->state_lock);
-
 	return err;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 8b5f0b2c0d5c..e912a3d2505e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1292,8 +1292,8 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft)
 				       ft->id);
 			return err;
 		}
-		root->root_ft = new_root_ft;
 	}
+	root->root_ft = new_root_ft;
 	return 0;
 }
 
@@ -1767,6 +1767,9 @@ static void cleanup_root_ns(struct mlx5_core_dev *dev)
 
 void mlx5_cleanup_fs(struct mlx5_core_dev *dev)
 {
+	if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return;
+
 	cleanup_root_ns(dev);
 	cleanup_single_prio_root_ns(dev, dev->priv.fdb_root_ns);
 	cleanup_single_prio_root_ns(dev, dev->priv.esw_egress_root_ns);
@@ -1828,29 +1831,36 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 {
 	int err = 0;
 
+	if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
+		return 0;
+
 	err = mlx5_init_fc_stats(dev);
 	if (err)
 		return err;
 
-	if (MLX5_CAP_GEN(dev, nic_flow_table)) {
+	if (MLX5_CAP_GEN(dev, nic_flow_table) &&
+	    MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_support)) {
 		err = init_root_ns(dev);
 		if (err)
 			goto err;
 	}
+
 	if (MLX5_CAP_GEN(dev, eswitch_flow_table)) {
-		err = init_fdb_root_ns(dev);
-		if (err)
-			goto err;
-	}
-	if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
-		err = init_egress_acl_root_ns(dev);
-		if (err)
-			goto err;
-	}
-	if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
-		err = init_ingress_acl_root_ns(dev);
-		if (err)
-			goto err;
+		if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, ft_support)) {
+			err = init_fdb_root_ns(dev);
+			if (err)
+				goto err;
+		}
+		if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) {
+			err = init_egress_acl_root_ns(dev);
+			if (err)
+				goto err;
+		}
+		if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) {
+			err = init_ingress_acl_root_ns(dev);
+			if (err)
+				goto err;
+		}
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index b720a274220d..b82d65802d96 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -418,7 +418,7 @@ int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn)
 	if (out.hdr.status)
 		err = mlx5_cmd_status_to_err(&out.hdr);
 	else
-		*xrcdn = be32_to_cpu(out.xrcdn);
+		*xrcdn = be32_to_cpu(out.xrcdn) & 0xffffff;
 
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index b69dadcfb897..daf44cd4c566 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -508,6 +508,44 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
 }
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_node_guid);
 
+/* Program node_guid into the NIC vport context of a non-zero vport. */
+int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
+				    u32 vport, u64 node_guid)
+{
+	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+	void *nic_vport_context;
+	void *in;
+	int err;
+
+	if (!vport)
+		return -EINVAL;
+	if (!MLX5_CAP_GEN(mdev, vport_group_manager))
+		return -EACCES;
+	if (!MLX5_CAP_ESW(mdev, nic_vport_node_guid_modify))
+		return -ENOTSUPP;
+
+	in = mlx5_vzalloc(inlen);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(modify_nic_vport_context_in, in,
+		 field_select.node_guid, 1);
+	MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
+	/* vport is non-zero here, so other_vport is always written as 1 */
+	MLX5_SET(modify_nic_vport_context_in, in, other_vport, !!vport);
+
+	nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in,
+					 in, nic_vport_context);
+	/* MLX5_SET64 stores the field itself; no raw field pointer is needed */
+	MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid);
+
+	err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+
+	kvfree(in);
+
+	return err;
+}
+
 int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
 					u16 *qkey_viol_cntr)
 {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 4a7273771028..6f9e3ddff4a8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -247,15 +247,23 @@ static int mlxsw_sp_port_mtu_set(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu)
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pmtu), pmtu_pl);
 }
 
-static int mlxsw_sp_port_swid_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 swid)
+static int __mlxsw_sp_port_swid_set(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+				    u8 swid)
 {
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	char pspa_pl[MLXSW_REG_PSPA_LEN];
 
-	mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sp_port->local_port);
+	mlxsw_reg_pspa_pack(pspa_pl, swid, local_port);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(pspa), pspa_pl);
 }
 
+/* Thin wrapper: set the SWID for the local port behind mlxsw_sp_port. */
+static int mlxsw_sp_port_swid_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 swid)
+{
+	u8 port = mlxsw_sp_port->local_port;
+
+	return __mlxsw_sp_port_swid_set(mlxsw_sp_port->mlxsw_sp, port, swid);
+}
+
 static int mlxsw_sp_port_vp_mode_set(struct mlxsw_sp_port *mlxsw_sp_port,
 				     bool enable)
 {
@@ -305,9 +313,9 @@ mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port)
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sspr), sspr_pl);
 }
 
-static int __mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp,
-					   u8 local_port, u8 *p_module,
-					   u8 *p_width, u8 *p_lane)
+static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp,
+					 u8 local_port, u8 *p_module,
+					 u8 *p_width, u8 *p_lane)
 {
 	char pmlp_pl[MLXSW_REG_PMLP_LEN];
 	int err;
@@ -322,16 +330,6 @@ static int __mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
-static int mlxsw_sp_port_module_info_get(struct mlxsw_sp *mlxsw_sp,
-					 u8 local_port, u8 *p_module,
-					 u8 *p_width)
-{
-	u8 lane;
-
-	return __mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, p_module,
-					       p_width, &lane);
-}
-
 static int mlxsw_sp_port_module_map(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 				    u8 module, u8 width, u8 lane)
 {
@@ -949,17 +947,11 @@ static int mlxsw_sp_port_get_phys_port_name(struct net_device *dev, char *name,
 					    size_t len)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
-	u8 module, width, lane;
+	u8 module = mlxsw_sp_port->mapping.module;
+	u8 width = mlxsw_sp_port->mapping.width;
+	u8 lane = mlxsw_sp_port->mapping.lane;
 	int err;
 
-	err = __mlxsw_sp_port_module_info_get(mlxsw_sp_port->mlxsw_sp,
-					      mlxsw_sp_port->local_port,
-					      &module, &width, &lane);
-	if (err) {
-		netdev_err(dev, "Failed to retrieve module information\n");
-		return err;
-	}
-
 	if (!mlxsw_sp_port->split)
 		err = snprintf(name, len, "p%d", module + 1);
 	else
@@ -1681,8 +1673,8 @@ static int mlxsw_sp_port_ets_init(struct mlxsw_sp_port *mlxsw_sp_port)
 	return 0;
 }
 
-static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
-				  bool split, u8 module, u8 width)
+static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+				bool split, u8 module, u8 width, u8 lane)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port;
 	struct net_device *dev;
@@ -1697,6 +1689,9 @@ static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 	mlxsw_sp_port->mlxsw_sp = mlxsw_sp;
 	mlxsw_sp_port->local_port = local_port;
 	mlxsw_sp_port->split = split;
+	mlxsw_sp_port->mapping.module = module;
+	mlxsw_sp_port->mapping.width = width;
+	mlxsw_sp_port->mapping.lane = lane;
 	bytes = DIV_ROUND_UP(VLAN_N_VID, BITS_PER_BYTE);
 	mlxsw_sp_port->active_vlans = kzalloc(bytes, GFP_KERNEL);
 	if (!mlxsw_sp_port->active_vlans) {
@@ -1839,28 +1834,6 @@ err_port_active_vlans_alloc:
 	return err;
 }
 
-static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
-				bool split, u8 module, u8 width, u8 lane)
-{
-	int err;
-
-	err = mlxsw_sp_port_module_map(mlxsw_sp, local_port, module, width,
-				       lane);
-	if (err)
-		return err;
-
-	err = __mlxsw_sp_port_create(mlxsw_sp, local_port, split, module,
-				     width);
-	if (err)
-		goto err_port_create;
-
-	return 0;
-
-err_port_create:
-	mlxsw_sp_port_module_unmap(mlxsw_sp, local_port);
-	return err;
-}
-
 static void mlxsw_sp_port_vports_fini(struct mlxsw_sp_port *mlxsw_sp_port)
 {
 	struct net_device *dev = mlxsw_sp_port->dev;
@@ -1909,8 +1882,8 @@ static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp)
 
 static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp)
 {
+	u8 module, width, lane;
 	size_t alloc_size;
-	u8 module, width;
 	int i;
 	int err;
 
@@ -1921,13 +1894,14 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp)
 
 	for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) {
 		err = mlxsw_sp_port_module_info_get(mlxsw_sp, i, &module,
-						    &width);
+						    &width, &lane);
 		if (err)
 			goto err_port_module_info_get;
 		if (!width)
 			continue;
 		mlxsw_sp->port_to_module[i] = module;
-		err = __mlxsw_sp_port_create(mlxsw_sp, i, false, module, width);
+		err = mlxsw_sp_port_create(mlxsw_sp, i, false, module, width,
+					   lane);
 		if (err)
 			goto err_port_create;
 	}
@@ -1948,12 +1922,85 @@ static u8 mlxsw_sp_cluster_base_port_get(u8 local_port)
 	return local_port - offset;
 }
 
+static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port,
+				      u8 module, unsigned int count)
+{	/* map, set SWID and create 'count' split ports; unwinds on failure */
+	u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count; /* width of each new port */
+	int err, i;
+
+	for (i = 0; i < count; i++) { /* stage 1: map every port to the module */
+		err = mlxsw_sp_port_module_map(mlxsw_sp, base_port + i, module,
+					       width, i * width);
+		if (err)
+			goto err_port_module_map;
+	}
+
+	for (i = 0; i < count; i++) { /* stage 2: set SWID 0 on every port */
+		err = __mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i, 0);
+		if (err)
+			goto err_port_swid_set;
+	}
+
+	for (i = 0; i < count; i++) { /* stage 3: create the split ports */
+		err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true,
+					   module, width, i * width);
+		if (err)
+			goto err_port_create;
+	}
+
+	return 0;
+
+err_port_create: /* undo stage 3 for the i ports that were created */
+	for (i--; i >= 0; i--)
+		mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
+	i = count; /* stage 2 completed for all ports, so undo all of them */
+err_port_swid_set:
+	for (i--; i >= 0; i--)
+		__mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i,
+					 MLXSW_PORT_SWID_DISABLED_PORT);
+	i = count; /* likewise, stage 1 completed for all ports */
+err_port_module_map:
+	for (i--; i >= 0; i--)
+		mlxsw_sp_port_module_unmap(mlxsw_sp, base_port + i);
+	return err;
+}
+
+static void mlxsw_sp_port_unsplit_create(struct mlxsw_sp *mlxsw_sp,
+					 u8 base_port, unsigned int count)
+{	/* NOTE(review): errors from map/swid_set/create below are ignored */
+	u8 local_port, module, width = MLXSW_PORT_MODULE_MAX_WIDTH;
+	int i;
+
+	/* Split by four means we need to re-create two ports, otherwise
+	 * only one.
+	 */
+	count = count / 2;
+
+	for (i = 0; i < count; i++) {
+		local_port = base_port + i * 2; /* base_port, base_port + 2, ... */
+		module = mlxsw_sp->port_to_module[local_port];
+
+		mlxsw_sp_port_module_map(mlxsw_sp, local_port, module, width,
+					 0); /* full width, starting at lane 0 */
+	}
+
+	for (i = 0; i < count; i++) /* all ports are mapped before any SWID is set */
+		__mlxsw_sp_port_swid_set(mlxsw_sp, base_port + i * 2, 0);
+
+	for (i = 0; i < count; i++) {
+		local_port = base_port + i * 2;
+		module = mlxsw_sp->port_to_module[local_port];
+
+		mlxsw_sp_port_create(mlxsw_sp, local_port, false, module,
+				     width, 0); /* split == false: unsplit port */
+	}
+}
+
 static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port,
 			       unsigned int count)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 	struct mlxsw_sp_port *mlxsw_sp_port;
-	u8 width = MLXSW_PORT_MODULE_MAX_WIDTH / count;
 	u8 module, cur_width, base_port;
 	int i;
 	int err;
@@ -1965,18 +2012,14 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port,
 		return -EINVAL;
 	}
 
+	module = mlxsw_sp_port->mapping.module;
+	cur_width = mlxsw_sp_port->mapping.width;
+
 	if (count != 2 && count != 4) {
 		netdev_err(mlxsw_sp_port->dev, "Port can only be split into 2 or 4 ports\n");
 		return -EINVAL;
 	}
 
-	err = mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, &module,
-					    &cur_width);
-	if (err) {
-		netdev_err(mlxsw_sp_port->dev, "Failed to get port's width\n");
-		return err;
-	}
-
 	if (cur_width != MLXSW_PORT_MODULE_MAX_WIDTH) {
 		netdev_err(mlxsw_sp_port->dev, "Port cannot be split further\n");
 		return -EINVAL;
@@ -2001,25 +2044,16 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port,
 	for (i = 0; i < count; i++)
 		mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
 
-	for (i = 0; i < count; i++) {
-		err = mlxsw_sp_port_create(mlxsw_sp, base_port + i, true,
-					   module, width, i * width);
-		if (err) {
-			dev_err(mlxsw_sp->bus_info->dev, "Failed to create split port\n");
-			goto err_port_create;
-		}
+	err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, module, count);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to create split ports\n");
+		goto err_port_split_create;
 	}
 
 	return 0;
 
-err_port_create:
-	for (i--; i >= 0; i--)
-		mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
-	for (i = 0; i < count / 2; i++) {
-		module = mlxsw_sp->port_to_module[base_port + i * 2];
-		mlxsw_sp_port_create(mlxsw_sp, base_port + i * 2, false,
-				     module, MLXSW_PORT_MODULE_MAX_WIDTH, 0);
-	}
+err_port_split_create:
+	mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count);
 	return err;
 }
 
@@ -2027,10 +2061,9 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 	struct mlxsw_sp_port *mlxsw_sp_port;
-	u8 module, cur_width, base_port;
+	u8 cur_width, base_port;
 	unsigned int count;
 	int i;
-	int err;
 
 	mlxsw_sp_port = mlxsw_sp->ports[local_port];
 	if (!mlxsw_sp_port) {
@@ -2044,12 +2077,7 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port)
 		return -EINVAL;
 	}
 
-	err = mlxsw_sp_port_module_info_get(mlxsw_sp, local_port, &module,
-					    &cur_width);
-	if (err) {
-		netdev_err(mlxsw_sp_port->dev, "Failed to get port's width\n");
-		return err;
-	}
+	cur_width = mlxsw_sp_port->mapping.width;
 	count = cur_width == 1 ? 4 : 2;
 
 	base_port = mlxsw_sp_cluster_base_port_get(local_port);
@@ -2061,14 +2089,7 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port)
 	for (i = 0; i < count; i++)
 		mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
 
-	for (i = 0; i < count / 2; i++) {
-		module = mlxsw_sp->port_to_module[base_port + i * 2];
-		err = mlxsw_sp_port_create(mlxsw_sp, base_port + i * 2, false,
-					   module, MLXSW_PORT_MODULE_MAX_WIDTH,
-					   0);
-		if (err)
-			dev_err(mlxsw_sp->bus_info->dev, "Failed to reinstantiate port\n");
-	}
+	mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index e2c022d3e2f3..13b30eaa13d4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -229,6 +229,11 @@ struct mlxsw_sp_port {
 		struct ieee_maxrate *maxrate;
 		struct ieee_pfc *pfc;
 	} dcb;
+	struct {
+		u8 module;
+		u8 width;
+		u8 lane;
+	} mapping;
 	/* 802.1Q bridge VLANs */
 	unsigned long *active_vlans;
 	unsigned long *untagged_vlans;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 753064679bde..61cc6869fa65 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -1105,6 +1105,39 @@ static int qed_get_port_type(u32 media_type)
 	return port_type;
 }
 
+static int qed_get_link_data(struct qed_hwfn *hwfn,
+			     struct qed_mcp_link_params *params,
+			     struct qed_mcp_link_state *link,
+			     struct qed_mcp_link_capabilities *link_caps)
+{	/* fills the three output structs; returns 0 or -ENXIO */
+	void *p;
+
+	if (!IS_PF(hwfn->cdev)) { /* not a PF: use the qed_vf_* getters */
+		qed_vf_get_link_params(hwfn, params);
+		qed_vf_get_link_state(hwfn, link);
+		qed_vf_get_link_caps(hwfn, link_caps);
+
+		return 0;
+	}
+
+	p = qed_mcp_get_link_params(hwfn); /* PF: copy from the MCP getters */
+	if (!p)
+		return -ENXIO; /* a NULL getter result means no data to copy */
+	memcpy(params, p, sizeof(*params));
+
+	p = qed_mcp_get_link_state(hwfn);
+	if (!p)
+		return -ENXIO; /* *params has already been written here */
+	memcpy(link, p, sizeof(*link));
+
+	p = qed_mcp_get_link_capabilities(hwfn);
+	if (!p)
+		return -ENXIO; /* *params and *link have already been written */
+	memcpy(link_caps, p, sizeof(*link_caps));
+
+	return 0;
+}
+
 static void qed_fill_link(struct qed_hwfn *hwfn,
 			  struct qed_link_output *if_link)
 {
@@ -1116,15 +1149,9 @@ static void qed_fill_link(struct qed_hwfn *hwfn,
 	memset(if_link, 0, sizeof(*if_link));
 
 	/* Prepare source inputs */
-	if (IS_PF(hwfn->cdev)) {
-		memcpy(&params, qed_mcp_get_link_params(hwfn), sizeof(params));
-		memcpy(&link, qed_mcp_get_link_state(hwfn), sizeof(link));
-		memcpy(&link_caps, qed_mcp_get_link_capabilities(hwfn),
-		       sizeof(link_caps));
-	} else {
-		qed_vf_get_link_params(hwfn, &params);
-		qed_vf_get_link_state(hwfn, &link);
-		qed_vf_get_link_caps(hwfn, &link_caps);
+	if (qed_get_link_data(hwfn, &params, &link, &link_caps)) {
+		dev_warn(&hwfn->cdev->pdev->dev, "no link data available\n");
+		return;
 	}
 
 	/* Set the link parameters to pass to protocol driver */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
index c8667c65e685..c90b2b6ad969 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
@@ -12,11 +12,13 @@
 #include "qed_vf.h"
 #define QED_VF_ARRAY_LENGTH (3)
 
+#ifdef CONFIG_QED_SRIOV
 #define IS_VF(cdev)             ((cdev)->b_is_vf)
 #define IS_PF(cdev)             (!((cdev)->b_is_vf))
-#ifdef CONFIG_QED_SRIOV
 #define IS_PF_SRIOV(p_hwfn)     (!!((p_hwfn)->cdev->p_iov_info))
 #else
+#define IS_VF(cdev)             (0)
+#define IS_PF(cdev)             (1)
 #define IS_PF_SRIOV(p_hwfn)     (0)
 #endif
 #define IS_PF_SRIOV_ALLOC(p_hwfn)       (!!((p_hwfn)->pf_iov_info))
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 5d00d1404bfc..5733d1888223 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -87,7 +87,9 @@ static const struct pci_device_id qede_pci_tbl[] = {
 	{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_100), QEDE_PRIVATE_PF},
 	{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_50), QEDE_PRIVATE_PF},
 	{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_25), QEDE_PRIVATE_PF},
+#ifdef CONFIG_QED_SRIOV
 	{PCI_VDEVICE(QLOGIC, PCI_DEVICE_ID_57980S_IOV), QEDE_PRIVATE_VF},
+#endif
 	{ 0 }
 };
 
diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c
index 7f295c4d7b80..2a9228a6e4a0 100644
--- a/drivers/net/ethernet/sfc/mcdi_port.c
+++ b/drivers/net/ethernet/sfc/mcdi_port.c
@@ -189,11 +189,12 @@ static u32 mcdi_to_ethtool_cap(u32 media, u32 cap)
 
 	case MC_CMD_MEDIA_XFP:
 	case MC_CMD_MEDIA_SFP_PLUS:
-		result |= SUPPORTED_FIBRE;
-		break;
-
 	case MC_CMD_MEDIA_QSFP_PLUS:
 		result |= SUPPORTED_FIBRE;
+		if (cap & (1 << MC_CMD_PHY_CAP_1000FDX_LBN))
+			result |= SUPPORTED_1000baseT_Full;
+		if (cap & (1 << MC_CMD_PHY_CAP_10000FDX_LBN))
+			result |= SUPPORTED_10000baseT_Full;
 		if (cap & (1 << MC_CMD_PHY_CAP_40000FDX_LBN))
 			result |= SUPPORTED_40000baseCR4_Full;
 		break;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 4f7283d05588..44da877d2483 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -156,7 +156,7 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
 		struct netdev_hw_addr *ha;
 
 		netdev_for_each_uc_addr(ha, dev) {
-			dwmac4_set_umac_addr(ioaddr, ha->addr, reg);
+			dwmac4_set_umac_addr(hw, ha->addr, reg);
 			reg++;
 		}
 	}
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index eac45d0c75e2..a473c182c91d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3450,8 +3450,6 @@ int stmmac_resume(struct device *dev)
 	if (!netif_running(ndev))
 		return 0;
 
-	spin_lock_irqsave(&priv->lock, flags);
-
 	/* Power Down bit, into the PM register, is cleared
 	 * automatically as soon as a magic packet or a Wake-up frame
 	 * is received. Anyway, it's better to manually clear
@@ -3459,7 +3457,9 @@ int stmmac_resume(struct device *dev)
 	 * from another devices (e.g. serial console).
 	 */
 	if (device_may_wakeup(priv->device)) {
+		spin_lock_irqsave(&priv->lock, flags);
 		priv->hw->mac->pmt(priv->hw, 0);
+		spin_unlock_irqrestore(&priv->lock, flags);
 		priv->irq_wake = 0;
 	} else {
 		pinctrl_pm_select_default_state(priv->device);
@@ -3473,6 +3473,8 @@ int stmmac_resume(struct device *dev)
 
 	netif_device_attach(ndev);
 
+	spin_lock_irqsave(&priv->lock, flags);
+
 	priv->cur_rx = 0;
 	priv->dirty_rx = 0;
 	priv->dirty_tx = 0;
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 4b08a2f52b3e..e6bb0ecb12c7 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1339,7 +1339,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
 	if (priv->coal_intvl != 0) {
 		struct ethtool_coalesce coal;
 
-		coal.rx_coalesce_usecs = (priv->coal_intvl << 4);
+		coal.rx_coalesce_usecs = priv->coal_intvl;
 		cpsw_set_coalesce(ndev, &coal);
 	}
 
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index db8022ae415b..08885bc8d6db 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1369,7 +1369,7 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
 				rcdlro = (struct Vmxnet3_RxCompDescExt *)rcd;
 
 				segCnt = rcdlro->segCnt;
-				BUG_ON(segCnt <= 1);
+				WARN_ON_ONCE(segCnt == 0);
 				mss = rcdlro->mss;
 				if (unlikely(segCnt <= 1))
 					segCnt = 0;
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index c4825392d64b..3d2b64e63408 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -69,10 +69,10 @@
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.7.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.4.8.0-k"
 
 /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01040700
+#define VMXNET3_DRIVER_VERSION_NUM      0x01040800
 
 #if defined(CONFIG_PCI_MSI)
 	/* RSS only makes sense if MSI-X is supported. */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index d0631b6cfd53..62f475e31077 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -2540,12 +2540,14 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
 			   const u8 *mac, struct station_info *sinfo)
 {
 	struct brcmf_if *ifp = netdev_priv(ndev);
+	struct brcmf_scb_val_le scb_val;
 	s32 err = 0;
 	struct brcmf_sta_info_le sta_info_le;
 	u32 sta_flags;
 	u32 is_tdls_peer;
 	s32 total_rssi;
 	s32 count_rssi;
+	int rssi;
 	u32 i;
 
 	brcmf_dbg(TRACE, "Enter, MAC %pM\n", mac);
@@ -2629,6 +2631,20 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev,
 			sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
 			total_rssi /= count_rssi;
 			sinfo->signal = total_rssi;
+		} else if (test_bit(BRCMF_VIF_STATUS_CONNECTED,
+			&ifp->vif->sme_state)) {
+			memset(&scb_val, 0, sizeof(scb_val));
+			err = brcmf_fil_cmd_data_get(ifp, BRCMF_C_GET_RSSI,
+						     &scb_val, sizeof(scb_val));
+			if (err) {
+				brcmf_err("Could not get rssi (%d)\n", err);
+				goto done;
+			} else {
+				rssi = le32_to_cpu(scb_val.val);
+				sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL);
+				sinfo->signal = rssi;
+				brcmf_dbg(CONN, "RSSI %d dBm\n", rssi);
+			}
 		}
 	}
 done:
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c
index 68f1ce02f4bf..2b9a2bc429d6 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/msgbuf.c
@@ -1157,6 +1157,8 @@ brcmf_msgbuf_process_rx_complete(struct brcmf_msgbuf *msgbuf, void *buf)
 		brcmu_pkt_buf_free_skb(skb);
 		return;
 	}
+
+	skb->protocol = eth_type_trans(skb, ifp->ndev);
 	brcmf_netif_rx(ifp, skb);
 }
 
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 9ed0ed1bf514..4dd5adcdd29b 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2776,6 +2776,7 @@ static int hwsim_tx_info_frame_received_nl(struct sk_buff *skb_2,
 	if (!info->attrs[HWSIM_ATTR_ADDR_TRANSMITTER] ||
 	    !info->attrs[HWSIM_ATTR_FLAGS] ||
 	    !info->attrs[HWSIM_ATTR_COOKIE] ||
+	    !info->attrs[HWSIM_ATTR_SIGNAL] ||
 	    !info->attrs[HWSIM_ATTR_TX_INFO])
 		goto out;
 
diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c
index 0f48048b8654..3a0faa8fe9d4 100644
--- a/drivers/net/wireless/realtek/rtlwifi/core.c
+++ b/drivers/net/wireless/realtek/rtlwifi/core.c
@@ -54,7 +54,7 @@ EXPORT_SYMBOL(channel5g_80m);
 void rtl_addr_delay(u32 addr)
 {
 	if (addr == 0xfe)
-		msleep(50);
+		mdelay(50);
 	else if (addr == 0xfd)
 		msleep(5);
 	else if (addr == 0xfc)
@@ -75,7 +75,7 @@ void rtl_rfreg_delay(struct ieee80211_hw *hw, enum radio_path rfpath, u32 addr,
 		rtl_addr_delay(addr);
 	} else {
 		rtl_set_rfreg(hw, rfpath, addr, mask, data);
-		usleep_range(1, 2);
+		udelay(1);
 	}
 }
 EXPORT_SYMBOL(rtl_rfreg_delay);
@@ -86,7 +86,7 @@ void rtl_bb_delay(struct ieee80211_hw *hw, u32 addr, u32 data)
 		rtl_addr_delay(addr);
 	} else {
 		rtl_set_bbreg(hw, addr, MASKDWORD, data);
-		usleep_range(1, 2);
+		udelay(1);
 	}
 }
 EXPORT_SYMBOL(rtl_bb_delay);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 78dca3193ca4..befac5b19490 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1679,9 +1679,14 @@ static int nvme_pci_enable(struct nvme_dev *dev)
 
 static void nvme_dev_unmap(struct nvme_dev *dev)
 {
+	struct pci_dev *pdev = to_pci_dev(dev->dev);
+	int bars;
+
 	if (dev->bar)
 		iounmap(dev->bar);
-	pci_release_regions(to_pci_dev(dev->dev));
+
+	bars = pci_select_bars(pdev, IORESOURCE_MEM);
+	pci_release_selected_regions(pdev, bars);
 }
 
 static void nvme_pci_disable(struct nvme_dev *dev)
@@ -1924,7 +1929,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
 
        return 0;
   release:
-       pci_release_regions(pdev);
+       pci_release_selected_regions(pdev, bars);
        return -ENODEV;
 }
 
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 14f2f8c7c260..33daffc4392c 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -395,7 +395,7 @@ static int unflatten_dt_nodes(const void *blob,
 			      struct device_node **nodepp)
 {
 	struct device_node *root;
-	int offset = 0, depth = 0;
+	int offset = 0, depth = 0, initial_depth = 0;
 #define FDT_MAX_DEPTH	64
 	unsigned int fpsizes[FDT_MAX_DEPTH];
 	struct device_node *nps[FDT_MAX_DEPTH];
@@ -405,11 +405,22 @@ static int unflatten_dt_nodes(const void *blob,
 	if (nodepp)
 		*nodepp = NULL;
 
+	/*
+	 * We're unflattening a device sub-tree if @dad is valid. There are
+	 * possibly multiple nodes in the first level of depth. We need to
+	 * set @depth to 1 to make fdt_next_node() happy as it bails
+	 * immediately when negative @depth is found. Otherwise, the device
+	 * nodes except the first one won't be unflattened successfully.
+	 */
+	if (dad)
+		depth = initial_depth = 1;
+
 	root = dad;
 	fpsizes[depth] = dad ? strlen(of_node_full_name(dad)) : 0;
 	nps[depth] = dad;
+
 	for (offset = 0;
-	     offset >= 0 && depth >= 0;
+	     offset >= 0 && depth >= initial_depth;
 	     offset = fdt_next_node(blob, offset, &depth)) {
 		if (WARN_ON_ONCE(depth >= FDT_MAX_DEPTH))
 			continue;
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index e7bfc175b8e1..6ec743faabe8 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -386,13 +386,13 @@ int of_irq_to_resource(struct device_node *dev, int index, struct resource *r)
 EXPORT_SYMBOL_GPL(of_irq_to_resource);
 
 /**
- * of_irq_get - Decode a node's IRQ and return it as a Linux irq number
+ * of_irq_get - Decode a node's IRQ and return it as a Linux IRQ number
  * @dev: pointer to device tree node
- * @index: zero-based index of the irq
- *
- * Returns Linux irq number on success, or -EPROBE_DEFER if the irq domain
- * is not yet created.
+ * @index: zero-based index of the IRQ
  *
+ * Returns Linux IRQ number on success, or 0 on the IRQ mapping failure, or
+ * -EPROBE_DEFER if the IRQ domain is not yet created, or error code in case
+ * of any other failure.
  */
 int of_irq_get(struct device_node *dev, int index)
 {
@@ -413,12 +413,13 @@ int of_irq_get(struct device_node *dev, int index)
 EXPORT_SYMBOL_GPL(of_irq_get);
 
 /**
- * of_irq_get_byname - Decode a node's IRQ and return it as a Linux irq number
+ * of_irq_get_byname - Decode a node's IRQ and return it as a Linux IRQ number
  * @dev: pointer to device tree node
- * @name: irq name
+ * @name: IRQ name
  *
- * Returns Linux irq number on success, or -EPROBE_DEFER if the irq domain
- * is not yet created, or error code in case of any other failure.
+ * Returns Linux IRQ number on success, or 0 on the IRQ mapping failure, or
+ * -EPROBE_DEFER if the IRQ domain is not yet created, or error code in case
+ * of any other failure.
  */
 int of_irq_get_byname(struct device_node *dev, const char *name)
 {
diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c
index ed01c0172e4a..216648233874 100644
--- a/drivers/of/of_reserved_mem.c
+++ b/drivers/of/of_reserved_mem.c
@@ -127,8 +127,15 @@ static int __init __reserved_mem_alloc_size(unsigned long node,
 	}
 
 	/* Need adjust the alignment to satisfy the CMA requirement */
-	if (IS_ENABLED(CONFIG_CMA) && of_flat_dt_is_compatible(node, "shared-dma-pool"))
-		align = max(align, (phys_addr_t)PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order));
+	if (IS_ENABLED(CONFIG_CMA)
+	    && of_flat_dt_is_compatible(node, "shared-dma-pool")
+	    && of_get_flat_dt_prop(node, "reusable", NULL)
+	    && !of_get_flat_dt_prop(node, "no-map", NULL)) {
+		unsigned long order =
+			max_t(unsigned long, MAX_ORDER - 1, pageblock_order);
+
+		align = max(align, (phys_addr_t)PAGE_SIZE << order);
+	}
 
 	prop = of_get_flat_dt_prop(node, "alloc-ranges", &len);
 	if (prop) {
diff --git a/drivers/pci/vc.c b/drivers/pci/vc.c
index dfbab61a1b47..1fa3a3219c45 100644
--- a/drivers/pci/vc.c
+++ b/drivers/pci/vc.c
@@ -221,9 +221,9 @@ static int pci_vc_do_save_buffer(struct pci_dev *dev, int pos,
 		else
 			pci_write_config_word(dev, pos + PCI_VC_PORT_CTRL,
 					      *(u16 *)buf);
-		buf += 2;
+		buf += 4;
 	}
-	len += 2;
+	len += 4;
 
 	/*
 	 * If we have any Low Priority VCs and a VC Arbitration Table Offset
diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
index 1b8304e1efaa..140436a046c0 100644
--- a/drivers/perf/arm_pmu.c
+++ b/drivers/perf/arm_pmu.c
@@ -1010,8 +1010,8 @@ int arm_pmu_device_probe(struct platform_device *pdev,
 		if (!ret)
 			ret = init_fn(pmu);
 	} else {
-		ret = probe_current_pmu(pmu, probe_table);
 		cpumask_setall(&pmu->supported_cpus);
+		ret = probe_current_pmu(pmu, probe_table);
 	}
 
 	if (ret) {
diff --git a/drivers/phy/phy-exynos-mipi-video.c b/drivers/phy/phy-exynos-mipi-video.c
index cc093ebfda94..8b851f718123 100644
--- a/drivers/phy/phy-exynos-mipi-video.c
+++ b/drivers/phy/phy-exynos-mipi-video.c
@@ -233,8 +233,12 @@ static inline int __is_running(const struct exynos_mipi_phy_desc *data,
 			struct exynos_mipi_video_phy *state)
 {
 	u32 val;
+	int ret;
+
+	ret = regmap_read(state->regmaps[data->resetn_map], data->resetn_reg, &val);
+	if (ret)
+		return 0;
 
-	regmap_read(state->regmaps[data->resetn_map], data->resetn_reg, &val);
 	return val & data->resetn_val;
 }
 
diff --git a/drivers/phy/phy-ti-pipe3.c b/drivers/phy/phy-ti-pipe3.c
index 0a477d24cf76..bf46844dc387 100644
--- a/drivers/phy/phy-ti-pipe3.c
+++ b/drivers/phy/phy-ti-pipe3.c
@@ -293,11 +293,18 @@ static int ti_pipe3_init(struct phy *x)
 		ret = ti_pipe3_dpll_wait_lock(phy);
 	}
 
-	/* Program the DPLL only if not locked */
+	/* SATA has issues if re-programmed when locked */
 	val = ti_pipe3_readl(phy->pll_ctrl_base, PLL_STATUS);
-	if (!(val & PLL_LOCK))
-		if (ti_pipe3_dpll_program(phy))
-			return -EINVAL;
+	if ((val & PLL_LOCK) && of_device_is_compatible(phy->dev->of_node,
+							"ti,phy-pipe3-sata"))
+		return ret;
+
+	/* Program the DPLL */
+	ret = ti_pipe3_dpll_program(phy);
+	if (ret) {
+		ti_pipe3_disable_clocks(phy);
+		return -EINVAL;
+	}
 
 	return ret;
 }
diff --git a/drivers/phy/phy-twl4030-usb.c b/drivers/phy/phy-twl4030-usb.c
index 6b6af6cba454..d9b10a39a2cf 100644
--- a/drivers/phy/phy-twl4030-usb.c
+++ b/drivers/phy/phy-twl4030-usb.c
@@ -463,7 +463,8 @@ static int twl4030_phy_power_on(struct phy *phy)
 	twl4030_usb_set_mode(twl, twl->usb_mode);
 	if (twl->usb_mode == T2_USB_MODE_ULPI)
 		twl4030_i2c_access(twl, 0);
-	schedule_delayed_work(&twl->id_workaround_work, 0);
+	twl->linkstat = MUSB_UNKNOWN;
+	schedule_delayed_work(&twl->id_workaround_work, HZ);
 
 	return 0;
 }
@@ -537,6 +538,7 @@ static irqreturn_t twl4030_usb_irq(int irq, void *_twl)
 	struct twl4030_usb *twl = _twl;
 	enum musb_vbus_id_status status;
 	bool status_changed = false;
+	int err;
 
 	status = twl4030_usb_linkstat(twl);
 
@@ -567,7 +569,9 @@ static irqreturn_t twl4030_usb_irq(int irq, void *_twl)
 			pm_runtime_mark_last_busy(twl->dev);
 			pm_runtime_put_autosuspend(twl->dev);
 		}
-		musb_mailbox(status);
+		err = musb_mailbox(status);
+		if (err)
+			twl->linkstat = MUSB_UNKNOWN;
 	}
 
 	/* don't schedule during sleep - irq works right then */
@@ -595,7 +599,8 @@ static int twl4030_phy_init(struct phy *phy)
 	struct twl4030_usb *twl = phy_get_drvdata(phy);
 
 	pm_runtime_get_sync(twl->dev);
-	schedule_delayed_work(&twl->id_workaround_work, 0);
+	twl->linkstat = MUSB_UNKNOWN;
+	schedule_delayed_work(&twl->id_workaround_work, HZ);
 	pm_runtime_mark_last_busy(twl->dev);
 	pm_runtime_put_autosuspend(twl->dev);
 
@@ -763,7 +768,8 @@ static int twl4030_usb_remove(struct platform_device *pdev)
 	if (cable_present(twl->linkstat))
 		pm_runtime_put_noidle(twl->dev);
 	pm_runtime_mark_last_busy(twl->dev);
-	pm_runtime_put_sync_suspend(twl->dev);
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
+	pm_runtime_put_sync(twl->dev);
 	pm_runtime_disable(twl->dev);
 
 	/* autogate 60MHz ULPI clock,
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index c06bb85c2839..3ec0025d19e7 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -103,7 +103,6 @@ config DELL_SMBIOS
 
 config DELL_LAPTOP
 	tristate "Dell Laptop Extras"
-	depends on X86
 	depends on DELL_SMBIOS
 	depends on DMI
 	depends on BACKLIGHT_CLASS_DEVICE
@@ -505,7 +504,7 @@ config THINKPAD_ACPI_HOTKEY_POLL
 
 config SENSORS_HDAPS
 	tristate "Thinkpad Hard Drive Active Protection System (hdaps)"
-	depends on INPUT && X86
+	depends on INPUT
 	select INPUT_POLLDEV
 	default n
 	help
@@ -749,7 +748,7 @@ config TOSHIBA_WMI
 
 config ACPI_CMPC
 	tristate "CMPC Laptop Extras"
-	depends on X86 && ACPI
+	depends on ACPI
 	depends on RFKILL || RFKILL=n
 	select INPUT
 	select BACKLIGHT_CLASS_DEVICE
@@ -848,7 +847,7 @@ config INTEL_IMR
 
 config INTEL_PMC_CORE
 	bool "Intel PMC Core driver"
-	depends on X86 && PCI
+	depends on PCI
 	---help---
 	  The Intel Platform Controller Hub for Intel Core SoCs provides access
 	  to Power Management Controller registers via a PCI interface. This
@@ -860,7 +859,7 @@ config INTEL_PMC_CORE
 
 config IBM_RTL
 	tristate "Device driver to enable PRTL support"
-	depends on X86 && PCI
+	depends on PCI
 	---help---
 	 Enable support for IBM Premium Real Time Mode (PRTM).
 	 This module will allow you the enter and exit PRTM in the BIOS via
@@ -894,7 +893,6 @@ config XO15_EBOOK
 
 config SAMSUNG_LAPTOP
 	tristate "Samsung Laptop driver"
-	depends on X86
 	depends on RFKILL || RFKILL = n
 	depends on ACPI_VIDEO || ACPI_VIDEO = n
 	depends on BACKLIGHT_CLASS_DEVICE
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index 4a23fbc66b71..d1a091b93192 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -567,6 +567,7 @@ static void ideapad_sysfs_exit(struct ideapad_private *priv)
 static const struct key_entry ideapad_keymap[] = {
 	{ KE_KEY, 6,  { KEY_SWITCHVIDEOMODE } },
 	{ KE_KEY, 7,  { KEY_CAMERA } },
+	{ KE_KEY, 8,  { KEY_MICMUTE } },
 	{ KE_KEY, 11, { KEY_F16 } },
 	{ KE_KEY, 13, { KEY_WLAN } },
 	{ KE_KEY, 16, { KEY_PROG1 } },
@@ -809,6 +810,7 @@ static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data)
 				break;
 			case 13:
 			case 11:
+			case 8:
 			case 7:
 			case 6:
 				ideapad_input_report(priv, vpc_bit);
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index c3bfa1fe95bf..b65ce7519411 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -2043,6 +2043,7 @@ static int hotkey_autosleep_ack;
 
 static u32 hotkey_orig_mask;		/* events the BIOS had enabled */
 static u32 hotkey_all_mask;		/* all events supported in fw */
+static u32 hotkey_adaptive_all_mask;	/* all adaptive events supported in fw */
 static u32 hotkey_reserved_mask;	/* events better left disabled */
 static u32 hotkey_driver_mask;		/* events needed by the driver */
 static u32 hotkey_user_mask;		/* events visible to userspace */
@@ -2742,6 +2743,17 @@ static ssize_t hotkey_all_mask_show(struct device *dev,
 
 static DEVICE_ATTR_RO(hotkey_all_mask);
 
+/* sysfs hotkey adaptive_all_mask -------------------------------------- */
+static ssize_t hotkey_adaptive_all_mask_show(struct device *dev,
+			   struct device_attribute *attr,
+			   char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "0x%08x\n",
+			hotkey_adaptive_all_mask | hotkey_source_mask);
+}
+
+static DEVICE_ATTR_RO(hotkey_adaptive_all_mask);
+
 /* sysfs hotkey recommended_mask --------------------------------------- */
 static ssize_t hotkey_recommended_mask_show(struct device *dev,
 					    struct device_attribute *attr,
@@ -2985,6 +2997,7 @@ static struct attribute *hotkey_attributes[] __initdata = {
 	&dev_attr_wakeup_hotunplug_complete.attr,
 	&dev_attr_hotkey_mask.attr,
 	&dev_attr_hotkey_all_mask.attr,
+	&dev_attr_hotkey_adaptive_all_mask.attr,
 	&dev_attr_hotkey_recommended_mask.attr,
 #ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
 	&dev_attr_hotkey_source_mask.attr,
@@ -3321,20 +3334,6 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 	if (!tp_features.hotkey)
 		return 1;
 
-	/*
-	 * Check if we have an adaptive keyboard, like on the
-	 * Lenovo Carbon X1 2014 (2nd Gen).
-	 */
-	if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) {
-		if ((hkeyv >> 8) == 2) {
-			tp_features.has_adaptive_kbd = true;
-			res = sysfs_create_group(&tpacpi_pdev->dev.kobj,
-					&adaptive_kbd_attr_group);
-			if (res)
-				goto err_exit;
-		}
-	}
-
 	quirks = tpacpi_check_quirks(tpacpi_hotkey_qtable,
 				     ARRAY_SIZE(tpacpi_hotkey_qtable));
 
@@ -3357,30 +3356,70 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
 	   A30, R30, R31, T20-22, X20-21, X22-24.  Detected by checking
 	   for HKEY interface version 0x100 */
 	if (acpi_evalf(hkey_handle, &hkeyv, "MHKV", "qd")) {
-		if ((hkeyv >> 8) != 1) {
-			pr_err("unknown version of the HKEY interface: 0x%x\n",
-			       hkeyv);
-			pr_err("please report this to %s\n", TPACPI_MAIL);
-		} else {
+		vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY,
+			    "firmware HKEY interface version: 0x%x\n",
+			    hkeyv);
+
+		switch (hkeyv >> 8) {
+		case 1:
 			/*
 			 * MHKV 0x100 in A31, R40, R40e,
 			 * T4x, X31, and later
 			 */
-			vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_HKEY,
-				"firmware HKEY interface version: 0x%x\n",
-				hkeyv);
 
 			/* Paranoia check AND init hotkey_all_mask */
 			if (!acpi_evalf(hkey_handle, &hotkey_all_mask,
 					"MHKA", "qd")) {
-				pr_err("missing MHKA handler, "
-				       "please report this to %s\n",
+				pr_err("missing MHKA handler, please report this to %s\n",
 				       TPACPI_MAIL);
 				/* Fallback: pre-init for FN+F3,F4,F12 */
 				hotkey_all_mask = 0x080cU;
 			} else {
 				tp_features.hotkey_mask = 1;
 			}
+			break;
+
+		case 2:
+			/*
+			 * MHKV 0x200 in X1, T460s, X260, T560, X1 Tablet (2016)
+			 */
+
+			/* Paranoia check AND init hotkey_all_mask */
+			if (!acpi_evalf(hkey_handle, &hotkey_all_mask,
+					"MHKA", "dd", 1)) {
+				pr_err("missing MHKA handler, please report this to %s\n",
+				       TPACPI_MAIL);
+				/* Fallback: pre-init for FN+F3,F4,F12 */
+				hotkey_all_mask = 0x080cU;
+			} else {
+				tp_features.hotkey_mask = 1;
+			}
+
+			/*
+			 * Check if we have an adaptive keyboard, like on the
+			 * Lenovo Carbon X1 2014 (2nd Gen).
+			 */
+			if (acpi_evalf(hkey_handle, &hotkey_adaptive_all_mask,
+				       "MHKA", "dd", 2)) {
+				if (hotkey_adaptive_all_mask != 0) {
+					tp_features.has_adaptive_kbd = true;
+					res = sysfs_create_group(
+						&tpacpi_pdev->dev.kobj,
+						&adaptive_kbd_attr_group);
+					if (res)
+						goto err_exit;
+				}
+			} else {
+				tp_features.has_adaptive_kbd = false;
+				hotkey_adaptive_all_mask = 0x0U;
+			}
+			break;
+
+		default:
+			pr_err("unknown version of the HKEY interface: 0x%x\n",
+			       hkeyv);
+			pr_err("please report this to %s\n", TPACPI_MAIL);
+			break;
 		}
 	}
 
diff --git a/drivers/pwm/core.c b/drivers/pwm/core.c
index dba3843c53b8..ed337a8c34ab 100644
--- a/drivers/pwm/core.c
+++ b/drivers/pwm/core.c
@@ -457,7 +457,8 @@ int pwm_apply_state(struct pwm_device *pwm, struct pwm_state *state)
 {
 	int err;
 
-	if (!pwm)
+	if (!pwm || !state || !state->period ||
+	    state->duty_cycle > state->period)
 		return -EINVAL;
 
 	if (!memcmp(state, &pwm->state, sizeof(*state)))
diff --git a/drivers/pwm/pwm-atmel-hlcdc.c b/drivers/pwm/pwm-atmel-hlcdc.c
index f994c7eaf41c..14fc011faa32 100644
--- a/drivers/pwm/pwm-atmel-hlcdc.c
+++ b/drivers/pwm/pwm-atmel-hlcdc.c
@@ -272,7 +272,7 @@ static int atmel_hlcdc_pwm_probe(struct platform_device *pdev)
 	chip->chip.of_pwm_n_cells = 3;
 	chip->chip.can_sleep = 1;
 
-	ret = pwmchip_add(&chip->chip);
+	ret = pwmchip_add_with_polarity(&chip->chip, PWM_POLARITY_INVERSED);
 	if (ret) {
 		clk_disable_unprepare(hlcdc->periph_clk);
 		return ret;
diff --git a/drivers/pwm/sysfs.c b/drivers/pwm/sysfs.c
index d98599249a05..01695d48dd54 100644
--- a/drivers/pwm/sysfs.c
+++ b/drivers/pwm/sysfs.c
@@ -152,7 +152,7 @@ static ssize_t enable_store(struct device *child,
 		goto unlock;
 	}
 
-	pwm_apply_state(pwm, &state);
+	ret = pwm_apply_state(pwm, &state);
 
 unlock:
 	mutex_unlock(&export->lock);
diff --git a/drivers/regulator/qcom_smd-regulator.c b/drivers/regulator/qcom_smd-regulator.c
index 56a17ec5b5ef..526bf23dcb49 100644
--- a/drivers/regulator/qcom_smd-regulator.c
+++ b/drivers/regulator/qcom_smd-regulator.c
@@ -140,6 +140,19 @@ static const struct regulator_ops rpm_smps_ldo_ops = {
 	.enable = rpm_reg_enable,
 	.disable = rpm_reg_disable,
 	.is_enabled = rpm_reg_is_enabled,
+	.list_voltage = regulator_list_voltage_linear_range,
+
+	.get_voltage = rpm_reg_get_voltage,
+	.set_voltage = rpm_reg_set_voltage,
+
+	.set_load = rpm_reg_set_load,
+};
+
+static const struct regulator_ops rpm_smps_ldo_ops_fixed = {
+	.enable = rpm_reg_enable,
+	.disable = rpm_reg_disable,
+	.is_enabled = rpm_reg_is_enabled,
+	.list_voltage = regulator_list_voltage_linear_range,
 
 	.get_voltage = rpm_reg_get_voltage,
 	.set_voltage = rpm_reg_set_voltage,
@@ -247,7 +260,7 @@ static const struct regulator_desc pm8941_nldo = {
 static const struct regulator_desc pm8941_lnldo = {
 	.fixed_uV = 1740000,
 	.n_voltages = 1,
-	.ops = &rpm_smps_ldo_ops,
+	.ops = &rpm_smps_ldo_ops_fixed,
 };
 
 static const struct regulator_desc pm8941_switch = {
diff --git a/drivers/regulator/tps51632-regulator.c b/drivers/regulator/tps51632-regulator.c
index 572816e30095..c139890c1514 100644
--- a/drivers/regulator/tps51632-regulator.c
+++ b/drivers/regulator/tps51632-regulator.c
@@ -94,11 +94,14 @@ static int tps51632_dcdc_set_ramp_delay(struct regulator_dev *rdev,
 		int ramp_delay)
 {
 	struct tps51632_chip *tps = rdev_get_drvdata(rdev);
-	int bit = ramp_delay/6000;
+	int bit;
 	int ret;
 
-	if (bit)
-		bit--;
+	if (ramp_delay == 0)
+		bit = 0;
+	else
+		bit = DIV_ROUND_UP(ramp_delay, 6000) - 1;
+
 	ret = regmap_write(tps->regmap, TPS51632_SLEW_REGS, BIT(bit));
 	if (ret < 0)
 		dev_err(tps->dev, "SLEW reg write failed, err %d\n", ret);
diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index d4c285688ce9..3ddc85e6efd6 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -1122,7 +1122,7 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp,
 		} else {
 			struct scsi_cmnd *SCp;
 
-			SCp = scsi_host_find_tag(SDp->host, SCSI_NO_TAG);
+			SCp = SDp->current_cmnd;
 			if(unlikely(SCp == NULL)) {
 				sdev_printk(KERN_ERR, SDp,
 					"no saved request for untagged cmd\n");
@@ -1826,7 +1826,7 @@ NCR_700_queuecommand_lck(struct scsi_cmnd *SCp, void (*done)(struct scsi_cmnd *)
 		       slot->tag, slot);
 	} else {
 		slot->tag = SCSI_NO_TAG;
-		/* must populate current_cmnd for scsi_host_find_tag to work */
+		/* save current command for reselection */
 		SCp->device->current_cmnd = SCp;
 	}
 	/* sanity check: some of the commands generated by the mid-layer
diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index 3408578b08d6..ff41c310c900 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c
@@ -230,6 +230,7 @@ static struct {
 	{"PIONEER", "CD-ROM DRM-624X", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},
 	{"Promise", "VTrak E610f", NULL, BLIST_SPARSELUN | BLIST_NO_RSOC},
 	{"Promise", "", NULL, BLIST_SPARSELUN},
+	{"QEMU", "QEMU CD-ROM", NULL, BLIST_SKIP_VPD_PAGES},
 	{"QNAP", "iSCSI Storage", NULL, BLIST_MAX_1024},
 	{"SYNOLOGY", "iSCSI Storage", NULL, BLIST_MAX_1024},
 	{"QUANTUM", "XP34301", "1071", BLIST_NOTQ},
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index a8b610eaa0ca..106a6adbd6f1 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1128,7 +1128,6 @@ static int scsi_eh_action(struct scsi_cmnd *scmd, int rtn)
  */
 void scsi_eh_finish_cmd(struct scsi_cmnd *scmd, struct list_head *done_q)
 {
-	scmd->device->host->host_failed--;
 	scmd->eh_eflags = 0;
 	list_move_tail(&scmd->eh_entry, done_q);
 }
@@ -2227,6 +2226,9 @@ int scsi_error_handler(void *data)
 		else
 			scsi_unjam_host(shost);
 
+		/* All scmds have been handled */
+		shost->host_failed = 0;
+
 		/*
 		 * Note - if the above fails completely, the action is to take
 		 * individual devices offline and flush the queue of any
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index f459dff30512..60bff78e9ead 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2867,10 +2867,10 @@ static int sd_revalidate_disk(struct gendisk *disk)
 	if (sdkp->opt_xfer_blocks &&
 	    sdkp->opt_xfer_blocks <= dev_max &&
 	    sdkp->opt_xfer_blocks <= SD_DEF_XFER_BLOCKS &&
-	    sdkp->opt_xfer_blocks * sdp->sector_size >= PAGE_SIZE)
-		rw_max = q->limits.io_opt =
-			sdkp->opt_xfer_blocks * sdp->sector_size;
-	else
+	    logical_to_bytes(sdp, sdkp->opt_xfer_blocks) >= PAGE_SIZE) {
+		q->limits.io_opt = logical_to_bytes(sdp, sdkp->opt_xfer_blocks);
+		rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
+	} else
 		rw_max = BLK_DEF_MAX_SECTORS;
 
 	/* Combine with controller limits */
diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h
index 654630bb7d0e..765a6f1ac1b7 100644
--- a/drivers/scsi/sd.h
+++ b/drivers/scsi/sd.h
@@ -151,6 +151,11 @@ static inline sector_t logical_to_sectors(struct scsi_device *sdev, sector_t blo
 	return blocks << (ilog2(sdev->sector_size) - 9);
 }
 
+static inline unsigned int logical_to_bytes(struct scsi_device *sdev, sector_t blocks)
+{
+	return blocks * sdev->sector_size;
+}
+
 /*
  * A DIF-capable target device can be formatted with different
  * protection schemes.  Currently 0 through 3 are defined:
diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
index bbfee53cfcf5..845e49a52430 100644
--- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
+++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd_cb.c
@@ -2521,12 +2521,13 @@ kiblnd_passive_connect(struct rdma_cm_id *cmid, void *priv, int priv_nob)
 	return 0;
 
  failed:
-	if (ni)
+	if (ni) {
 		lnet_ni_decref(ni);
+		rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
+		rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni);
+	}
 
 	rej.ibr_version             = version;
-	rej.ibr_cp.ibcp_queue_depth = kiblnd_msg_queue_size(version, ni);
-	rej.ibr_cp.ibcp_max_frags = kiblnd_rdma_frags(version, ni);
 	kiblnd_reject(cmid, &rej);
 
 	return -ECONNREFUSED;
diff --git a/drivers/staging/rtl8188eu/core/rtw_efuse.c b/drivers/staging/rtl8188eu/core/rtw_efuse.c
index c17870cddb5b..fbce1f7e68ca 100644
--- a/drivers/staging/rtl8188eu/core/rtw_efuse.c
+++ b/drivers/staging/rtl8188eu/core/rtw_efuse.c
@@ -102,7 +102,7 @@ efuse_phymap_to_logical(u8 *phymap, u16 _offset, u16 _size_byte, u8  *pbuf)
 	if (!efuseTbl)
 		return;
 
-	eFuseWord = (u16 **)rtw_malloc2d(EFUSE_MAX_SECTION_88E, EFUSE_MAX_WORD_UNIT, sizeof(*eFuseWord));
+	eFuseWord = (u16 **)rtw_malloc2d(EFUSE_MAX_SECTION_88E, EFUSE_MAX_WORD_UNIT, sizeof(u16));
 	if (!eFuseWord) {
 		DBG_88E("%s: alloc eFuseWord fail!\n", __func__);
 		goto eFuseWord_failed;
diff --git a/drivers/staging/rtl8188eu/hal/usb_halinit.c b/drivers/staging/rtl8188eu/hal/usb_halinit.c
index 87ea3b844951..363f3a34ddce 100644
--- a/drivers/staging/rtl8188eu/hal/usb_halinit.c
+++ b/drivers/staging/rtl8188eu/hal/usb_halinit.c
@@ -2072,7 +2072,8 @@ void rtl8188eu_set_hal_ops(struct adapter *adapt)
 {
 	struct hal_ops	*halfunc = &adapt->HalFunc;
 
-	adapt->HalData = kzalloc(sizeof(*adapt->HalData), GFP_KERNEL);
+
+	adapt->HalData = kzalloc(sizeof(struct hal_data_8188e), GFP_KERNEL);
 	if (!adapt->HalData)
 		DBG_88E("cant not alloc memory for HAL DATA\n");
 
diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index 6ceac4f2d4b2..5b4b47ed948b 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c
@@ -857,14 +857,6 @@ __cpufreq_cooling_register(struct device_node *np,
 		goto free_power_table;
 	}
 
-	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
-		 cpufreq_dev->id);
-
-	cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
-						      &cpufreq_cooling_ops);
-	if (IS_ERR(cool_dev))
-		goto remove_idr;
-
 	/* Fill freq-table in descending order of frequencies */
 	for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) {
 		freq = find_next_max(table, freq);
@@ -877,6 +869,14 @@ __cpufreq_cooling_register(struct device_node *np,
 			pr_debug("%s: freq:%u KHz\n", __func__, freq);
 	}
 
+	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
+		 cpufreq_dev->id);
+
+	cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
+						      &cpufreq_cooling_ops);
+	if (IS_ERR(cool_dev))
+		goto remove_idr;
+
 	cpufreq_dev->clipped_freq = cpufreq_dev->freq_table[0];
 	cpufreq_dev->cool_dev = cool_dev;
 
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 6dc810bce295..944a6dca0fcb 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -44,6 +44,9 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* Creative SB Audigy 2 NX */
 	{ USB_DEVICE(0x041e, 0x3020), .driver_info = USB_QUIRK_RESET_RESUME },
 
+	/* USB3503 */
+	{ USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME },
+
 	/* Microsoft Wireless Laser Mouse 6000 Receiver */
 	{ USB_DEVICE(0x045e, 0x00e1), .driver_info = USB_QUIRK_RESET_RESUME },
 
@@ -173,6 +176,10 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* MAYA44USB sound device */
 	{ USB_DEVICE(0x0a92, 0x0091), .driver_info = USB_QUIRK_RESET_RESUME },
 
+	/* ASUS Base Station(T100) */
+	{ USB_DEVICE(0x0b05, 0x17e0), .driver_info =
+			USB_QUIRK_IGNORE_REMOTE_WAKEUP },
+
 	/* Action Semiconductor flash disk */
 	{ USB_DEVICE(0x10d6, 0x2200), .driver_info =
 			USB_QUIRK_STRING_FETCH_255 },
@@ -188,26 +195,22 @@ static const struct usb_device_id usb_quirk_list[] = {
 	{ USB_DEVICE(0x1908, 0x1315), .driver_info =
 			USB_QUIRK_HONOR_BNUMINTERFACES },
 
-	/* INTEL VALUE SSD */
-	{ USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
-
-	/* USB3503 */
-	{ USB_DEVICE(0x0424, 0x3503), .driver_info = USB_QUIRK_RESET_RESUME },
-
-	/* ASUS Base Station(T100) */
-	{ USB_DEVICE(0x0b05, 0x17e0), .driver_info =
-			USB_QUIRK_IGNORE_REMOTE_WAKEUP },
-
 	/* Protocol and OTG Electrical Test Device */
 	{ USB_DEVICE(0x1a0a, 0x0200), .driver_info =
 			USB_QUIRK_LINEAR_UFRAME_INTR_BINTERVAL },
 
+	/* Acer C120 LED Projector */
+	{ USB_DEVICE(0x1de1, 0xc102), .driver_info = USB_QUIRK_NO_LPM },
+
 	/* Blackmagic Design Intensity Shuttle */
 	{ USB_DEVICE(0x1edb, 0xbd3b), .driver_info = USB_QUIRK_NO_LPM },
 
 	/* Blackmagic Design UltraStudio SDI */
 	{ USB_DEVICE(0x1edb, 0xbd4f), .driver_info = USB_QUIRK_NO_LPM },
 
+	/* INTEL VALUE SSD */
+	{ USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
+
 	{ }  /* terminating entry must be last */
 };
 
diff --git a/drivers/usb/dwc2/core.h b/drivers/usb/dwc2/core.h
index 3c58d633ce80..dec0b21fc626 100644
--- a/drivers/usb/dwc2/core.h
+++ b/drivers/usb/dwc2/core.h
@@ -64,6 +64,17 @@
 	DWC2_TRACE_SCHEDULER_VB(pr_fmt("%s: SCH: " fmt),		\
 				dev_name(hsotg->dev), ##__VA_ARGS__)
 
+#ifdef CONFIG_MIPS
+/*
+ * There are some MIPS machines that can run in either big-endian
+ * or little-endian mode and that use the dwc2 registers without
+ * a byteswap in both modes.
+ * Unlike other architectures, MIPS apparently does not require a
+ * barrier before the __raw_writel() to synchronize with DMA but does
+ * require the barrier after the __raw_writel() to serialize a set of
+ * writes. This set of operations was added specifically for MIPS and
+ * should only be used there.
+ */
 static inline u32 dwc2_readl(const void __iomem *addr)
 {
 	u32 value = __raw_readl(addr);
@@ -90,6 +101,22 @@ static inline void dwc2_writel(u32 value, void __iomem *addr)
 	pr_info("INFO:: wrote %08x to %p\n", value, addr);
 #endif
 }
+#else
+/* Normal architectures just use readl/writel */
+static inline u32 dwc2_readl(const void __iomem *addr)
+{
+	return readl(addr);
+}
+
+static inline void dwc2_writel(u32 value, void __iomem *addr)
+{
+	writel(value, addr);
+
+#ifdef DWC2_LOG_WRITES
+	pr_info("info:: wrote %08x to %p\n", value, addr);
+#endif
+}
+#endif
 
 /* Maximum number of Endpoints/HostChannels */
 #define MAX_EPS_CHANNELS	16
diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 4c5e3005e1dc..26cf09d0fe3c 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -1018,7 +1018,7 @@ static int dwc2_hsotg_process_req_status(struct dwc2_hsotg *hsotg,
 	return 1;
 }
 
-static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value);
+static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value, bool now);
 
 /**
  * get_ep_head - return the first request on the endpoint
@@ -1094,7 +1094,7 @@ static int dwc2_hsotg_process_req_feature(struct dwc2_hsotg *hsotg,
 		case USB_ENDPOINT_HALT:
 			halted = ep->halted;
 
-			dwc2_hsotg_ep_sethalt(&ep->ep, set);
+			dwc2_hsotg_ep_sethalt(&ep->ep, set, true);
 
 			ret = dwc2_hsotg_send_reply(hsotg, ep0, NULL, 0);
 			if (ret) {
@@ -2948,8 +2948,13 @@ static int dwc2_hsotg_ep_dequeue(struct usb_ep *ep, struct usb_request *req)
  * dwc2_hsotg_ep_sethalt - set halt on a given endpoint
  * @ep: The endpoint to set halt.
  * @value: Set or unset the halt.
+ * @now: If true, stall the endpoint now. Otherwise return -EAGAIN if
+ *       the endpoint is busy processing requests.
+ *
+ * We need to stall the endpoint immediately if the request comes from the
+ * set_feature protocol command handler.
  */
-static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value)
+static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value, bool now)
 {
 	struct dwc2_hsotg_ep *hs_ep = our_ep(ep);
 	struct dwc2_hsotg *hs = hs_ep->parent;
@@ -2969,6 +2974,17 @@ static int dwc2_hsotg_ep_sethalt(struct usb_ep *ep, int value)
 		return 0;
 	}
 
+	if (hs_ep->isochronous) {
+		dev_err(hs->dev, "%s is Isochronous Endpoint\n", ep->name);
+		return -EINVAL;
+	}
+
+	if (!now && value && !list_empty(&hs_ep->queue)) {
+		dev_dbg(hs->dev, "%s request is pending, cannot halt\n",
+			ep->name);
+		return -EAGAIN;
+	}
+
 	if (hs_ep->dir_in) {
 		epreg = DIEPCTL(index);
 		epctl = dwc2_readl(hs->regs + epreg);
@@ -3020,7 +3036,7 @@ static int dwc2_hsotg_ep_sethalt_lock(struct usb_ep *ep, int value)
 	int ret = 0;
 
 	spin_lock_irqsave(&hs->lock, flags);
-	ret = dwc2_hsotg_ep_sethalt(ep, value);
+	ret = dwc2_hsotg_ep_sethalt(ep, value, false);
 	spin_unlock_irqrestore(&hs->lock, flags);
 
 	return ret;
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 7ddf9449a063..654050684f4f 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -402,6 +402,7 @@
 #define DWC3_DEPCMD_GET_RSC_IDX(x)	(((x) >> DWC3_DEPCMD_PARAM_SHIFT) & 0x7f)
 #define DWC3_DEPCMD_STATUS(x)		(((x) >> 12) & 0x0F)
 #define DWC3_DEPCMD_HIPRI_FORCERM	(1 << 11)
+#define DWC3_DEPCMD_CLEARPENDIN		(1 << 11)
 #define DWC3_DEPCMD_CMDACT		(1 << 10)
 #define DWC3_DEPCMD_CMDIOC		(1 << 8)
 
diff --git a/drivers/usb/dwc3/dwc3-exynos.c b/drivers/usb/dwc3/dwc3-exynos.c
index dd5cb5577dca..2f1fb7e7aa54 100644
--- a/drivers/usb/dwc3/dwc3-exynos.c
+++ b/drivers/usb/dwc3/dwc3-exynos.c
@@ -128,12 +128,6 @@ static int dwc3_exynos_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, exynos);
 
-	ret = dwc3_exynos_register_phys(exynos);
-	if (ret) {
-		dev_err(dev, "couldn't register PHYs\n");
-		return ret;
-	}
-
 	exynos->dev	= dev;
 
 	exynos->clk = devm_clk_get(dev, "usbdrd30");
@@ -183,20 +177,29 @@ static int dwc3_exynos_probe(struct platform_device *pdev)
 		goto err3;
 	}
 
+	ret = dwc3_exynos_register_phys(exynos);
+	if (ret) {
+		dev_err(dev, "couldn't register PHYs\n");
+		goto err4;
+	}
+
 	if (node) {
 		ret = of_platform_populate(node, NULL, NULL, dev);
 		if (ret) {
 			dev_err(dev, "failed to add dwc3 core\n");
-			goto err4;
+			goto err5;
 		}
 	} else {
 		dev_err(dev, "no device node, failed to add dwc3 core\n");
 		ret = -ENODEV;
-		goto err4;
+		goto err5;
 	}
 
 	return 0;
 
+err5:
+	platform_device_unregister(exynos->usb2_phy);
+	platform_device_unregister(exynos->usb3_phy);
 err4:
 	regulator_disable(exynos->vdd10);
 err3:
diff --git a/drivers/usb/dwc3/dwc3-st.c b/drivers/usb/dwc3/dwc3-st.c
index 5c0adb9c6fb2..50d6ae6f88bc 100644
--- a/drivers/usb/dwc3/dwc3-st.c
+++ b/drivers/usb/dwc3/dwc3-st.c
@@ -129,12 +129,18 @@ static int st_dwc3_drd_init(struct st_dwc3 *dwc3_data)
 	switch (dwc3_data->dr_mode) {
 	case USB_DR_MODE_PERIPHERAL:
 
-		val &= ~(USB3_FORCE_VBUSVALID | USB3_DELAY_VBUSVALID
+		val &= ~(USB3_DELAY_VBUSVALID
 			| USB3_SEL_FORCE_OPMODE | USB3_FORCE_OPMODE(0x3)
 			| USB3_SEL_FORCE_DPPULLDOWN2 | USB3_FORCE_DPPULLDOWN2
 			| USB3_SEL_FORCE_DMPULLDOWN2 | USB3_FORCE_DMPULLDOWN2);
 
-		val |= USB3_DEVICE_NOT_HOST;
+		/*
+		 * USB3_PORT2_FORCE_VBUSVALID When '1' and when
+		 * USB3_PORT2_DEVICE_NOT_HOST = 1, forces VBUSVLDEXT2 input
+		 * of the pico PHY to 1.
+		 */
+
+		val |= USB3_DEVICE_NOT_HOST | USB3_FORCE_VBUSVALID;
 		break;
 
 	case USB_DR_MODE_HOST:
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 9a7d0bd15dc3..07248ff1be5c 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -347,6 +347,28 @@ int dwc3_send_gadget_ep_cmd(struct dwc3 *dwc, unsigned ep,
 	return ret;
 }
 
+static int dwc3_send_clear_stall_ep_cmd(struct dwc3_ep *dep)
+{
+	struct dwc3 *dwc = dep->dwc;
+	struct dwc3_gadget_ep_cmd_params params;
+	u32 cmd = DWC3_DEPCMD_CLEARSTALL;
+
+	/*
+	 * As of core revision 2.60a the recommended programming model
+	 * is to set the ClearPendIN bit when issuing a Clear Stall EP
+	 * command for IN endpoints. This is to prevent an issue where
+	 * some (non-compliant) hosts may not send ACK TPs for pending
+	 * IN transfers due to a mishandled error condition. Synopsys
+	 * STAR 9000614252.
+	 */
+	if (dep->direction && (dwc->revision >= DWC3_REVISION_260A))
+		cmd |= DWC3_DEPCMD_CLEARPENDIN;
+
+	memset(&params, 0, sizeof(params));
+
+	return dwc3_send_gadget_ep_cmd(dwc, dep->number, cmd, &params);
+}
+
 static dma_addr_t dwc3_trb_dma_offset(struct dwc3_ep *dep,
 		struct dwc3_trb *trb)
 {
@@ -1314,8 +1336,7 @@ int __dwc3_gadget_ep_set_halt(struct dwc3_ep *dep, int value, int protocol)
 		else
 			dep->flags |= DWC3_EP_STALL;
 	} else {
-		ret = dwc3_send_gadget_ep_cmd(dwc, dep->number,
-			DWC3_DEPCMD_CLEARSTALL, &params);
+		ret = dwc3_send_clear_stall_ep_cmd(dep);
 		if (ret)
 			dev_err(dwc->dev, "failed to clear STALL on %s\n",
 					dep->name);
@@ -2247,7 +2268,6 @@ static void dwc3_clear_stall_all_ep(struct dwc3 *dwc)
 
 	for (epnum = 1; epnum < DWC3_ENDPOINTS_NUM; epnum++) {
 		struct dwc3_ep *dep;
-		struct dwc3_gadget_ep_cmd_params params;
 		int ret;
 
 		dep = dwc->eps[epnum];
@@ -2259,9 +2279,7 @@ static void dwc3_clear_stall_all_ep(struct dwc3 *dwc)
 
 		dep->flags &= ~DWC3_EP_STALL;
 
-		memset(&params, 0, sizeof(params));
-		ret = dwc3_send_gadget_ep_cmd(dwc, dep->number,
-				DWC3_DEPCMD_CLEARSTALL, &params);
+		ret = dwc3_send_clear_stall_ep_cmd(dep);
 		WARN_ON_ONCE(ret);
 	}
 }
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index d67de0d22a2b..eb648485a58c 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1868,14 +1868,19 @@ unknown:
 				}
 				break;
 			}
-			req->length = value;
-			req->context = cdev;
-			req->zero = value < w_length;
-			value = composite_ep0_queue(cdev, req, GFP_ATOMIC);
-			if (value < 0) {
-				DBG(cdev, "ep_queue --> %d\n", value);
-				req->status = 0;
-				composite_setup_complete(gadget->ep0, req);
+
+			if (value >= 0) {
+				req->length = value;
+				req->context = cdev;
+				req->zero = value < w_length;
+				value = composite_ep0_queue(cdev, req,
+							    GFP_ATOMIC);
+				if (value < 0) {
+					DBG(cdev, "ep_queue --> %d\n", value);
+					req->status = 0;
+					composite_setup_complete(gadget->ep0,
+								 req);
+				}
 			}
 			return value;
 		}
diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index b6f60ca8a035..70cf3477f951 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -1401,6 +1401,7 @@ static const struct usb_gadget_driver configfs_driver_template = {
 		.owner          = THIS_MODULE,
 		.name		= "configfs-gadget",
 	},
+	.match_existing_only = 1,
 };
 
 static struct config_group *gadgets_make(
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index 73515d54e1cc..cc33d2667408 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -2051,7 +2051,7 @@ static int __ffs_data_do_os_desc(enum ffs_os_desc_type type,
 
 		if (len < sizeof(*d) ||
 		    d->bFirstInterfaceNumber >= ffs->interfaces_count ||
-		    d->Reserved1)
+		    !d->Reserved1)
 			return -EINVAL;
 		for (i = 0; i < ARRAY_SIZE(d->Reserved2); ++i)
 			if (d->Reserved2[i])
@@ -2729,6 +2729,7 @@ static int _ffs_func_bind(struct usb_configuration *c,
 		func->ffs->ss_descs_count;
 
 	int fs_len, hs_len, ss_len, ret, i;
+	struct ffs_ep *eps_ptr;
 
 	/* Make it a single chunk, less management later on */
 	vla_group(d);
@@ -2777,12 +2778,9 @@ static int _ffs_func_bind(struct usb_configuration *c,
 	       ffs->raw_descs_length);
 
 	memset(vla_ptr(vlabuf, d, inums), 0xff, d_inums__sz);
-	for (ret = ffs->eps_count; ret; --ret) {
-		struct ffs_ep *ptr;
-
-		ptr = vla_ptr(vlabuf, d, eps);
-		ptr[ret].num = -1;
-	}
+	eps_ptr = vla_ptr(vlabuf, d, eps);
+	for (i = 0; i < ffs->eps_count; i++)
+		eps_ptr[i].num = -1;
 
 	/* Save pointers
 	 * d_eps == vlabuf, func->eps used to kfree vlabuf later
@@ -2851,7 +2849,7 @@ static int _ffs_func_bind(struct usb_configuration *c,
 		goto error;
 
 	func->function.os_desc_table = vla_ptr(vlabuf, d, os_desc_table);
-	if (c->cdev->use_os_string)
+	if (c->cdev->use_os_string) {
 		for (i = 0; i < ffs->interfaces_count; ++i) {
 			struct usb_os_desc *desc;
 
@@ -2862,13 +2860,15 @@ static int _ffs_func_bind(struct usb_configuration *c,
 				vla_ptr(vlabuf, d, ext_compat) + i * 16;
 			INIT_LIST_HEAD(&desc->ext_prop);
 		}
-	ret = ffs_do_os_descs(ffs->ms_os_descs_count,
-			      vla_ptr(vlabuf, d, raw_descs) +
-			      fs_len + hs_len + ss_len,
-			      d_raw_descs__sz - fs_len - hs_len - ss_len,
-			      __ffs_func_bind_do_os_desc, func);
-	if (unlikely(ret < 0))
-		goto error;
+		ret = ffs_do_os_descs(ffs->ms_os_descs_count,
+				      vla_ptr(vlabuf, d, raw_descs) +
+				      fs_len + hs_len + ss_len,
+				      d_raw_descs__sz - fs_len - hs_len -
+				      ss_len,
+				      __ffs_func_bind_do_os_desc, func);
+		if (unlikely(ret < 0))
+			goto error;
+	}
 	func->function.os_desc_n =
 		c->cdev->use_os_string ? ffs->interfaces_count : 0;
 
diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c
index c45104e3a64b..64706a789580 100644
--- a/drivers/usb/gadget/function/f_printer.c
+++ b/drivers/usb/gadget/function/f_printer.c
@@ -161,14 +161,6 @@ static struct usb_endpoint_descriptor hs_ep_out_desc = {
 	.wMaxPacketSize =	cpu_to_le16(512)
 };
 
-static struct usb_qualifier_descriptor dev_qualifier = {
-	.bLength =		sizeof(dev_qualifier),
-	.bDescriptorType =	USB_DT_DEVICE_QUALIFIER,
-	.bcdUSB =		cpu_to_le16(0x0200),
-	.bDeviceClass =		USB_CLASS_PRINTER,
-	.bNumConfigurations =	1
-};
-
 static struct usb_descriptor_header *hs_printer_function[] = {
 	(struct usb_descriptor_header *) &intf_desc,
 	(struct usb_descriptor_header *) &hs_ep_in_desc,
diff --git a/drivers/usb/gadget/function/f_tcm.c b/drivers/usb/gadget/function/f_tcm.c
index 35fe3c80cfc0..197f73386fac 100644
--- a/drivers/usb/gadget/function/f_tcm.c
+++ b/drivers/usb/gadget/function/f_tcm.c
@@ -1445,16 +1445,18 @@ static void usbg_drop_tpg(struct se_portal_group *se_tpg)
 	for (i = 0; i < TPG_INSTANCES; ++i)
 		if (tpg_instances[i].tpg == tpg)
 			break;
-	if (i < TPG_INSTANCES)
+	if (i < TPG_INSTANCES) {
 		tpg_instances[i].tpg = NULL;
-	opts = container_of(tpg_instances[i].func_inst,
-		struct f_tcm_opts, func_inst);
-	mutex_lock(&opts->dep_lock);
-	if (opts->has_dep)
-		module_put(opts->dependent);
-	else
-		configfs_undepend_item_unlocked(&opts->func_inst.group.cg_item);
-	mutex_unlock(&opts->dep_lock);
+		opts = container_of(tpg_instances[i].func_inst,
+			struct f_tcm_opts, func_inst);
+		mutex_lock(&opts->dep_lock);
+		if (opts->has_dep)
+			module_put(opts->dependent);
+		else
+			configfs_undepend_item_unlocked(
+				&opts->func_inst.group.cg_item);
+		mutex_unlock(&opts->dep_lock);
+	}
 	mutex_unlock(&tpg_instances_lock);
 
 	kfree(tpg);
diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c
index 186d4b162524..cd214ec8a601 100644
--- a/drivers/usb/gadget/function/f_uac2.c
+++ b/drivers/usb/gadget/function/f_uac2.c
@@ -598,18 +598,6 @@ static struct usb_gadget_strings *fn_strings[] = {
 	NULL,
 };
 
-static struct usb_qualifier_descriptor devqual_desc = {
-	.bLength = sizeof devqual_desc,
-	.bDescriptorType = USB_DT_DEVICE_QUALIFIER,
-
-	.bcdUSB = cpu_to_le16(0x200),
-	.bDeviceClass = USB_CLASS_MISC,
-	.bDeviceSubClass = 0x02,
-	.bDeviceProtocol = 0x01,
-	.bNumConfigurations = 1,
-	.bRESERVED = 0,
-};
-
 static struct usb_interface_assoc_descriptor iad_desc = {
 	.bLength = sizeof iad_desc,
 	.bDescriptorType = USB_DT_INTERFACE_ASSOCIATION,
@@ -1292,6 +1280,7 @@ in_rq_cur(struct usb_function *fn, const struct usb_ctrlrequest *cr)
 
 	if (control_selector == UAC2_CS_CONTROL_SAM_FREQ) {
 		struct cntrl_cur_lay3 c;
+		memset(&c, 0, sizeof(struct cntrl_cur_lay3));
 
 		if (entity_id == USB_IN_CLK_ID)
 			c.dCUR = p_srate;
diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c
index d62683017cf3..990df221c629 100644
--- a/drivers/usb/gadget/function/storage_common.c
+++ b/drivers/usb/gadget/function/storage_common.c
@@ -83,9 +83,7 @@ EXPORT_SYMBOL_GPL(fsg_fs_function);
  * USB 2.0 devices need to expose both high speed and full speed
  * descriptors, unless they only run at full speed.
  *
- * That means alternate endpoint descriptors (bigger packets)
- * and a "device qualifier" ... plus more construction options
- * for the configuration descriptor.
+ * That means alternate endpoint descriptors (bigger packets).
  */
 struct usb_endpoint_descriptor fsg_hs_bulk_in_desc = {
 	.bLength =		USB_DT_ENDPOINT_SIZE,
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index e64479f882a5..aa3707bdebb4 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -938,8 +938,11 @@ ep0_read (struct file *fd, char __user *buf, size_t len, loff_t *ptr)
 			struct usb_ep		*ep = dev->gadget->ep0;
 			struct usb_request	*req = dev->req;
 
-			if ((retval = setup_req (ep, req, 0)) == 0)
-				retval = usb_ep_queue (ep, req, GFP_ATOMIC);
+			if ((retval = setup_req (ep, req, 0)) == 0) {
+				spin_unlock_irq (&dev->lock);
+				retval = usb_ep_queue (ep, req, GFP_KERNEL);
+				spin_lock_irq (&dev->lock);
+			}
 			dev->state = STATE_DEV_CONNECTED;
 
 			/* assume that was SET_CONFIGURATION */
@@ -1457,8 +1460,11 @@ delegate:
 							w_length);
 				if (value < 0)
 					break;
+
+				spin_unlock (&dev->lock);
 				value = usb_ep_queue (gadget->ep0, dev->req,
-							GFP_ATOMIC);
+							GFP_KERNEL);
+				spin_lock (&dev->lock);
 				if (value < 0) {
 					clean_req (gadget->ep0, dev->req);
 					break;
@@ -1481,11 +1487,14 @@ delegate:
 	if (value >= 0 && dev->state != STATE_DEV_SETUP) {
 		req->length = value;
 		req->zero = value < w_length;
-		value = usb_ep_queue (gadget->ep0, req, GFP_ATOMIC);
+
+		spin_unlock (&dev->lock);
+		value = usb_ep_queue (gadget->ep0, req, GFP_KERNEL);
 		if (value < 0) {
 			DBG (dev, "ep_queue --> %d\n", value);
 			req->status = 0;
 		}
+		return value;
 	}
 
 	/* device stalls when value < 0 */
diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c
index 6e8300d6a737..e1b2dcebdc2e 100644
--- a/drivers/usb/gadget/udc/udc-core.c
+++ b/drivers/usb/gadget/udc/udc-core.c
@@ -603,11 +603,15 @@ int usb_gadget_probe_driver(struct usb_gadget_driver *driver)
 		}
 	}
 
-	list_add_tail(&driver->pending, &gadget_driver_pending_list);
-	pr_info("udc-core: couldn't find an available UDC - added [%s] to list of pending drivers\n",
-		driver->function);
+	if (!driver->match_existing_only) {
+		list_add_tail(&driver->pending, &gadget_driver_pending_list);
+		pr_info("udc-core: couldn't find an available UDC - added [%s] to list of pending drivers\n",
+			driver->function);
+		ret = 0;
+	}
+
 	mutex_unlock(&udc_lock);
-	return 0;
+	return ret;
 found:
 	ret = udc_bind_to_driver(udc, driver);
 	mutex_unlock(&udc_lock);
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index ae1b6e69eb96..a962b89b65a6 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -368,6 +368,15 @@ static void ehci_shutdown(struct usb_hcd *hcd)
 {
 	struct ehci_hcd	*ehci = hcd_to_ehci(hcd);
 
+	/*
+	 * Protect the system from crashing at system shutdown in cases where
+	 * the USB host has not yet been added by the OTG controller driver.
+	 * Since ehci_setup() has not run yet, do not access registers or
+	 * variables that are initialized in ehci_setup().
+	 */
+	if (!ehci->sbrn)
+		return;
+
 	spin_lock_irq(&ehci->lock);
 	ehci->shutdown = true;
 	ehci->rh_state = EHCI_RH_STOPPING;
diff --git a/drivers/usb/host/ehci-hub.c b/drivers/usb/host/ehci-hub.c
index ffc90295a95f..74f62d68f013 100644
--- a/drivers/usb/host/ehci-hub.c
+++ b/drivers/usb/host/ehci-hub.c
@@ -872,15 +872,23 @@ int ehci_hub_control(
 ) {
 	struct ehci_hcd	*ehci = hcd_to_ehci (hcd);
 	int		ports = HCS_N_PORTS (ehci->hcs_params);
-	u32 __iomem	*status_reg = &ehci->regs->port_status[
-				(wIndex & 0xff) - 1];
-	u32 __iomem	*hostpc_reg = &ehci->regs->hostpc[(wIndex & 0xff) - 1];
+	u32 __iomem	*status_reg, *hostpc_reg;
 	u32		temp, temp1, status;
 	unsigned long	flags;
 	int		retval = 0;
 	unsigned	selector;
 
 	/*
+	 * Avoid underflow while calculating (wIndex & 0xff) - 1.
+	 * The compiler might deduce that wIndex can never be 0 and then
+	 * optimize away the tests for !wIndex below.
+	 */
+	temp = wIndex & 0xff;
+	temp -= (temp > 0);
+	status_reg = &ehci->regs->port_status[temp];
+	hostpc_reg = &ehci->regs->hostpc[temp];
+
+	/*
 	 * FIXME:  support SetPortFeatures USB_PORT_FEAT_INDICATOR.
 	 * HCS_INDICATOR may say we can change LEDs to off/amber/green.
 	 * (track current state ourselves) ... blink for diagnostics,
diff --git a/drivers/usb/host/ehci-msm.c b/drivers/usb/host/ehci-msm.c
index d3afc89d00f5..2f8d3af811ce 100644
--- a/drivers/usb/host/ehci-msm.c
+++ b/drivers/usb/host/ehci-msm.c
@@ -179,22 +179,32 @@ static int ehci_msm_remove(struct platform_device *pdev)
 static int ehci_msm_pm_suspend(struct device *dev)
 {
 	struct usb_hcd *hcd = dev_get_drvdata(dev);
+	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
 	bool do_wakeup = device_may_wakeup(dev);
 
 	dev_dbg(dev, "ehci-msm PM suspend\n");
 
-	return ehci_suspend(hcd, do_wakeup);
+	/* Only call ehci_suspend if ehci_setup has been done */
+	if (ehci->sbrn)
+		return ehci_suspend(hcd, do_wakeup);
+
+	return 0;
 }
 
 static int ehci_msm_pm_resume(struct device *dev)
 {
 	struct usb_hcd *hcd = dev_get_drvdata(dev);
+	struct ehci_hcd *ehci = hcd_to_ehci(hcd);
 
 	dev_dbg(dev, "ehci-msm PM resume\n");
-	ehci_resume(hcd, false);
+
+	/* Only call ehci_resume if ehci_setup has been done */
+	if (ehci->sbrn)
+		ehci_resume(hcd, false);
 
 	return 0;
 }
+
 #else
 #define ehci_msm_pm_suspend	NULL
 #define ehci_msm_pm_resume	NULL
diff --git a/drivers/usb/host/ehci-tegra.c b/drivers/usb/host/ehci-tegra.c
index 4031b372008e..9a3d7db5be57 100644
--- a/drivers/usb/host/ehci-tegra.c
+++ b/drivers/usb/host/ehci-tegra.c
@@ -81,15 +81,23 @@ static int tegra_reset_usb_controller(struct platform_device *pdev)
 	struct usb_hcd *hcd = platform_get_drvdata(pdev);
 	struct tegra_ehci_hcd *tegra =
 		(struct tegra_ehci_hcd *)hcd_to_ehci(hcd)->priv;
+	bool has_utmi_pad_registers = false;
 
 	phy_np = of_parse_phandle(pdev->dev.of_node, "nvidia,phy", 0);
 	if (!phy_np)
 		return -ENOENT;
 
+	if (of_property_read_bool(phy_np, "nvidia,has-utmi-pad-registers"))
+		has_utmi_pad_registers = true;
+
 	if (!usb1_reset_attempted) {
 		struct reset_control *usb1_reset;
 
-		usb1_reset = of_reset_control_get(phy_np, "usb");
+		if (!has_utmi_pad_registers)
+			usb1_reset = of_reset_control_get(phy_np, "utmi-pads");
+		else
+			usb1_reset = tegra->rst;
+
 		if (IS_ERR(usb1_reset)) {
 			dev_warn(&pdev->dev,
 				 "can't get utmi-pads reset from the PHY\n");
@@ -99,13 +107,15 @@ static int tegra_reset_usb_controller(struct platform_device *pdev)
 			reset_control_assert(usb1_reset);
 			udelay(1);
 			reset_control_deassert(usb1_reset);
+
+			if (!has_utmi_pad_registers)
+				reset_control_put(usb1_reset);
 		}
 
-		reset_control_put(usb1_reset);
 		usb1_reset_attempted = true;
 	}
 
-	if (!of_property_read_bool(phy_np, "nvidia,has-utmi-pad-registers")) {
+	if (!has_utmi_pad_registers) {
 		reset_control_assert(tegra->rst);
 		udelay(1);
 		reset_control_deassert(tegra->rst);
diff --git a/drivers/usb/host/ohci-q.c b/drivers/usb/host/ohci-q.c
index d029bbe9eb36..641fed609911 100644
--- a/drivers/usb/host/ohci-q.c
+++ b/drivers/usb/host/ohci-q.c
@@ -183,7 +183,6 @@ static int ed_schedule (struct ohci_hcd *ohci, struct ed *ed)
 {
 	int	branch;
 
-	ed->state = ED_OPER;
 	ed->ed_prev = NULL;
 	ed->ed_next = NULL;
 	ed->hwNextED = 0;
@@ -259,6 +258,8 @@ static int ed_schedule (struct ohci_hcd *ohci, struct ed *ed)
 	/* the HC may not see the schedule updates yet, but if it does
 	 * then they'll be properly ordered.
 	 */
+
+	ed->state = ED_OPER;
 	return 0;
 }
 
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 48672fac7ff3..c10972fcc8e4 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -37,6 +37,7 @@
 /* Device for a quirk */
 #define PCI_VENDOR_ID_FRESCO_LOGIC	0x1b73
 #define PCI_DEVICE_ID_FRESCO_LOGIC_PDK	0x1000
+#define PCI_DEVICE_ID_FRESCO_LOGIC_FL1009	0x1009
 #define PCI_DEVICE_ID_FRESCO_LOGIC_FL1400	0x1400
 
 #define PCI_VENDOR_ID_ETRON		0x1b6f
@@ -114,6 +115,10 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 		xhci->quirks |= XHCI_TRUST_TX_LENGTH;
 	}
 
+	if (pdev->vendor == PCI_VENDOR_ID_FRESCO_LOGIC &&
+			pdev->device == PCI_DEVICE_ID_FRESCO_LOGIC_FL1009)
+		xhci->quirks |= XHCI_BROKEN_STREAMS;
+
 	if (pdev->vendor == PCI_VENDOR_ID_NEC)
 		xhci->quirks |= XHCI_NEC_HOST;
 
diff --git a/drivers/usb/host/xhci-plat.c b/drivers/usb/host/xhci-plat.c
index 676ea458148b..1f3f981fe7f8 100644
--- a/drivers/usb/host/xhci-plat.c
+++ b/drivers/usb/host/xhci-plat.c
@@ -196,6 +196,9 @@ static int xhci_plat_probe(struct platform_device *pdev)
 		ret = clk_prepare_enable(clk);
 		if (ret)
 			goto put_hcd;
+	} else if (PTR_ERR(clk) == -EPROBE_DEFER) {
+		ret = -EPROBE_DEFER;
+		goto put_hcd;
 	}
 
 	xhci = hcd_to_xhci(hcd);
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 52deae4b7eac..d7d502578d79 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -290,6 +290,14 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
 
 	temp_64 = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
 	xhci->cmd_ring_state = CMD_RING_STATE_ABORTED;
+
+	/*
+	 * Writing the CMD_RING_ABORT bit should cause a cmd completion event,
+	 * however on some host hw the CMD_RING_RUNNING bit is correctly cleared
+	 * but the completion event is never sent. Use the cmd timeout timer to
+	 * handle those cases. Use twice the time to cover the bit polling retry
+	 */
+	mod_timer(&xhci->cmd_timer, jiffies + (2 * XHCI_CMD_DEFAULT_TIMEOUT));
 	xhci_write_64(xhci, temp_64 | CMD_RING_ABORT,
 			&xhci->op_regs->cmd_ring);
 
@@ -314,6 +322,7 @@ static int xhci_abort_cmd_ring(struct xhci_hcd *xhci)
 
 		xhci_err(xhci, "Stopped the command ring failed, "
 				"maybe the host is dead\n");
+		del_timer(&xhci->cmd_timer);
 		xhci->xhc_state |= XHCI_STATE_DYING;
 		xhci_quiesce(xhci);
 		xhci_halt(xhci);
@@ -1246,22 +1255,21 @@ void xhci_handle_command_timeout(unsigned long data)
 	int ret;
 	unsigned long flags;
 	u64 hw_ring_state;
-	struct xhci_command *cur_cmd = NULL;
+	bool second_timeout = false;
 	xhci = (struct xhci_hcd *) data;
 
 	/* mark this command to be cancelled */
 	spin_lock_irqsave(&xhci->lock, flags);
 	if (xhci->current_cmd) {
-		cur_cmd = xhci->current_cmd;
-		cur_cmd->status = COMP_CMD_ABORT;
+		if (xhci->current_cmd->status == COMP_CMD_ABORT)
+			second_timeout = true;
+		xhci->current_cmd->status = COMP_CMD_ABORT;
 	}
 
-
 	/* Make sure command ring is running before aborting it */
 	hw_ring_state = xhci_read_64(xhci, &xhci->op_regs->cmd_ring);
 	if ((xhci->cmd_ring_state & CMD_RING_STATE_RUNNING) &&
 	    (hw_ring_state & CMD_RING_RUNNING))  {
-
 		spin_unlock_irqrestore(&xhci->lock, flags);
 		xhci_dbg(xhci, "Command timeout\n");
 		ret = xhci_abort_cmd_ring(xhci);
@@ -1273,6 +1281,15 @@ void xhci_handle_command_timeout(unsigned long data)
 		}
 		return;
 	}
+
+	/* command ring failed to restart, or host removed. Bail out */
+	if (second_timeout || xhci->xhc_state & XHCI_STATE_REMOVING) {
+		spin_unlock_irqrestore(&xhci->lock, flags);
+		xhci_dbg(xhci, "command timed out twice, ring start fail?\n");
+		xhci_cleanup_command_queue(xhci);
+		return;
+	}
+
 	/* command timeout on stopped ring, ring can't be aborted */
 	xhci_dbg(xhci, "Command timeout on stopped ring\n");
 	xhci_handle_stopped_cmd_ring(xhci, xhci->current_cmd);
@@ -2721,7 +2738,8 @@ hw_died:
 		writel(irq_pending, &xhci->ir_set->irq_pending);
 	}
 
-	if (xhci->xhc_state & XHCI_STATE_DYING) {
+	if (xhci->xhc_state & XHCI_STATE_DYING ||
+	    xhci->xhc_state & XHCI_STATE_HALTED) {
 		xhci_dbg(xhci, "xHCI dying, ignoring interrupt. "
 				"Shouldn't IRQs be disabled?\n");
 		/* Clear the event handler busy flag (RW1C);
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index fa7e1ef36cd9..f2f9518c53ab 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -685,20 +685,23 @@ void xhci_stop(struct usb_hcd *hcd)
 	u32 temp;
 	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
 
-	if (xhci->xhc_state & XHCI_STATE_HALTED)
-		return;
-
 	mutex_lock(&xhci->mutex);
-	spin_lock_irq(&xhci->lock);
-	xhci->xhc_state |= XHCI_STATE_HALTED;
-	xhci->cmd_ring_state = CMD_RING_STATE_STOPPED;
 
-	/* Make sure the xHC is halted for a USB3 roothub
-	 * (xhci_stop() could be called as part of failed init).
-	 */
-	xhci_halt(xhci);
-	xhci_reset(xhci);
-	spin_unlock_irq(&xhci->lock);
+	if (!(xhci->xhc_state & XHCI_STATE_HALTED)) {
+		spin_lock_irq(&xhci->lock);
+
+		xhci->xhc_state |= XHCI_STATE_HALTED;
+		xhci->cmd_ring_state = CMD_RING_STATE_STOPPED;
+		xhci_halt(xhci);
+		xhci_reset(xhci);
+
+		spin_unlock_irq(&xhci->lock);
+	}
+
+	if (!usb_hcd_is_primary_hcd(hcd)) {
+		mutex_unlock(&xhci->mutex);
+		return;
+	}
 
 	xhci_cleanup_msix(xhci);
 
@@ -4886,7 +4889,7 @@ int xhci_gen_setup(struct usb_hcd *hcd, xhci_get_quirks_t get_quirks)
 		xhci->hcc_params2 = readl(&xhci->cap_regs->hcc_params2);
 	xhci_print_registers(xhci);
 
-	xhci->quirks = quirks;
+	xhci->quirks |= quirks;
 
 	get_quirks(dev, xhci);
 
diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 39fd95833eb8..f824336def5c 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -1090,29 +1090,6 @@ void musb_stop(struct musb *musb)
 	musb_platform_try_idle(musb, 0);
 }
 
-static void musb_shutdown(struct platform_device *pdev)
-{
-	struct musb	*musb = dev_to_musb(&pdev->dev);
-	unsigned long	flags;
-
-	pm_runtime_get_sync(musb->controller);
-
-	musb_host_cleanup(musb);
-	musb_gadget_cleanup(musb);
-
-	spin_lock_irqsave(&musb->lock, flags);
-	musb_platform_disable(musb);
-	musb_generic_disable(musb);
-	spin_unlock_irqrestore(&musb->lock, flags);
-
-	musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
-	musb_platform_exit(musb);
-
-	pm_runtime_put(musb->controller);
-	/* FIXME power down */
-}
-
-
 /*-------------------------------------------------------------------------*/
 
 /*
@@ -1702,7 +1679,7 @@ EXPORT_SYMBOL_GPL(musb_dma_completion);
 #define use_dma			0
 #endif
 
-static void (*musb_phy_callback)(enum musb_vbus_id_status status);
+static int (*musb_phy_callback)(enum musb_vbus_id_status status);
 
 /*
  * musb_mailbox - optional phy notifier function
@@ -1711,11 +1688,12 @@ static void (*musb_phy_callback)(enum musb_vbus_id_status status);
  * Optionally gets called from the USB PHY. Note that the USB PHY must be
  * disabled at the point the phy_callback is registered or unregistered.
  */
-void musb_mailbox(enum musb_vbus_id_status status)
+int musb_mailbox(enum musb_vbus_id_status status)
 {
 	if (musb_phy_callback)
-		musb_phy_callback(status);
+		return musb_phy_callback(status);
 
+	return -ENODEV;
 };
 EXPORT_SYMBOL_GPL(musb_mailbox);
 
@@ -2028,11 +2006,6 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
 	musb_readl = musb_default_readl;
 	musb_writel = musb_default_writel;
 
-	/* We need musb_read/write functions initialized for PM */
-	pm_runtime_use_autosuspend(musb->controller);
-	pm_runtime_set_autosuspend_delay(musb->controller, 200);
-	pm_runtime_enable(musb->controller);
-
 	/* The musb_platform_init() call:
 	 *   - adjusts musb->mregs
 	 *   - sets the musb->isr
@@ -2134,6 +2107,16 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
 	if (musb->ops->phy_callback)
 		musb_phy_callback = musb->ops->phy_callback;
 
+	/*
+	 * We need musb_read/write functions initialized for PM.
+	 * Note that at least 2430 glue needs autosuspend delay
+	 * somewhere above 300 ms for the hardware to idle properly
+	 * after disconnecting the cable in host mode. Let's use
+	 * 500 ms for some margin.
+	 */
+	pm_runtime_use_autosuspend(musb->controller);
+	pm_runtime_set_autosuspend_delay(musb->controller, 500);
+	pm_runtime_enable(musb->controller);
 	pm_runtime_get_sync(musb->controller);
 
 	status = usb_phy_init(musb->xceiv);
@@ -2237,13 +2220,8 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
 	if (status)
 		goto fail5;
 
-	pm_runtime_put(musb->controller);
-
-	/*
-	 * For why this is currently needed, see commit 3e43a0725637
-	 * ("usb: musb: core: add pm_runtime_irq_safe()")
-	 */
-	pm_runtime_irq_safe(musb->controller);
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 
 	return 0;
 
@@ -2265,7 +2243,9 @@ fail2_5:
 	usb_phy_shutdown(musb->xceiv);
 
 err_usb_phy_init:
+	pm_runtime_dont_use_autosuspend(musb->controller);
 	pm_runtime_put_sync(musb->controller);
+	pm_runtime_disable(musb->controller);
 
 fail2:
 	if (musb->irq_wake)
@@ -2273,7 +2253,6 @@ fail2:
 	musb_platform_exit(musb);
 
 fail1:
-	pm_runtime_disable(musb->controller);
 	dev_err(musb->controller,
 		"musb_init_controller failed with status %d\n", status);
 
@@ -2312,6 +2291,7 @@ static int musb_remove(struct platform_device *pdev)
 {
 	struct device	*dev = &pdev->dev;
 	struct musb	*musb = dev_to_musb(dev);
+	unsigned long	flags;
 
 	/* this gets called on rmmod.
 	 *  - Host mode: host may still be active
@@ -2319,17 +2299,26 @@ static int musb_remove(struct platform_device *pdev)
 	 *  - OTG mode: both roles are deactivated (or never-activated)
 	 */
 	musb_exit_debugfs(musb);
-	musb_shutdown(pdev);
-	musb_phy_callback = NULL;
-
-	if (musb->dma_controller)
-		musb_dma_controller_destroy(musb->dma_controller);
-
-	usb_phy_shutdown(musb->xceiv);
 
 	cancel_work_sync(&musb->irq_work);
 	cancel_delayed_work_sync(&musb->finish_resume_work);
 	cancel_delayed_work_sync(&musb->deassert_reset_work);
+	pm_runtime_get_sync(musb->controller);
+	musb_host_cleanup(musb);
+	musb_gadget_cleanup(musb);
+	spin_lock_irqsave(&musb->lock, flags);
+	musb_platform_disable(musb);
+	musb_generic_disable(musb);
+	spin_unlock_irqrestore(&musb->lock, flags);
+	musb_writeb(musb->mregs, MUSB_DEVCTL, 0);
+	pm_runtime_dont_use_autosuspend(musb->controller);
+	pm_runtime_put_sync(musb->controller);
+	pm_runtime_disable(musb->controller);
+	musb_platform_exit(musb);
+	musb_phy_callback = NULL;
+	if (musb->dma_controller)
+		musb_dma_controller_destroy(musb->dma_controller);
+	usb_phy_shutdown(musb->xceiv);
 	musb_free(musb);
 	device_init_wakeup(dev, 0);
 	return 0;
@@ -2429,7 +2418,8 @@ static void musb_restore_context(struct musb *musb)
 	musb_writew(musb_base, MUSB_INTRTXE, musb->intrtxe);
 	musb_writew(musb_base, MUSB_INTRRXE, musb->intrrxe);
 	musb_writeb(musb_base, MUSB_INTRUSBE, musb->context.intrusbe);
-	musb_writeb(musb_base, MUSB_DEVCTL, musb->context.devctl);
+	if (musb->context.devctl & MUSB_DEVCTL_SESSION)
+		musb_writeb(musb_base, MUSB_DEVCTL, musb->context.devctl);
 
 	for (i = 0; i < musb->config->num_eps; ++i) {
 		struct musb_hw_ep	*hw_ep;
@@ -2612,7 +2602,6 @@ static struct platform_driver musb_driver = {
 	},
 	.probe		= musb_probe,
 	.remove		= musb_remove,
-	.shutdown	= musb_shutdown,
 };
 
 module_platform_driver(musb_driver);
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index b6afe9e43305..b55a776b03eb 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -215,7 +215,7 @@ struct musb_platform_ops {
 				dma_addr_t *dma_addr, u32 *len);
 	void	(*pre_root_reset_end)(struct musb *musb);
 	void	(*post_root_reset_end)(struct musb *musb);
-	void	(*phy_callback)(enum musb_vbus_id_status status);
+	int	(*phy_callback)(enum musb_vbus_id_status status);
 };
 
 /*
@@ -312,6 +312,7 @@ struct musb {
 	struct work_struct	irq_work;
 	struct delayed_work	deassert_reset_work;
 	struct delayed_work	finish_resume_work;
+	struct delayed_work	gadget_work;
 	u16			hwvers;
 
 	u16			intrrxe;
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index 152865b36522..af2a3a7addf9 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -1656,6 +1656,20 @@ static int musb_gadget_vbus_draw(struct usb_gadget *gadget, unsigned mA)
 	return usb_phy_set_power(musb->xceiv, mA);
 }
 
+static void musb_gadget_work(struct work_struct *work)
+{
+	struct musb *musb;
+	unsigned long flags;
+
+	musb = container_of(work, struct musb, gadget_work.work);
+	pm_runtime_get_sync(musb->controller);
+	spin_lock_irqsave(&musb->lock, flags);
+	musb_pullup(musb, musb->softconnect);
+	spin_unlock_irqrestore(&musb->lock, flags);
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
+}
+
 static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on)
 {
 	struct musb	*musb = gadget_to_musb(gadget);
@@ -1663,20 +1677,16 @@ static int musb_gadget_pullup(struct usb_gadget *gadget, int is_on)
 
 	is_on = !!is_on;
 
-	pm_runtime_get_sync(musb->controller);
-
 	/* NOTE: this assumes we are sensing vbus; we'd rather
 	 * not pullup unless the B-session is active.
 	 */
 	spin_lock_irqsave(&musb->lock, flags);
 	if (is_on != musb->softconnect) {
 		musb->softconnect = is_on;
-		musb_pullup(musb, is_on);
+		schedule_delayed_work(&musb->gadget_work, 0);
 	}
 	spin_unlock_irqrestore(&musb->lock, flags);
 
-	pm_runtime_put(musb->controller);
-
 	return 0;
 }
 
@@ -1845,7 +1855,7 @@ int musb_gadget_setup(struct musb *musb)
 #elif IS_ENABLED(CONFIG_USB_MUSB_GADGET)
 	musb->g.is_otg = 0;
 #endif
-
+	INIT_DELAYED_WORK(&musb->gadget_work, musb_gadget_work);
 	musb_g_init_endpoints(musb);
 
 	musb->is_active = 0;
@@ -1866,6 +1876,8 @@ void musb_gadget_cleanup(struct musb *musb)
 {
 	if (musb->port_mode == MUSB_PORT_MODE_HOST)
 		return;
+
+	cancel_delayed_work_sync(&musb->gadget_work);
 	usb_del_gadget_udc(&musb->g);
 }
 
@@ -1914,8 +1926,8 @@ static int musb_gadget_start(struct usb_gadget *g,
 	if (musb->xceiv->last_event == USB_EVENT_ID)
 		musb_platform_set_vbus(musb, 1);
 
-	if (musb->xceiv->last_event == USB_EVENT_NONE)
-		pm_runtime_put(musb->controller);
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 
 	return 0;
 
@@ -1934,8 +1946,7 @@ static int musb_gadget_stop(struct usb_gadget *g)
 	struct musb	*musb = gadget_to_musb(g);
 	unsigned long	flags;
 
-	if (musb->xceiv->last_event == USB_EVENT_NONE)
-		pm_runtime_get_sync(musb->controller);
+	pm_runtime_get_sync(musb->controller);
 
 	/*
 	 * REVISIT always use otg_set_peripheral() here too;
@@ -1963,7 +1974,8 @@ static int musb_gadget_stop(struct usb_gadget *g)
 	 * that currently misbehaves.
 	 */
 
-	pm_runtime_put(musb->controller);
+	pm_runtime_mark_last_busy(musb->controller);
+	pm_runtime_put_autosuspend(musb->controller);
 
 	return 0;
 }
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index 2f8ad7f1f482..d227a71d85e1 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -434,7 +434,13 @@ static void musb_advance_schedule(struct musb *musb, struct urb *urb,
 		}
 	}
 
-	if (qh != NULL && qh->is_ready) {
+	/*
+	 * The pipe must be broken if current urb->status is set, so don't
+	 * start next urb.
+	 * TODO: to minimize the risk of regression, only check urb->status
+	 * for RX, until we have a test case to understand the behavior of TX.
+	 */
+	if ((!status || !is_in) && qh && qh->is_ready) {
 		dev_dbg(musb->controller, "... next ep%d %cX urb %p\n",
 		    hw_ep->epnum, is_in ? 'R' : 'T', next_urb(qh));
 		musb_start_urb(musb, is_in, qh);
@@ -594,14 +600,13 @@ musb_rx_reinit(struct musb *musb, struct musb_qh *qh, u8 epnum)
 		musb_writew(ep->regs, MUSB_TXCSR, 0);
 
 	/* scrub all previous state, clearing toggle */
-	} else {
-		csr = musb_readw(ep->regs, MUSB_RXCSR);
-		if (csr & MUSB_RXCSR_RXPKTRDY)
-			WARNING("rx%d, packet/%d ready?\n", ep->epnum,
-				musb_readw(ep->regs, MUSB_RXCOUNT));
-
-		musb_h_flush_rxfifo(ep, MUSB_RXCSR_CLRDATATOG);
 	}
+	csr = musb_readw(ep->regs, MUSB_RXCSR);
+	if (csr & MUSB_RXCSR_RXPKTRDY)
+		WARNING("rx%d, packet/%d ready?\n", ep->epnum,
+			musb_readw(ep->regs, MUSB_RXCOUNT));
+
+	musb_h_flush_rxfifo(ep, MUSB_RXCSR_CLRDATATOG);
 
 	/* target addr and (for multipoint) hub addr/port */
 	if (musb->is_multipoint) {
@@ -627,7 +632,7 @@ musb_rx_reinit(struct musb *musb, struct musb_qh *qh, u8 epnum)
 	ep->rx_reinit = 0;
 }
 
-static int musb_tx_dma_set_mode_mentor(struct dma_controller *dma,
+static void musb_tx_dma_set_mode_mentor(struct dma_controller *dma,
 		struct musb_hw_ep *hw_ep, struct musb_qh *qh,
 		struct urb *urb, u32 offset,
 		u32 *length, u8 *mode)
@@ -664,23 +669,18 @@ static int musb_tx_dma_set_mode_mentor(struct dma_controller *dma,
 	}
 	channel->desired_mode = *mode;
 	musb_writew(epio, MUSB_TXCSR, csr);
-
-	return 0;
 }
 
-static int musb_tx_dma_set_mode_cppi_tusb(struct dma_controller *dma,
-					  struct musb_hw_ep *hw_ep,
-					  struct musb_qh *qh,
-					  struct urb *urb,
-					  u32 offset,
-					  u32 *length,
-					  u8 *mode)
+static void musb_tx_dma_set_mode_cppi_tusb(struct dma_controller *dma,
+					   struct musb_hw_ep *hw_ep,
+					   struct musb_qh *qh,
+					   struct urb *urb,
+					   u32 offset,
+					   u32 *length,
+					   u8 *mode)
 {
 	struct dma_channel *channel = hw_ep->tx_channel;
 
-	if (!is_cppi_enabled(hw_ep->musb) && !tusb_dma_omap(hw_ep->musb))
-		return -ENODEV;
-
 	channel->actual_len = 0;
 
 	/*
@@ -688,8 +688,6 @@ static int musb_tx_dma_set_mode_cppi_tusb(struct dma_controller *dma,
 	 * to identify the zero-length-final-packet case.
 	 */
 	*mode = (urb->transfer_flags & URB_ZERO_PACKET) ? 1 : 0;
-
-	return 0;
 }
 
 static bool musb_tx_dma_program(struct dma_controller *dma,
@@ -699,15 +697,14 @@ static bool musb_tx_dma_program(struct dma_controller *dma,
 	struct dma_channel	*channel = hw_ep->tx_channel;
 	u16			pkt_size = qh->maxpacket;
 	u8			mode;
-	int			res;
 
 	if (musb_dma_inventra(hw_ep->musb) || musb_dma_ux500(hw_ep->musb))
-		res = musb_tx_dma_set_mode_mentor(dma, hw_ep, qh, urb,
-						 offset, &length, &mode);
+		musb_tx_dma_set_mode_mentor(dma, hw_ep, qh, urb, offset,
+					    &length, &mode);
+	else if (is_cppi_enabled(hw_ep->musb) || tusb_dma_omap(hw_ep->musb))
+		musb_tx_dma_set_mode_cppi_tusb(dma, hw_ep, qh, urb, offset,
+					       &length, &mode);
 	else
-		res = musb_tx_dma_set_mode_cppi_tusb(dma, hw_ep, qh, urb,
-						     offset, &length, &mode);
-	if (res)
 		return false;
 
 	qh->segsize = length;
@@ -995,9 +992,15 @@ static void musb_bulk_nak_timeout(struct musb *musb, struct musb_hw_ep *ep,
 	if (is_in) {
 		dma = is_dma_capable() ? ep->rx_channel : NULL;
 
-		/* clear nak timeout bit */
+		/*
+		 * Stop the transaction by clearing REQPKT first, then clear
+		 * the NAK Timeout bit. See the MUSBMHDRC USB 2.0 High-Speed
+		 * Dual-Role Controller Programmer's Guide, section 9.2.2.
+		 */
 		rx_csr = musb_readw(epio, MUSB_RXCSR);
 		rx_csr |= MUSB_RXCSR_H_WZC_BITS;
+		rx_csr &= ~MUSB_RXCSR_H_REQPKT;
+		musb_writew(epio, MUSB_RXCSR, rx_csr);
 		rx_csr &= ~MUSB_RXCSR_DATAERROR;
 		musb_writew(epio, MUSB_RXCSR, rx_csr);
 
@@ -1551,7 +1554,7 @@ static int musb_rx_dma_iso_cppi41(struct dma_controller *dma,
 				  struct urb *urb,
 				  size_t len)
 {
-	struct dma_channel *channel = hw_ep->tx_channel;
+	struct dma_channel *channel = hw_ep->rx_channel;
 	void __iomem *epio = hw_ep->regs;
 	dma_addr_t *buf;
 	u32 length, res;
@@ -1870,6 +1873,9 @@ void musb_host_rx(struct musb *musb, u8 epnum)
 		status = -EPROTO;
 		musb_writeb(epio, MUSB_RXINTERVAL, 0);
 
+		rx_csr &= ~MUSB_RXCSR_H_ERROR;
+		musb_writew(epio, MUSB_RXCSR, rx_csr);
+
 	} else if (rx_csr & MUSB_RXCSR_DATAERROR) {
 
 		if (USB_ENDPOINT_XFER_ISOC != qh->type) {
diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index c84e0322c108..0b4cec940386 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -49,97 +49,14 @@ struct omap2430_glue {
 	enum musb_vbus_id_status status;
 	struct work_struct	omap_musb_mailbox_work;
 	struct device		*control_otghs;
+	bool			cable_connected;
+	bool			enabled;
+	bool			powered;
 };
 #define glue_to_musb(g)		platform_get_drvdata(g->musb)
 
 static struct omap2430_glue	*_glue;
 
-static struct timer_list musb_idle_timer;
-
-static void musb_do_idle(unsigned long _musb)
-{
-	struct musb	*musb = (void *)_musb;
-	unsigned long	flags;
-	u8	power;
-	u8	devctl;
-
-	spin_lock_irqsave(&musb->lock, flags);
-
-	switch (musb->xceiv->otg->state) {
-	case OTG_STATE_A_WAIT_BCON:
-
-		devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
-		if (devctl & MUSB_DEVCTL_BDEVICE) {
-			musb->xceiv->otg->state = OTG_STATE_B_IDLE;
-			MUSB_DEV_MODE(musb);
-		} else {
-			musb->xceiv->otg->state = OTG_STATE_A_IDLE;
-			MUSB_HST_MODE(musb);
-		}
-		break;
-	case OTG_STATE_A_SUSPEND:
-		/* finish RESUME signaling? */
-		if (musb->port1_status & MUSB_PORT_STAT_RESUME) {
-			power = musb_readb(musb->mregs, MUSB_POWER);
-			power &= ~MUSB_POWER_RESUME;
-			dev_dbg(musb->controller, "root port resume stopped, power %02x\n", power);
-			musb_writeb(musb->mregs, MUSB_POWER, power);
-			musb->is_active = 1;
-			musb->port1_status &= ~(USB_PORT_STAT_SUSPEND
-						| MUSB_PORT_STAT_RESUME);
-			musb->port1_status |= USB_PORT_STAT_C_SUSPEND << 16;
-			usb_hcd_poll_rh_status(musb->hcd);
-			/* NOTE: it might really be A_WAIT_BCON ... */
-			musb->xceiv->otg->state = OTG_STATE_A_HOST;
-		}
-		break;
-	case OTG_STATE_A_HOST:
-		devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
-		if (devctl &  MUSB_DEVCTL_BDEVICE)
-			musb->xceiv->otg->state = OTG_STATE_B_IDLE;
-		else
-			musb->xceiv->otg->state = OTG_STATE_A_WAIT_BCON;
-	default:
-		break;
-	}
-	spin_unlock_irqrestore(&musb->lock, flags);
-}
-
-
-static void omap2430_musb_try_idle(struct musb *musb, unsigned long timeout)
-{
-	unsigned long		default_timeout = jiffies + msecs_to_jiffies(3);
-	static unsigned long	last_timer;
-
-	if (timeout == 0)
-		timeout = default_timeout;
-
-	/* Never idle if active, or when VBUS timeout is not set as host */
-	if (musb->is_active || ((musb->a_wait_bcon == 0)
-			&& (musb->xceiv->otg->state == OTG_STATE_A_WAIT_BCON))) {
-		dev_dbg(musb->controller, "%s active, deleting timer\n",
-			usb_otg_state_string(musb->xceiv->otg->state));
-		del_timer(&musb_idle_timer);
-		last_timer = jiffies;
-		return;
-	}
-
-	if (time_after(last_timer, timeout)) {
-		if (!timer_pending(&musb_idle_timer))
-			last_timer = timeout;
-		else {
-			dev_dbg(musb->controller, "Longer idle timer already pending, ignoring\n");
-			return;
-		}
-	}
-	last_timer = timeout;
-
-	dev_dbg(musb->controller, "%s inactive, for idle timer for %lu ms\n",
-		usb_otg_state_string(musb->xceiv->otg->state),
-		(unsigned long)jiffies_to_msecs(timeout - jiffies));
-	mod_timer(&musb_idle_timer, timeout);
-}
-
 static void omap2430_musb_set_vbus(struct musb *musb, int is_on)
 {
 	struct usb_otg	*otg = musb->xceiv->otg;
@@ -205,16 +122,6 @@ static void omap2430_musb_set_vbus(struct musb *musb, int is_on)
 		musb_readb(musb->mregs, MUSB_DEVCTL));
 }
 
-static int omap2430_musb_set_mode(struct musb *musb, u8 musb_mode)
-{
-	u8	devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
-
-	devctl |= MUSB_DEVCTL_SESSION;
-	musb_writeb(musb->mregs, MUSB_DEVCTL, devctl);
-
-	return 0;
-}
-
 static inline void omap2430_low_level_exit(struct musb *musb)
 {
 	u32 l;
@@ -234,22 +141,63 @@ static inline void omap2430_low_level_init(struct musb *musb)
 	musb_writel(musb->mregs, OTG_FORCESTDBY, l);
 }
 
-static void omap2430_musb_mailbox(enum musb_vbus_id_status status)
+/*
+ * We can get multiple cable events so we need to keep track
+ * of the power state. Only keep power enabled if USB cable is
+ * connected and a gadget is started. While glue->powered is
+ * set, a runtime PM reference is held on musb->controller; it
+ * is dropped again with autosuspend when power is no longer needed.
+ */
+static void omap2430_set_power(struct musb *musb, bool enabled, bool cable)
+{
+	struct device *dev = musb->controller;
+	struct omap2430_glue *glue = dev_get_drvdata(dev->parent);
+	bool power_up;
+	int res;
+
+	glue->enabled = enabled;
+	glue->cable_connected = cable;
+
+	power_up = glue->enabled && glue->cable_connected;
+	if (power_up == glue->powered) {
+		dev_warn(dev, "power state already %i\n", power_up);
+		return;
+	}
+
+	glue->powered = power_up;
+
+	if (power_up) {
+		res = pm_runtime_get_sync(dev);
+		if (res < 0) {
+			/* get_sync keeps its usage count on failure */
+			dev_err(dev, "could not enable: %i\n", res);
+			pm_runtime_put_noidle(dev);
+			glue->powered = false;
+		}
+	} else {
+		pm_runtime_mark_last_busy(dev);
+		pm_runtime_put_autosuspend(dev);
+	}
+}
+
+static int omap2430_musb_mailbox(enum musb_vbus_id_status status)
 {
 	struct omap2430_glue	*glue = _glue;
 
 	if (!glue) {
 		pr_err("%s: musb core is not yet initialized\n", __func__);
-		return;
+		return -EPROBE_DEFER;
 	}
 	glue->status = status;
 
 	if (!glue_to_musb(glue)) {
 		pr_err("%s: musb core is not yet ready\n", __func__);
-		return;
+		return -EPROBE_DEFER;
 	}
 
 	schedule_work(&glue->omap_musb_mailbox_work);
+
+	return 0;
 }
 
 static void omap_musb_set_mailbox(struct omap2430_glue *glue)
@@ -259,6 +207,13 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 	struct musb_hdrc_platform_data *pdata = dev_get_platdata(dev);
 	struct omap_musb_board_data *data = pdata->board_data;
 	struct usb_otg *otg = musb->xceiv->otg;
+	bool cable_connected;
+
+	cable_connected = ((glue->status == MUSB_ID_GROUND) ||
+			   (glue->status == MUSB_VBUS_VALID));
+
+	if (cable_connected)
+		omap2430_set_power(musb, glue->enabled, cable_connected);
 
 	switch (glue->status) {
 	case MUSB_ID_GROUND:
@@ -268,7 +223,6 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 		musb->xceiv->otg->state = OTG_STATE_A_IDLE;
 		musb->xceiv->last_event = USB_EVENT_ID;
 		if (musb->gadget_driver) {
-			pm_runtime_get_sync(dev);
 			omap_control_usb_set_mode(glue->control_otghs,
 				USB_MODE_HOST);
 			omap2430_musb_set_vbus(musb, 1);
@@ -281,8 +235,6 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 		otg->default_a = false;
 		musb->xceiv->otg->state = OTG_STATE_B_IDLE;
 		musb->xceiv->last_event = USB_EVENT_VBUS;
-		if (musb->gadget_driver)
-			pm_runtime_get_sync(dev);
 		omap_control_usb_set_mode(glue->control_otghs, USB_MODE_DEVICE);
 		break;
 
@@ -291,11 +243,8 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 		dev_dbg(dev, "VBUS Disconnect\n");
 
 		musb->xceiv->last_event = USB_EVENT_NONE;
-		if (musb->gadget_driver) {
+		if (musb->gadget_driver)
 			omap2430_musb_set_vbus(musb, 0);
-			pm_runtime_mark_last_busy(dev);
-			pm_runtime_put_autosuspend(dev);
-		}
 
 		if (data->interface_type == MUSB_INTERFACE_UTMI)
 			otg_set_vbus(musb->xceiv->otg, 0);
@@ -307,6 +256,9 @@ static void omap_musb_set_mailbox(struct omap2430_glue *glue)
 		dev_dbg(dev, "ID float\n");
 	}
 
+	if (!cable_connected)
+		omap2430_set_power(musb, glue->enabled, cable_connected);
+
 	atomic_notifier_call_chain(&musb->xceiv->notifier,
 			musb->xceiv->last_event, NULL);
 }
@@ -316,13 +268,8 @@ static void omap_musb_mailbox_work(struct work_struct *mailbox_work)
 {
 	struct omap2430_glue *glue = container_of(mailbox_work,
 				struct omap2430_glue, omap_musb_mailbox_work);
-	struct musb *musb = glue_to_musb(glue);
-	struct device *dev = musb->controller;
 
-	pm_runtime_get_sync(dev);
 	omap_musb_set_mailbox(glue);
-	pm_runtime_mark_last_busy(dev);
-	pm_runtime_put_autosuspend(dev);
 }
 
 static irqreturn_t omap2430_musb_interrupt(int irq, void *__hci)
@@ -389,23 +336,7 @@ static int omap2430_musb_init(struct musb *musb)
 		return PTR_ERR(musb->phy);
 	}
 	musb->isr = omap2430_musb_interrupt;
-
-	/*
-	 * Enable runtime PM for musb parent (this driver). We can't
-	 * do it earlier as struct musb is not yet allocated and we
-	 * need to touch the musb registers for runtime PM.
-	 */
-	pm_runtime_enable(glue->dev);
-	status = pm_runtime_get_sync(glue->dev);
-	if (status < 0)
-		goto err1;
-
-	status = pm_runtime_get_sync(dev);
-	if (status < 0) {
-		dev_err(dev, "pm_runtime_get_sync FAILED %d\n", status);
-		pm_runtime_put_sync(glue->dev);
-		goto err1;
-	}
+	phy_init(musb->phy);
 
 	l = musb_readl(musb->mregs, OTG_INTERFSEL);
 
@@ -427,20 +358,10 @@ static int omap2430_musb_init(struct musb *musb)
 			musb_readl(musb->mregs, OTG_INTERFSEL),
 			musb_readl(musb->mregs, OTG_SIMENABLE));
 
-	setup_timer(&musb_idle_timer, musb_do_idle, (unsigned long) musb);
-
 	if (glue->status != MUSB_UNKNOWN)
 		omap_musb_set_mailbox(glue);
 
-	phy_init(musb->phy);
-	phy_power_on(musb->phy);
-
-	pm_runtime_put_noidle(musb->controller);
-	pm_runtime_put_noidle(glue->dev);
 	return 0;
-
-err1:
-	return status;
 }
 
 static void omap2430_musb_enable(struct musb *musb)
@@ -452,6 +373,11 @@ static void omap2430_musb_enable(struct musb *musb)
 	struct musb_hdrc_platform_data *pdata = dev_get_platdata(dev);
 	struct omap_musb_board_data *data = pdata->board_data;
 
+	if (!WARN_ON(!musb->phy))
+		phy_power_on(musb->phy);
+
+	omap2430_set_power(musb, true, glue->cable_connected);
+
 	switch (glue->status) {
 
 	case MUSB_ID_GROUND:
@@ -487,18 +413,25 @@ static void omap2430_musb_disable(struct musb *musb)
 	struct device *dev = musb->controller;
 	struct omap2430_glue *glue = dev_get_drvdata(dev->parent);
 
+	if (!WARN_ON(!musb->phy))
+		phy_power_off(musb->phy);
+
 	if (glue->status != MUSB_UNKNOWN)
 		omap_control_usb_set_mode(glue->control_otghs,
 			USB_MODE_DISCONNECT);
+
+	omap2430_set_power(musb, false, glue->cable_connected);
 }
 
 static int omap2430_musb_exit(struct musb *musb)
 {
-	del_timer_sync(&musb_idle_timer);
+	struct device *dev = musb->controller;
+	struct omap2430_glue *glue = dev_get_drvdata(dev->parent);
 
 	omap2430_low_level_exit(musb);
-	phy_power_off(musb->phy);
 	phy_exit(musb->phy);
+	musb->phy = NULL;
+	cancel_work_sync(&glue->omap_musb_mailbox_work);
 
 	return 0;
 }
@@ -512,9 +445,6 @@ static const struct musb_platform_ops omap2430_ops = {
 	.init		= omap2430_musb_init,
 	.exit		= omap2430_musb_exit,
 
-	.set_mode	= omap2430_musb_set_mode,
-	.try_idle	= omap2430_musb_try_idle,
-
 	.set_vbus	= omap2430_musb_set_vbus,
 
 	.enable		= omap2430_musb_enable,
@@ -639,11 +569,9 @@ static int omap2430_probe(struct platform_device *pdev)
 		goto err2;
 	}
 
-	/*
-	 * Note that we cannot enable PM runtime yet for this
-	 * driver as we need struct musb initialized first.
-	 * See omap2430_musb_init above.
-	 */
+	pm_runtime_enable(glue->dev);
+	pm_runtime_use_autosuspend(glue->dev);
+	pm_runtime_set_autosuspend_delay(glue->dev, 500);
 
 	ret = platform_device_add(musb);
 	if (ret) {
@@ -662,12 +590,14 @@ err0:
 
 static int omap2430_remove(struct platform_device *pdev)
 {
-	struct omap2430_glue		*glue = platform_get_drvdata(pdev);
+	struct omap2430_glue *glue = platform_get_drvdata(pdev);
+	struct musb *musb = glue_to_musb(glue);
 
 	pm_runtime_get_sync(glue->dev);
-	cancel_work_sync(&glue->omap_musb_mailbox_work);
 	platform_device_unregister(glue->musb);
+	omap2430_set_power(musb, false, false);
 	pm_runtime_put_sync(glue->dev);
+	pm_runtime_dont_use_autosuspend(glue->dev);
 	pm_runtime_disable(glue->dev);
 
 	return 0;
@@ -680,12 +610,13 @@ static int omap2430_runtime_suspend(struct device *dev)
 	struct omap2430_glue		*glue = dev_get_drvdata(dev);
 	struct musb			*musb = glue_to_musb(glue);
 
-	if (musb) {
-		musb->context.otg_interfsel = musb_readl(musb->mregs,
-				OTG_INTERFSEL);
+	if (!musb)
+		return 0;
 
-		omap2430_low_level_exit(musb);
-	}
+	musb->context.otg_interfsel = musb_readl(musb->mregs,
+						 OTG_INTERFSEL);
+
+	omap2430_low_level_exit(musb);
 
 	return 0;
 }
@@ -696,7 +627,7 @@ static int omap2430_runtime_resume(struct device *dev)
 	struct musb			*musb = glue_to_musb(glue);
 
 	if (!musb)
-		return -EPROBE_DEFER;
+		return 0;
 
 	omap2430_low_level_init(musb);
 	musb_writel(musb->mregs, OTG_INTERFSEL,
@@ -738,18 +669,8 @@ static struct platform_driver omap2430_driver = {
 	},
 };
 
+module_platform_driver(omap2430_driver);
+
 MODULE_DESCRIPTION("OMAP2PLUS MUSB Glue Layer");
 MODULE_AUTHOR("Felipe Balbi <balbi@ti.com>");
 MODULE_LICENSE("GPL v2");
-
-static int __init omap2430_init(void)
-{
-	return platform_driver_register(&omap2430_driver);
-}
-subsys_initcall(omap2430_init);
-
-static void __exit omap2430_exit(void)
-{
-	platform_driver_unregister(&omap2430_driver);
-}
-module_exit(omap2430_exit);
diff --git a/drivers/usb/musb/sunxi.c b/drivers/usb/musb/sunxi.c
index fdab4232cfbf..76500515dd8b 100644
--- a/drivers/usb/musb/sunxi.c
+++ b/drivers/usb/musb/sunxi.c
@@ -80,7 +80,8 @@ static struct musb *sunxi_musb;
 
 struct sunxi_glue {
 	struct device		*dev;
-	struct platform_device	*musb;
+	struct musb		*musb;
+	struct platform_device	*musb_pdev;
 	struct clk		*clk;
 	struct reset_control	*rst;
 	struct phy		*phy;
@@ -102,7 +103,7 @@ static void sunxi_musb_work(struct work_struct *work)
 		return;
 
 	if (test_and_clear_bit(SUNXI_MUSB_FL_HOSTMODE_PEND, &glue->flags)) {
-		struct musb *musb = platform_get_drvdata(glue->musb);
+		struct musb *musb = glue->musb;
 		unsigned long flags;
 		u8 devctl;
 
@@ -112,7 +113,7 @@ static void sunxi_musb_work(struct work_struct *work)
 		if (test_bit(SUNXI_MUSB_FL_HOSTMODE, &glue->flags)) {
 			set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
 			musb->xceiv->otg->default_a = 1;
-			musb->xceiv->otg->state = OTG_STATE_A_IDLE;
+			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
 			MUSB_HST_MODE(musb);
 			devctl |= MUSB_DEVCTL_SESSION;
 		} else {
@@ -145,10 +146,12 @@ static void sunxi_musb_set_vbus(struct musb *musb, int is_on)
 {
 	struct sunxi_glue *glue = dev_get_drvdata(musb->controller->parent);
 
-	if (is_on)
+	if (is_on) {
 		set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
-	else
+		musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
+	} else {
 		clear_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
+	}
 
 	schedule_work(&glue->work);
 }
@@ -264,15 +267,6 @@ static int sunxi_musb_init(struct musb *musb)
 	if (ret)
 		goto error_unregister_notifier;
 
-	if (musb->port_mode == MUSB_PORT_MODE_HOST) {
-		ret = phy_power_on(glue->phy);
-		if (ret)
-			goto error_phy_exit;
-		set_bit(SUNXI_MUSB_FL_PHY_ON, &glue->flags);
-		/* Stop musb work from turning vbus off again */
-		set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
-	}
-
 	musb->isr = sunxi_musb_interrupt;
 
 	/* Stop the musb-core from doing runtime pm (not supported on sunxi) */
@@ -280,8 +274,6 @@ static int sunxi_musb_init(struct musb *musb)
 
 	return 0;
 
-error_phy_exit:
-	phy_exit(glue->phy);
 error_unregister_notifier:
 	if (musb->port_mode == MUSB_PORT_MODE_DUAL_ROLE)
 		extcon_unregister_notifier(glue->extcon, EXTCON_USB_HOST,
@@ -323,10 +315,31 @@ static int sunxi_musb_exit(struct musb *musb)
 	return 0;
 }
 
+static int sunxi_set_mode(struct musb *musb, u8 mode)
+{
+	struct sunxi_glue *glue = dev_get_drvdata(musb->controller->parent);
+	int ret;
+
+	if (mode == MUSB_HOST) {
+		ret = phy_power_on(glue->phy);
+		if (ret)
+			return ret;
+
+		set_bit(SUNXI_MUSB_FL_PHY_ON, &glue->flags);
+		/* Stop musb work from turning vbus off again */
+		set_bit(SUNXI_MUSB_FL_VBUS_ON, &glue->flags);
+		musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
+	}
+
+	return 0;
+}
+
 static void sunxi_musb_enable(struct musb *musb)
 {
 	struct sunxi_glue *glue = dev_get_drvdata(musb->controller->parent);
 
+	glue->musb = musb;
+
 	/* musb_core does not call us in a balanced manner */
 	if (test_and_set_bit(SUNXI_MUSB_FL_ENABLED, &glue->flags))
 		return;
@@ -569,6 +582,7 @@ static const struct musb_platform_ops sunxi_musb_ops = {
 	.exit		= sunxi_musb_exit,
 	.enable		= sunxi_musb_enable,
 	.disable	= sunxi_musb_disable,
+	.set_mode	= sunxi_set_mode,
 	.fifo_offset	= sunxi_musb_fifo_offset,
 	.ep_offset	= sunxi_musb_ep_offset,
 	.busctl_offset	= sunxi_musb_busctl_offset,
@@ -721,9 +735,9 @@ static int sunxi_musb_probe(struct platform_device *pdev)
 	pinfo.data	= &pdata;
 	pinfo.size_data = sizeof(pdata);
 
-	glue->musb = platform_device_register_full(&pinfo);
-	if (IS_ERR(glue->musb)) {
-		ret = PTR_ERR(glue->musb);
+	glue->musb_pdev = platform_device_register_full(&pinfo);
+	if (IS_ERR(glue->musb_pdev)) {
+		ret = PTR_ERR(glue->musb_pdev);
 		dev_err(&pdev->dev, "Error registering musb dev: %d\n", ret);
 		goto err_unregister_usb_phy;
 	}
@@ -740,7 +754,7 @@ static int sunxi_musb_remove(struct platform_device *pdev)
 	struct sunxi_glue *glue = platform_get_drvdata(pdev);
 	struct platform_device *usb_phy = glue->usb_phy;
 
-	platform_device_unregister(glue->musb); /* Frees glue ! */
+	platform_device_unregister(glue->musb_pdev);
 	usb_phy_generic_unregister(usb_phy);
 
 	return 0;
diff --git a/drivers/usb/phy/phy-twl6030-usb.c b/drivers/usb/phy/phy-twl6030-usb.c
index 24e2b3cf1867..a72e8d670adc 100644
--- a/drivers/usb/phy/phy-twl6030-usb.c
+++ b/drivers/usb/phy/phy-twl6030-usb.c
@@ -97,6 +97,9 @@ struct twl6030_usb {
 
 	struct regulator		*usb3v3;
 
+	/* used to check initial cable status after probe */
+	struct delayed_work	get_status_work;
+
 	/* used to set vbus, in atomic path */
 	struct work_struct	set_vbus_work;
 
@@ -227,12 +230,16 @@ static irqreturn_t twl6030_usb_irq(int irq, void *_twl)
 			twl->asleep = 1;
 			status = MUSB_VBUS_VALID;
 			twl->linkstat = status;
-			musb_mailbox(status);
+			ret = musb_mailbox(status);
+			if (ret)
+				twl->linkstat = MUSB_UNKNOWN;
 		} else {
 			if (twl->linkstat != MUSB_UNKNOWN) {
 				status = MUSB_VBUS_OFF;
 				twl->linkstat = status;
-				musb_mailbox(status);
+				ret = musb_mailbox(status);
+				if (ret)
+					twl->linkstat = MUSB_UNKNOWN;
 				if (twl->asleep) {
 					regulator_disable(twl->usb3v3);
 					twl->asleep = 0;
@@ -264,7 +271,9 @@ static irqreturn_t twl6030_usbotg_irq(int irq, void *_twl)
 		twl6030_writeb(twl, TWL_MODULE_USB, 0x10, USB_ID_INT_EN_HI_SET);
 		status = MUSB_ID_GROUND;
 		twl->linkstat = status;
-		musb_mailbox(status);
+		ret = musb_mailbox(status);
+		if (ret)
+			twl->linkstat = MUSB_UNKNOWN;
 	} else  {
 		twl6030_writeb(twl, TWL_MODULE_USB, 0x10, USB_ID_INT_EN_HI_CLR);
 		twl6030_writeb(twl, TWL_MODULE_USB, 0x1, USB_ID_INT_EN_HI_SET);
@@ -274,6 +283,15 @@ static irqreturn_t twl6030_usbotg_irq(int irq, void *_twl)
 	return IRQ_HANDLED;
 }
 
+static void twl6030_status_work(struct work_struct *work)
+{
+	struct twl6030_usb *twl = container_of(work, struct twl6030_usb,
+					       get_status_work.work);
+
+	twl6030_usb_irq(twl->irq2, twl);
+	twl6030_usbotg_irq(twl->irq1, twl);
+}
+
 static int twl6030_enable_irq(struct twl6030_usb *twl)
 {
 	twl6030_writeb(twl, TWL_MODULE_USB, 0x1, USB_ID_INT_EN_HI_SET);
@@ -284,8 +302,6 @@ static int twl6030_enable_irq(struct twl6030_usb *twl)
 				REG_INT_MSK_LINE_C);
 	twl6030_interrupt_unmask(TWL6030_CHARGER_CTRL_INT_MASK,
 				REG_INT_MSK_STS_C);
-	twl6030_usb_irq(twl->irq2, twl);
-	twl6030_usbotg_irq(twl->irq1, twl);
 
 	return 0;
 }
@@ -371,6 +387,7 @@ static int twl6030_usb_probe(struct platform_device *pdev)
 		dev_warn(&pdev->dev, "could not create sysfs file\n");
 
 	INIT_WORK(&twl->set_vbus_work, otg_set_vbus_work);
+	INIT_DELAYED_WORK(&twl->get_status_work, twl6030_status_work);
 
 	status = request_threaded_irq(twl->irq1, NULL, twl6030_usbotg_irq,
 			IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING | IRQF_ONESHOT,
@@ -395,6 +412,7 @@ static int twl6030_usb_probe(struct platform_device *pdev)
 
 	twl->asleep = 0;
 	twl6030_enable_irq(twl);
+	schedule_delayed_work(&twl->get_status_work, HZ);
 	dev_info(&pdev->dev, "Initialized TWL6030 USB module\n");
 
 	return 0;
@@ -404,6 +422,7 @@ static int twl6030_usb_remove(struct platform_device *pdev)
 {
 	struct twl6030_usb *twl = platform_get_drvdata(pdev);
 
+	cancel_delayed_work(&twl->get_status_work);
 	twl6030_interrupt_mask(TWL6030_USBOTG_INT_MASK,
 		REG_INT_MSK_LINE_C);
 	twl6030_interrupt_mask(TWL6030_USBOTG_INT_MASK,
diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c
index 2eddbe538cda..5608af4a369d 100644
--- a/drivers/usb/serial/mos7720.c
+++ b/drivers/usb/serial/mos7720.c
@@ -2007,6 +2007,7 @@ static void mos7720_release(struct usb_serial *serial)
 				    urblist_entry)
 			usb_unlink_urb(urbtrack->urb);
 		spin_unlock_irqrestore(&mos_parport->listlock, flags);
+		parport_del_port(mos_parport->pp);
 
 		kref_put(&mos_parport->ref_count, destroy_mos_parport);
 	}
diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index 4d49fce406e1..5ef014ba6ae8 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -836,6 +836,7 @@ static int uas_slave_configure(struct scsi_device *sdev)
 	if (devinfo->flags & US_FL_BROKEN_FUA)
 		sdev->broken_fua = 1;
 
+	scsi_change_queue_depth(sdev, devinfo->qdepth - 2);
 	return 0;
 }
 
@@ -848,7 +849,6 @@ static struct scsi_host_template uas_host_template = {
 	.slave_configure = uas_slave_configure,
 	.eh_abort_handler = uas_eh_abort_handler,
 	.eh_bus_reset_handler = uas_eh_bus_reset_handler,
-	.can_queue = MAX_CMNDS,
 	.this_id = -1,
 	.sg_tablesize = SG_NONE,
 	.skip_settle_delay = 1,
diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c
index fca51105974e..2e0450bec1b1 100644
--- a/drivers/usb/usbip/vhci_hcd.c
+++ b/drivers/usb/usbip/vhci_hcd.c
@@ -941,7 +941,7 @@ static void vhci_stop(struct usb_hcd *hcd)
 
 static int vhci_get_frame_number(struct usb_hcd *hcd)
 {
-	pr_err("Not yet implemented\n");
+	dev_err_ratelimited(&hcd->self.root_hub->dev, "Not yet implemented\n");
 	return 0;
 }
 
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index b54f26c55dfd..b4b3e256491b 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -746,7 +746,7 @@ config ALIM7101_WDT
 
 config EBC_C384_WDT
 	tristate "WinSystems EBC-C384 Watchdog Timer"
-	depends on X86 && ISA
+	depends on X86 && ISA_BUS_API
 	select WATCHDOG_CORE
 	help
 	  Enables watchdog timer support for the watchdog timer on the
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index d46839f51e73..e4db19e88ab1 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -151,8 +151,6 @@ static DECLARE_WAIT_QUEUE_HEAD(balloon_wq);
 static void balloon_process(struct work_struct *work);
 static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
 
-static void release_memory_resource(struct resource *resource);
-
 /* When ballooning out (allocating memory to return to Xen) we don't really
    want the kernel to try too hard since that can trigger the oom killer. */
 #define GFP_BALLOON \
@@ -248,6 +246,19 @@ static enum bp_state update_schedule(enum bp_state state)
 }
 
 #ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG
+static void release_memory_resource(struct resource *resource)
+{
+	if (!resource)
+		return;
+
+	/*
+	 * No need to reset region to identity mapped since we now
+	 * know that no I/O can be in this region
+	 */
+	release_resource(resource);
+	kfree(resource);
+}
+
 static struct resource *additional_memory_resource(phys_addr_t size)
 {
 	struct resource *res;
@@ -286,19 +297,6 @@ static struct resource *additional_memory_resource(phys_addr_t size)
 	return res;
 }
 
-static void release_memory_resource(struct resource *resource)
-{
-	if (!resource)
-		return;
-
-	/*
-	 * No need to reset region to identity mapped since we now
-	 * know that no I/O can be in this region
-	 */
-	release_resource(resource);
-	kfree(resource);
-}
-
 static enum bp_state reserve_additional_memory(void)
 {
 	long credit;
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
index 8e67336f8ddd..6a25533da237 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -183,8 +183,7 @@ int xen_pcibk_config_read(struct pci_dev *dev, int offset, int size,
 		field_start = OFFSET(cfg_entry);
 		field_end = OFFSET(cfg_entry) + field->size;
 
-		if ((req_start >= field_start && req_start < field_end)
-		    || (req_end > field_start && req_end <= field_end)) {
+		if (req_end > field_start && field_end > req_start) {
 			err = conf_space_read(dev, cfg_entry, field_start,
 					      &tmp_val);
 			if (err)
@@ -230,8 +229,7 @@ int xen_pcibk_config_write(struct pci_dev *dev, int offset, int size, u32 value)
 		field_start = OFFSET(cfg_entry);
 		field_end = OFFSET(cfg_entry) + field->size;
 
-		if ((req_start >= field_start && req_start < field_end)
-		    || (req_end > field_start && req_end <= field_end)) {
+		if (req_end > field_start && field_end > req_start) {
 			tmp_val = 0;
 
 			err = xen_pcibk_config_read(dev, field_start,
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
index ad3d17d29c81..9ead1c2ff1dd 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -145,7 +145,7 @@ static int rom_write(struct pci_dev *dev, int offset, u32 value, void *data)
 	/* A write to obtain the length must happen as a 32-bit write.
 	 * This does not (yet) support writing individual bytes
 	 */
-	if (value == ~PCI_ROM_ADDRESS_ENABLE)
+	if ((value | ~PCI_ROM_ADDRESS_MASK) == ~0U)
 		bar->which = 1;
 	else {
 		u32 tmpval;
@@ -225,38 +225,42 @@ static inline void read_dev_bar(struct pci_dev *dev,
 			   (PCI_BASE_ADDRESS_SPACE_MEMORY |
 				PCI_BASE_ADDRESS_MEM_TYPE_64))) {
 			bar_info->val = res[pos - 1].start >> 32;
-			bar_info->len_val = res[pos - 1].end >> 32;
+			bar_info->len_val = -resource_size(&res[pos - 1]) >> 32;
 			return;
 		}
 	}
 
+	if (!res[pos].flags ||
+	    (res[pos].flags & (IORESOURCE_DISABLED | IORESOURCE_UNSET |
+			       IORESOURCE_BUSY)))
+		return;
+
 	bar_info->val = res[pos].start |
 			(res[pos].flags & PCI_REGION_FLAG_MASK);
-	bar_info->len_val = resource_size(&res[pos]);
+	bar_info->len_val = -resource_size(&res[pos]) |
+			    (res[pos].flags & PCI_REGION_FLAG_MASK);
 }
 
 static void *bar_init(struct pci_dev *dev, int offset)
 {
-	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
+	struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL);
 
 	if (!bar)
 		return ERR_PTR(-ENOMEM);
 
 	read_dev_bar(dev, bar, offset, ~0);
-	bar->which = 0;
 
 	return bar;
 }
 
 static void *rom_init(struct pci_dev *dev, int offset)
 {
-	struct pci_bar_info *bar = kmalloc(sizeof(*bar), GFP_KERNEL);
+	struct pci_bar_info *bar = kzalloc(sizeof(*bar), GFP_KERNEL);
 
 	if (!bar)
 		return ERR_PTR(-ENOMEM);
 
 	read_dev_bar(dev, bar, offset, ~PCI_ROM_ADDRESS_ENABLE);
-	bar->which = 0;
 
 	return bar;
 }