summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>2019-04-19 11:53:42 +0200
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2019-04-19 11:53:42 +0200
commitcef62a615d22369d817757d1e4fe64fdf42a401e (patch)
treed5a2fdab8c0372a209b63c8b378bc129011be713
parentd358b1733fc33d9f0261ce07c3d328787652245d (diff)
parent9f201aba56b92c3daa4b76efae056ddbb80d91e6 (diff)
downloadlinux-cef62a615d22369d817757d1e4fe64fdf42a401e.tar.bz2
Merge tag 'misc-habanalabs-next-2019-04-19' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next
Oded writes: This tag contains many changes for kernel 5.2. The major changes are: - Add a new IOCTL for debug, profiling and trace operations on the device. This will allow the user to perform profiling and debugging of the deep learning topologies that are executing on the ASIC. - Add a shadow table for the ASIC's MMU page tables to avoid doing page table walks on the device's DRAM during map/unmap operations. - re-factor of ASIC-dependent code to be common code for all ASICs In addition, there are many small fixes and changes. The notable ones are: - Allow accessing the DRAM using virtual address through the debugFS interface. Until now, only physical addresses were valid, but that is useless for debugging when working with MMU. - Allow the user to modify the TPC clock relaxation value to better control TPC power consumption during topology execution. - Allow the user to inquire about the device's status (operational/Malfunction/in-reset) in the INFO IOCTL. - Improvements to the device's removal function, to prevent crash in case of force removal by the OS. - Prevent PTE read/write during hard-reset. This will improve stability of the device during hard-reset. * tag 'misc-habanalabs-next-2019-04-19' of git://people.freedesktop.org/~gabbayo/linux: (31 commits) habanalabs: prevent device PTE read/write during hard-reset habanalabs: improve IOCTLs behavior when disabled or reset habanalabs: all FD must be closed before removing device habanalabs: split mmu/no-mmu code paths in memory ioctl habanalabs: ASIC_AUTO_DETECT enum value is redundant habanalabs: refactoring in goya.c uapi/habanalabs: fix some comments in uapi file habanalabs: add goya implementation for debug configuration habanalabs: add new IOCTL for debug, tracing and profiling habanalabs: remove extra semicolon habanalabs: prevent CPU soft lockup on Palladium habanalabs: remove trailing blank line from EOF habanalabs: improve error messages habanalabs: add device status option to INFO IOCTL habanalabs: allow user to modify TPC clock relaxation value habanalabs: set new golden value to tpc clock relaxation habanalabs: never fail hard reset of device habanalabs: keep track of the device's dma mask habanalabs: add MMU shadow mapping habanalabs: Allow accessing DRAM virtual addresses via debugfs ...
-rw-r--r--drivers/misc/habanalabs/Makefile2
-rw-r--r--drivers/misc/habanalabs/command_buffer.c7
-rw-r--r--drivers/misc/habanalabs/command_submission.c5
-rw-r--r--drivers/misc/habanalabs/debugfs.c96
-rw-r--r--drivers/misc/habanalabs/device.c77
-rw-r--r--drivers/misc/habanalabs/firmware_if.c325
-rw-r--r--drivers/misc/habanalabs/goya/Makefile3
-rw-r--r--drivers/misc/habanalabs/goya/goya.c884
-rw-r--r--drivers/misc/habanalabs/goya/goyaP.h47
-rw-r--r--drivers/misc/habanalabs/goya/goya_coresight.c620
-rw-r--r--drivers/misc/habanalabs/goya/goya_security.c15
-rw-r--r--drivers/misc/habanalabs/habanalabs.h110
-rw-r--r--drivers/misc/habanalabs/habanalabs_drv.c7
-rw-r--r--drivers/misc/habanalabs/habanalabs_ioctl.c139
-rw-r--r--drivers/misc/habanalabs/include/armcp_if.h10
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/cpu_if_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/cpu_pll_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_1_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_2_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_3_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_4_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_1_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_2_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_3_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_4_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h12
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h3
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/ic_pll_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/mc_pll_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/mme2_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/mme3_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/mme4_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/mme5_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/mme6_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/mme_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/mme_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/mmu_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/mmu_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/pcie_aux_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/pcie_wrap_regs.h306
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/psoc_emmc_pll_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/psoc_mme_pll_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/psoc_pci_pll_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/psoc_spi_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x0_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x1_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x2_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x3_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x4_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/stlb_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/stlb_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_masks.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cfg_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cmdq_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc1_qm_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc1_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cfg_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cmdq_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc2_qm_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc2_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cfg_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cmdq_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc3_qm_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc3_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cfg_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cmdq_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc4_qm_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc4_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cfg_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cmdq_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc5_qm_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc5_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cfg_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cmdq_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc6_qm_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc6_rtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cfg_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cmdq_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc7_nrtr_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc7_qm_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/asic_reg/tpc_pll_regs.h1
-rw-r--r--drivers/misc/habanalabs/include/goya/goya.h4
-rw-r--r--drivers/misc/habanalabs/include/goya/goya_async_events.h9
-rw-r--r--drivers/misc/habanalabs/include/goya/goya_coresight.h199
-rw-r--r--drivers/misc/habanalabs/include/goya/goya_fw_if.h2
-rw-r--r--drivers/misc/habanalabs/include/hl_boot_if.h3
-rw-r--r--drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h16
-rw-r--r--drivers/misc/habanalabs/include/hw_ip/pci/pci_general.h23
-rw-r--r--drivers/misc/habanalabs/memory.c193
-rw-r--r--drivers/misc/habanalabs/mmu.c600
-rw-r--r--drivers/misc/habanalabs/pci.c402
-rw-r--r--include/uapi/misc/habanalabs.h158
120 files changed, 3118 insertions, 1250 deletions
diff --git a/drivers/misc/habanalabs/Makefile b/drivers/misc/habanalabs/Makefile
index c6592db59b25..f8e85243d672 100644
--- a/drivers/misc/habanalabs/Makefile
+++ b/drivers/misc/habanalabs/Makefile
@@ -6,7 +6,7 @@ obj-m := habanalabs.o
habanalabs-y := habanalabs_drv.o device.o context.o asid.o habanalabs_ioctl.o \
command_buffer.o hw_queue.o irq.o sysfs.o hwmon.o memory.o \
- command_submission.o mmu.o
+ command_submission.o mmu.o firmware_if.o pci.o
habanalabs-$(CONFIG_DEBUG_FS) += debugfs.o
diff --git a/drivers/misc/habanalabs/command_buffer.c b/drivers/misc/habanalabs/command_buffer.c
index 85f75806a9a7..b1ffca47d748 100644
--- a/drivers/misc/habanalabs/command_buffer.c
+++ b/drivers/misc/habanalabs/command_buffer.c
@@ -214,6 +214,13 @@ int hl_cb_ioctl(struct hl_fpriv *hpriv, void *data)
u64 handle;
int rc;
+ if (hl_device_disabled_or_in_reset(hdev)) {
+ dev_warn_ratelimited(hdev->dev,
+ "Device is %s. Can't execute CB IOCTL\n",
+ atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+ return -EBUSY;
+ }
+
switch (args->in.op) {
case HL_CB_OP_CREATE:
rc = hl_cb_create(hdev, &hpriv->cb_mgr, args->in.cb_size,
diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c
index 19c84214a7ea..02c48da0b645 100644
--- a/drivers/misc/habanalabs/command_submission.c
+++ b/drivers/misc/habanalabs/command_submission.c
@@ -261,7 +261,8 @@ static void cs_timedout(struct work_struct *work)
ctx_asid = cs->ctx->asid;
/* TODO: add information about last signaled seq and last emitted seq */
- dev_err(hdev->dev, "CS %d.%llu got stuck!\n", ctx_asid, cs->sequence);
+ dev_err(hdev->dev, "User %d command submission %llu got stuck!\n",
+ ctx_asid, cs->sequence);
cs_put(cs);
@@ -604,7 +605,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
bool need_soft_reset = false;
if (hl_device_disabled_or_in_reset(hdev)) {
- dev_warn(hdev->dev,
+ dev_warn_ratelimited(hdev->dev,
"Device is %s. Can't submit new CS\n",
atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
rc = -EBUSY;
diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c
index 974a87789bd8..a4447699ff4e 100644
--- a/drivers/misc/habanalabs/debugfs.c
+++ b/drivers/misc/habanalabs/debugfs.c
@@ -505,22 +505,97 @@ err:
return -EINVAL;
}
+static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
+ u64 *phys_addr)
+{
+ struct hl_ctx *ctx = hdev->user_ctx;
+ u64 hop_addr, hop_pte_addr, hop_pte;
+ int rc = 0;
+
+ if (!ctx) {
+ dev_err(hdev->dev, "no ctx available\n");
+ return -EINVAL;
+ }
+
+ mutex_lock(&ctx->mmu_lock);
+
+ /* hop 0 */
+ hop_addr = get_hop0_addr(ctx);
+ hop_pte_addr = get_hop0_pte_addr(ctx, hop_addr, virt_addr);
+ hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
+
+ /* hop 1 */
+ hop_addr = get_next_hop_addr(hop_pte);
+ if (hop_addr == ULLONG_MAX)
+ goto not_mapped;
+ hop_pte_addr = get_hop1_pte_addr(ctx, hop_addr, virt_addr);
+ hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
+
+ /* hop 2 */
+ hop_addr = get_next_hop_addr(hop_pte);
+ if (hop_addr == ULLONG_MAX)
+ goto not_mapped;
+ hop_pte_addr = get_hop2_pte_addr(ctx, hop_addr, virt_addr);
+ hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
+
+ /* hop 3 */
+ hop_addr = get_next_hop_addr(hop_pte);
+ if (hop_addr == ULLONG_MAX)
+ goto not_mapped;
+ hop_pte_addr = get_hop3_pte_addr(ctx, hop_addr, virt_addr);
+ hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
+
+ if (!(hop_pte & LAST_MASK)) {
+ /* hop 4 */
+ hop_addr = get_next_hop_addr(hop_pte);
+ if (hop_addr == ULLONG_MAX)
+ goto not_mapped;
+ hop_pte_addr = get_hop4_pte_addr(ctx, hop_addr, virt_addr);
+ hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr);
+ }
+
+ if (!(hop_pte & PAGE_PRESENT_MASK))
+ goto not_mapped;
+
+ *phys_addr = (hop_pte & PTE_PHYS_ADDR_MASK) | (virt_addr & OFFSET_MASK);
+
+ goto out;
+
+not_mapped:
+ dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
+ virt_addr);
+ rc = -EINVAL;
+out:
+ mutex_unlock(&ctx->mmu_lock);
+ return rc;
+}
+
static ssize_t hl_data_read32(struct file *f, char __user *buf,
size_t count, loff_t *ppos)
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
char tmp_buf[32];
+ u64 addr = entry->addr;
u32 val;
ssize_t rc;
if (*ppos)
return 0;
- rc = hdev->asic_funcs->debugfs_read32(hdev, entry->addr, &val);
+ if (addr >= prop->va_space_dram_start_address &&
+ addr < prop->va_space_dram_end_address &&
+ hdev->mmu_enable &&
+ hdev->dram_supports_virtual_memory) {
+ rc = device_va_to_pa(hdev, entry->addr, &addr);
+ if (rc)
+ return rc;
+ }
+
+ rc = hdev->asic_funcs->debugfs_read32(hdev, addr, &val);
if (rc) {
- dev_err(hdev->dev, "Failed to read from 0x%010llx\n",
- entry->addr);
+ dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
return rc;
}
@@ -536,6 +611,8 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
{
struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
struct hl_device *hdev = entry->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 addr = entry->addr;
u32 value;
ssize_t rc;
@@ -543,10 +620,19 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
if (rc)
return rc;
- rc = hdev->asic_funcs->debugfs_write32(hdev, entry->addr, value);
+ if (addr >= prop->va_space_dram_start_address &&
+ addr < prop->va_space_dram_end_address &&
+ hdev->mmu_enable &&
+ hdev->dram_supports_virtual_memory) {
+ rc = device_va_to_pa(hdev, entry->addr, &addr);
+ if (rc)
+ return rc;
+ }
+
+ rc = hdev->asic_funcs->debugfs_write32(hdev, addr, value);
if (rc) {
dev_err(hdev->dev, "Failed to write 0x%08x to 0x%010llx\n",
- value, entry->addr);
+ value, addr);
return rc;
}
diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c
index 77d51be66c7e..25bfb093ff26 100644
--- a/drivers/misc/habanalabs/device.c
+++ b/drivers/misc/habanalabs/device.c
@@ -10,6 +10,7 @@
#include <linux/pci.h>
#include <linux/sched/signal.h>
#include <linux/hwmon.h>
+#include <uapi/misc/habanalabs.h>
#define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10)
@@ -21,6 +22,20 @@ bool hl_device_disabled_or_in_reset(struct hl_device *hdev)
return false;
}
+enum hl_device_status hl_device_status(struct hl_device *hdev)
+{
+ enum hl_device_status status;
+
+ if (hdev->disabled)
+ status = HL_DEVICE_STATUS_MALFUNCTION;
+ else if (atomic_read(&hdev->in_reset))
+ status = HL_DEVICE_STATUS_IN_RESET;
+ else
+ status = HL_DEVICE_STATUS_OPERATIONAL;
+
+ return status;
+};
+
static void hpriv_release(struct kref *ref)
{
struct hl_fpriv *hpriv;
@@ -498,11 +513,8 @@ disable_device:
return rc;
}
-static void hl_device_hard_reset_pending(struct work_struct *work)
+static void device_kill_open_processes(struct hl_device *hdev)
{
- struct hl_device_reset_work *device_reset_work =
- container_of(work, struct hl_device_reset_work, reset_work);
- struct hl_device *hdev = device_reset_work->hdev;
u16 pending_total, pending_cnt;
struct task_struct *task = NULL;
@@ -537,6 +549,12 @@ static void hl_device_hard_reset_pending(struct work_struct *work)
}
}
+ /* We killed the open users, but because the driver cleans up after the
+ * user contexts are closed (e.g. mmu mappings), we need to wait again
+ * to make sure the cleaning phase is finished before continuing with
+ * the reset
+ */
+
pending_cnt = pending_total;
while ((atomic_read(&hdev->fd_open_cnt)) && (pending_cnt)) {
@@ -552,6 +570,16 @@ static void hl_device_hard_reset_pending(struct work_struct *work)
mutex_unlock(&hdev->fd_open_cnt_lock);
+}
+
+static void device_hard_reset_pending(struct work_struct *work)
+{
+ struct hl_device_reset_work *device_reset_work =
+ container_of(work, struct hl_device_reset_work, reset_work);
+ struct hl_device *hdev = device_reset_work->hdev;
+
+ device_kill_open_processes(hdev);
+
hl_device_reset(hdev, true, true);
kfree(device_reset_work);
@@ -613,6 +641,8 @@ again:
if ((hard_reset) && (!from_hard_reset_thread)) {
struct hl_device_reset_work *device_reset_work;
+ hdev->hard_reset_pending = true;
+
if (!hdev->pdev) {
dev_err(hdev->dev,
"Reset action is NOT supported in simulator\n");
@@ -620,8 +650,6 @@ again:
goto out_err;
}
- hdev->hard_reset_pending = true;
-
device_reset_work = kzalloc(sizeof(*device_reset_work),
GFP_ATOMIC);
if (!device_reset_work) {
@@ -635,7 +663,7 @@ again:
* from a dedicated work
*/
INIT_WORK(&device_reset_work->reset_work,
- hl_device_hard_reset_pending);
+ device_hard_reset_pending);
device_reset_work->hdev = hdev;
schedule_work(&device_reset_work->reset_work);
@@ -663,17 +691,9 @@ again:
/* Go over all the queues, release all CS and their jobs */
hl_cs_rollback_all(hdev);
- if (hard_reset) {
- /* Release kernel context */
- if (hl_ctx_put(hdev->kernel_ctx) != 1) {
- dev_err(hdev->dev,
- "kernel ctx is alive during hard reset\n");
- rc = -EBUSY;
- goto out_err;
- }
-
+ /* Release kernel context */
+ if ((hard_reset) && (hl_ctx_put(hdev->kernel_ctx) == 1))
hdev->kernel_ctx = NULL;
- }
/* Reset the H/W. It will be in idle state after this returns */
hdev->asic_funcs->hw_fini(hdev, hard_reset);
@@ -698,6 +718,14 @@ again:
if (hard_reset) {
hdev->device_cpu_disabled = false;
+ hdev->hard_reset_pending = false;
+
+ if (hdev->kernel_ctx) {
+ dev_crit(hdev->dev,
+ "kernel ctx was alive during hard reset, something is terribly wrong\n");
+ rc = -EBUSY;
+ goto out_err;
+ }
/* Allocate the kernel context */
hdev->kernel_ctx = kzalloc(sizeof(*hdev->kernel_ctx),
@@ -752,8 +780,6 @@ again:
}
hl_set_max_power(hdev, hdev->max_power);
-
- hdev->hard_reset_pending = false;
} else {
rc = hdev->asic_funcs->soft_reset_late_init(hdev);
if (rc) {
@@ -1030,11 +1056,22 @@ void hl_device_fini(struct hl_device *hdev)
WARN(1, "Failed to remove device because reset function did not finish\n");
return;
}
- };
+ }
/* Mark device as disabled */
hdev->disabled = true;
+ /*
+ * Flush anyone that is inside the critical section of enqueue
+ * jobs to the H/W
+ */
+ hdev->asic_funcs->hw_queues_lock(hdev);
+ hdev->asic_funcs->hw_queues_unlock(hdev);
+
+ hdev->hard_reset_pending = true;
+
+ device_kill_open_processes(hdev);
+
hl_hwmon_fini(hdev);
device_late_fini(hdev);
diff --git a/drivers/misc/habanalabs/firmware_if.c b/drivers/misc/habanalabs/firmware_if.c
new file mode 100644
index 000000000000..1acf82650b20
--- /dev/null
+++ b/drivers/misc/habanalabs/firmware_if.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+
+#include <linux/firmware.h>
+#include <linux/genalloc.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+
+/**
+ * hl_fw_push_fw_to_device() - Push FW code to device.
+ * @hdev: pointer to hl_device structure.
+ *
+ * Copy fw code from firmware file to device memory.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
+ void __iomem *dst)
+{
+ const struct firmware *fw;
+ const u64 *fw_data;
+ size_t fw_size, i;
+ int rc;
+
+ rc = request_firmware(&fw, fw_name, hdev->dev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to request %s\n", fw_name);
+ goto out;
+ }
+
+ fw_size = fw->size;
+ if ((fw_size % 4) != 0) {
+ dev_err(hdev->dev, "illegal %s firmware size %zu\n",
+ fw_name, fw_size);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
+
+ fw_data = (const u64 *) fw->data;
+
+ if ((fw->size % 8) != 0)
+ fw_size -= 8;
+
+ for (i = 0 ; i < fw_size ; i += 8, fw_data++, dst += 8) {
+ if (!(i & (0x80000 - 1))) {
+ dev_dbg(hdev->dev,
+ "copied so far %zu out of %zu for %s firmware",
+ i, fw_size, fw_name);
+ usleep_range(20, 100);
+ }
+
+ writeq(*fw_data, dst);
+ }
+
+ if ((fw->size % 8) != 0)
+ writel(*(const u32 *) fw_data, dst);
+
+out:
+ release_firmware(fw);
+ return rc;
+}
+
+int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
+{
+ struct armcp_packet pkt = {};
+
+ pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT);
+
+ return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
+ sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL);
+}
+
+int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
+ u16 len, u32 timeout, long *result)
+{
+ struct armcp_packet *pkt;
+ dma_addr_t pkt_dma_addr;
+ u32 tmp;
+ int rc = 0;
+
+ if (len > HL_CPU_CB_SIZE) {
+ dev_err(hdev->dev, "Invalid CPU message size of %d bytes\n",
+ len);
+ return -ENOMEM;
+ }
+
+ pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
+ &pkt_dma_addr);
+ if (!pkt) {
+ dev_err(hdev->dev,
+ "Failed to allocate DMA memory for packet to CPU\n");
+ return -ENOMEM;
+ }
+
+ memcpy(pkt, msg, len);
+
+ mutex_lock(&hdev->send_cpu_message_lock);
+
+ if (hdev->disabled)
+ goto out;
+
+ if (hdev->device_cpu_disabled) {
+ rc = -EIO;
+ goto out;
+ }
+
+ rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
+ goto out;
+ }
+
+ rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) &pkt->fence,
+ timeout, &tmp);
+
+ hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
+
+ if (rc == -ETIMEDOUT) {
+ dev_err(hdev->dev, "Timeout while waiting for device CPU\n");
+ hdev->device_cpu_disabled = true;
+ goto out;
+ }
+
+ if (tmp == ARMCP_PACKET_FENCE_VAL) {
+ u32 ctl = le32_to_cpu(pkt->ctl);
+
+ rc = (ctl & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT;
+ if (rc) {
+ dev_err(hdev->dev,
+ "F/W ERROR %d for CPU packet %d\n",
+ rc, (ctl & ARMCP_PKT_CTL_OPCODE_MASK)
+ >> ARMCP_PKT_CTL_OPCODE_SHIFT);
+ rc = -EINVAL;
+ } else if (result) {
+ *result = (long) le64_to_cpu(pkt->result);
+ }
+ } else {
+ dev_err(hdev->dev, "CPU packet wrong fence value\n");
+ rc = -EINVAL;
+ }
+
+out:
+ mutex_unlock(&hdev->send_cpu_message_lock);
+
+ hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);
+
+ return rc;
+}
+
+int hl_fw_test_cpu_queue(struct hl_device *hdev)
+{
+ struct armcp_packet test_pkt = {};
+ long result;
+ int rc;
+
+ test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
+ sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
+
+ if (!rc) {
+ if (result == ARMCP_PACKET_FENCE_VAL)
+ dev_info(hdev->dev,
+ "queue test on CPU queue succeeded\n");
+ else
+ dev_err(hdev->dev,
+ "CPU queue test failed (0x%08lX)\n", result);
+ } else {
+ dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
+ }
+
+ return rc;
+}
+
+void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
+ dma_addr_t *dma_handle)
+{
+ u64 kernel_addr;
+
+ /* roundup to HL_CPU_PKT_SIZE */
+ size = (size + (HL_CPU_PKT_SIZE - 1)) & HL_CPU_PKT_MASK;
+
+ kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
+
+ *dma_handle = hdev->cpu_accessible_dma_address +
+ (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem);
+
+ return (void *) (uintptr_t) kernel_addr;
+}
+
+void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
+ void *vaddr)
+{
+ /* roundup to HL_CPU_PKT_SIZE */
+ size = (size + (HL_CPU_PKT_SIZE - 1)) & HL_CPU_PKT_MASK;
+
+ gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
+ size);
+}
+
+int hl_fw_send_heartbeat(struct hl_device *hdev)
+{
+ struct armcp_packet hb_pkt = {};
+ long result;
+ int rc;
+
+ hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
+ sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
+
+ if ((rc) || (result != ARMCP_PACKET_FENCE_VAL))
+ rc = -EIO;
+
+ return rc;
+}
+
+int hl_fw_armcp_info_get(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct armcp_packet pkt = {};
+ void *armcp_info_cpu_addr;
+ dma_addr_t armcp_info_dma_addr;
+ long result;
+ int rc;
+
+ armcp_info_cpu_addr =
+ hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
+ sizeof(struct armcp_info),
+ &armcp_info_dma_addr);
+ if (!armcp_info_cpu_addr) {
+ dev_err(hdev->dev,
+ "Failed to allocate DMA memory for ArmCP info packet\n");
+ return -ENOMEM;
+ }
+
+ memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info));
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.addr = cpu_to_le64(armcp_info_dma_addr +
+ prop->host_phys_base_address);
+ pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info));
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_ARMCP_INFO_TIMEOUT_USEC, &result);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to send armcp info pkt, error %d\n", rc);
+ goto out;
+ }
+
+ memcpy(&prop->armcp_info, armcp_info_cpu_addr,
+ sizeof(prop->armcp_info));
+
+ rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to build hwmon channel info, error %d\n", rc);
+ rc = -EFAULT;
+ goto out;
+ }
+
+out:
+ hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
+ sizeof(struct armcp_info), armcp_info_cpu_addr);
+
+ return rc;
+}
+
+int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ struct armcp_packet pkt = {};
+ void *eeprom_info_cpu_addr;
+ dma_addr_t eeprom_info_dma_addr;
+ long result;
+ int rc;
+
+ eeprom_info_cpu_addr =
+ hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
+ max_size, &eeprom_info_dma_addr);
+ if (!eeprom_info_cpu_addr) {
+ dev_err(hdev->dev,
+ "Failed to allocate DMA memory for EEPROM info packet\n");
+ return -ENOMEM;
+ }
+
+ memset(eeprom_info_cpu_addr, 0, max_size);
+
+ pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET <<
+ ARMCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.addr = cpu_to_le64(eeprom_info_dma_addr +
+ prop->host_phys_base_address);
+ pkt.data_max_size = cpu_to_le32(max_size);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ HL_ARMCP_EEPROM_TIMEOUT_USEC, &result);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to send armcp EEPROM pkt, error %d\n", rc);
+ goto out;
+ }
+
+ /* result contains the actual size */
+ memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size));
+
+out:
+ hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size,
+ eeprom_info_cpu_addr);
+
+ return rc;
+}
diff --git a/drivers/misc/habanalabs/goya/Makefile b/drivers/misc/habanalabs/goya/Makefile
index e458e5ba500b..131432f677e2 100644
--- a/drivers/misc/habanalabs/goya/Makefile
+++ b/drivers/misc/habanalabs/goya/Makefile
@@ -1,3 +1,4 @@
subdir-ccflags-y += -I$(src)
-HL_GOYA_FILES := goya/goya.o goya/goya_security.o goya/goya_hwmgr.o
+HL_GOYA_FILES := goya/goya.o goya/goya_security.o goya/goya_hwmgr.o \
+ goya/goya_coresight.o
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index ea979ebd62fb..5100dfbf3acc 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -12,10 +12,8 @@
#include <linux/pci.h>
#include <linux/genalloc.h>
-#include <linux/firmware.h>
#include <linux/hwmon.h>
#include <linux/io-64-nonatomic-lo-hi.h>
-#include <linux/io-64-nonatomic-hi-lo.h>
/*
* GOYA security scheme:
@@ -71,7 +69,7 @@
*
*/
-#define GOYA_MMU_REGS_NUM 61
+#define GOYA_MMU_REGS_NUM 63
#define GOYA_DMA_POOL_BLK_SIZE 0x100 /* 256 bytes */
@@ -80,15 +78,12 @@
#define GOYA_RESET_WAIT_MSEC 1 /* 1ms */
#define GOYA_CPU_RESET_WAIT_MSEC 100 /* 100ms */
#define GOYA_PLDM_RESET_WAIT_MSEC 1000 /* 1s */
-#define GOYA_CPU_TIMEOUT_USEC 10000000 /* 10s */
#define GOYA_TEST_QUEUE_WAIT_USEC 100000 /* 100ms */
#define GOYA_PLDM_MMU_TIMEOUT_USEC (MMU_CONFIG_TIMEOUT_USEC * 100)
#define GOYA_PLDM_QMAN0_TIMEOUT_USEC (HL_DEVICE_TIMEOUT_USEC * 30)
#define GOYA_QMAN0_FENCE_VAL 0xD169B243
-#define GOYA_MAX_INITIATORS 20
-
#define GOYA_MAX_STRING_LEN 20
#define GOYA_CB_POOL_CB_CNT 512
@@ -173,12 +168,12 @@ static u64 goya_mmu_regs[GOYA_MMU_REGS_NUM] = {
mmMME_SBA_CONTROL_DATA,
mmMME_SBB_CONTROL_DATA,
mmMME_SBC_CONTROL_DATA,
- mmMME_WBC_CONTROL_DATA
+ mmMME_WBC_CONTROL_DATA,
+ mmPCIE_WRAP_PSOC_ARUSER,
+ mmPCIE_WRAP_PSOC_AWUSER
};
-#define GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE 121
-
-static u32 goya_non_fatal_events[GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE] = {
+static u32 goya_all_events[] = {
GOYA_ASYNC_EVENT_ID_PCIE_IF,
GOYA_ASYNC_EVENT_ID_TPC0_ECC,
GOYA_ASYNC_EVENT_ID_TPC1_ECC,
@@ -302,13 +297,6 @@ static u32 goya_non_fatal_events[GOYA_ASYC_EVENT_GROUP_NON_FATAL_SIZE] = {
GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
};
-static int goya_armcp_info_get(struct hl_device *hdev);
-static void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
-static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
-static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
-static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
- u64 phys_addr);
-
static void goya_get_fixed_properties(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -367,24 +355,13 @@ static void goya_get_fixed_properties(struct hl_device *hdev)
prop->cfg_size = CFG_SIZE;
prop->max_asid = MAX_ASID;
prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
+ prop->high_pll = PLL_HIGH_DEFAULT;
prop->cb_pool_cb_cnt = GOYA_CB_POOL_CB_CNT;
prop->cb_pool_cb_size = GOYA_CB_POOL_CB_SIZE;
prop->max_power_default = MAX_POWER_DEFAULT;
prop->tpc_enabled_mask = TPC_ENABLED_MASK;
-
- prop->high_pll = PLL_HIGH_DEFAULT;
-}
-
-int goya_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
-{
- struct armcp_packet pkt;
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(opcode << ARMCP_PKT_CTL_OPCODE_SHIFT);
-
- return hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt,
- sizeof(pkt), HL_DEVICE_TIMEOUT_USEC, NULL);
+ prop->pcie_dbi_base_address = mmPCIE_DBI_BASE;
+ prop->pcie_aux_dbi_reg_addr = CFG_BASE + mmPCIE_AUX_DBI;
}
/*
@@ -398,159 +375,18 @@ int goya_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
*/
static int goya_pci_bars_map(struct hl_device *hdev)
{
- struct pci_dev *pdev = hdev->pdev;
+ static const char * const name[] = {"SRAM_CFG", "MSIX", "DDR"};
+ bool is_wc[3] = {false, false, true};
int rc;
- rc = pci_request_regions(pdev, HL_NAME);
- if (rc) {
- dev_err(hdev->dev, "Cannot obtain PCI resources\n");
+ rc = hl_pci_bars_map(hdev, name, is_wc);
+ if (rc)
return rc;
- }
-
- hdev->pcie_bar[SRAM_CFG_BAR_ID] =
- pci_ioremap_bar(pdev, SRAM_CFG_BAR_ID);
- if (!hdev->pcie_bar[SRAM_CFG_BAR_ID]) {
- dev_err(hdev->dev, "pci_ioremap_bar failed for CFG\n");
- rc = -ENODEV;
- goto err_release_regions;
- }
-
- hdev->pcie_bar[MSIX_BAR_ID] = pci_ioremap_bar(pdev, MSIX_BAR_ID);
- if (!hdev->pcie_bar[MSIX_BAR_ID]) {
- dev_err(hdev->dev, "pci_ioremap_bar failed for MSIX\n");
- rc = -ENODEV;
- goto err_unmap_sram_cfg;
- }
-
- hdev->pcie_bar[DDR_BAR_ID] = pci_ioremap_wc_bar(pdev, DDR_BAR_ID);
- if (!hdev->pcie_bar[DDR_BAR_ID]) {
- dev_err(hdev->dev, "pci_ioremap_bar failed for DDR\n");
- rc = -ENODEV;
- goto err_unmap_msix;
- }
hdev->rmmio = hdev->pcie_bar[SRAM_CFG_BAR_ID] +
- (CFG_BASE - SRAM_BASE_ADDR);
+ (CFG_BASE - SRAM_BASE_ADDR);
return 0;
-
-err_unmap_msix:
- iounmap(hdev->pcie_bar[MSIX_BAR_ID]);
-err_unmap_sram_cfg:
- iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]);
-err_release_regions:
- pci_release_regions(pdev);
-
- return rc;
-}
-
-/*
- * goya_pci_bars_unmap - Unmap PCI BARS of Goya device
- *
- * @hdev: pointer to hl_device structure
- *
- * Release all PCI BARS and unmap their virtual addresses
- *
- */
-static void goya_pci_bars_unmap(struct hl_device *hdev)
-{
- struct pci_dev *pdev = hdev->pdev;
-
- iounmap(hdev->pcie_bar[DDR_BAR_ID]);
- iounmap(hdev->pcie_bar[MSIX_BAR_ID]);
- iounmap(hdev->pcie_bar[SRAM_CFG_BAR_ID]);
- pci_release_regions(pdev);
-}
-
-/*
- * goya_elbi_write - Write through the ELBI interface
- *
- * @hdev: pointer to hl_device structure
- *
- * return 0 on success, -1 on failure
- *
- */
-static int goya_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
-{
- struct pci_dev *pdev = hdev->pdev;
- ktime_t timeout;
- u32 val;
-
- /* Clear previous status */
- pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0);
-
- pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr);
- pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data);
- pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL,
- PCI_CONFIG_ELBI_CTRL_WRITE);
-
- timeout = ktime_add_ms(ktime_get(), 10);
- for (;;) {
- pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val);
- if (val & PCI_CONFIG_ELBI_STS_MASK)
- break;
- if (ktime_compare(ktime_get(), timeout) > 0) {
- pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS,
- &val);
- break;
- }
- usleep_range(300, 500);
- }
-
- if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE)
- return 0;
-
- if (val & PCI_CONFIG_ELBI_STS_ERR) {
- dev_err(hdev->dev, "Error writing to ELBI\n");
- return -EIO;
- }
-
- if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
- dev_err(hdev->dev, "ELBI write didn't finish in time\n");
- return -EIO;
- }
-
- dev_err(hdev->dev, "ELBI write has undefined bits in status\n");
- return -EIO;
-}
-
-/*
- * goya_iatu_write - iatu write routine
- *
- * @hdev: pointer to hl_device structure
- *
- */
-static int goya_iatu_write(struct hl_device *hdev, u32 addr, u32 data)
-{
- u32 dbi_offset;
- int rc;
-
- dbi_offset = addr & 0xFFF;
-
- rc = goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0x00300000);
- rc |= goya_elbi_write(hdev, mmPCIE_DBI_BASE + dbi_offset, data);
-
- if (rc)
- return -EIO;
-
- return 0;
-}
-
-static void goya_reset_link_through_bridge(struct hl_device *hdev)
-{
- struct pci_dev *pdev = hdev->pdev;
- struct pci_dev *parent_port;
- u16 val;
-
- parent_port = pdev->bus->self;
- pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val);
- val |= PCI_BRIDGE_CTL_BUS_RESET;
- pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
- ssleep(1);
-
- val &= ~(PCI_BRIDGE_CTL_BUS_RESET);
- pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
- ssleep(3);
}
/*
@@ -572,20 +408,9 @@ static int goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
return 0;
/* Inbound Region 1 - Bar 4 - Point to DDR */
- rc = goya_iatu_write(hdev, 0x314, lower_32_bits(addr));
- rc |= goya_iatu_write(hdev, 0x318, upper_32_bits(addr));
- rc |= goya_iatu_write(hdev, 0x300, 0);
- /* Enable + Bar match + match enable + Bar 4 */
- rc |= goya_iatu_write(hdev, 0x304, 0xC0080400);
-
- /* Return the DBI window to the default location */
- rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0);
- rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0);
-
- if (rc) {
- dev_err(hdev->dev, "failed to map DDR bar to 0x%08llx\n", addr);
- return -EIO;
- }
+ rc = hl_pci_set_dram_bar_base(hdev, 1, 4, addr);
+ if (rc)
+ return rc;
if (goya)
goya->ddr_bar_cur_addr = addr;
@@ -603,40 +428,8 @@ static int goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
*/
static int goya_init_iatu(struct hl_device *hdev)
{
- int rc;
-
- /* Inbound Region 0 - Bar 0 - Point to SRAM_BASE_ADDR */
- rc = goya_iatu_write(hdev, 0x114, lower_32_bits(SRAM_BASE_ADDR));
- rc |= goya_iatu_write(hdev, 0x118, upper_32_bits(SRAM_BASE_ADDR));
- rc |= goya_iatu_write(hdev, 0x100, 0);
- /* Enable + Bar match + match enable */
- rc |= goya_iatu_write(hdev, 0x104, 0xC0080000);
-
- /* Inbound Region 1 - Bar 4 - Point to DDR */
- rc |= goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE);
-
- /* Outbound Region 0 - Point to Host */
- rc |= goya_iatu_write(hdev, 0x008, lower_32_bits(HOST_PHYS_BASE));
- rc |= goya_iatu_write(hdev, 0x00C, upper_32_bits(HOST_PHYS_BASE));
- rc |= goya_iatu_write(hdev, 0x010,
- lower_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1));
- rc |= goya_iatu_write(hdev, 0x014, 0);
- rc |= goya_iatu_write(hdev, 0x018, 0);
- rc |= goya_iatu_write(hdev, 0x020,
- upper_32_bits(HOST_PHYS_BASE + HOST_PHYS_SIZE - 1));
- /* Increase region size */
- rc |= goya_iatu_write(hdev, 0x000, 0x00002000);
- /* Enable */
- rc |= goya_iatu_write(hdev, 0x004, 0x80000000);
-
- /* Return the DBI window to the default location */
- rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI, 0);
- rc |= goya_elbi_write(hdev, CFG_BASE + mmPCIE_AUX_DBI_32, 0);
-
- if (rc)
- return -EIO;
-
- return 0;
+ return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
+ HOST_PHYS_SIZE);
}
/*
@@ -682,52 +475,9 @@ static int goya_early_init(struct hl_device *hdev)
prop->dram_pci_bar_size = pci_resource_len(pdev, DDR_BAR_ID);
- /* set DMA mask for GOYA */
- rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(39));
- if (rc) {
- dev_warn(hdev->dev, "Unable to set pci dma mask to 39 bits\n");
- rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
- if (rc) {
- dev_err(hdev->dev,
- "Unable to set pci dma mask to 32 bits\n");
- return rc;
- }
- }
-
- rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));
- if (rc) {
- dev_warn(hdev->dev,
- "Unable to set pci consistent dma mask to 39 bits\n");
- rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
- if (rc) {
- dev_err(hdev->dev,
- "Unable to set pci consistent dma mask to 32 bits\n");
- return rc;
- }
- }
-
- if (hdev->reset_pcilink)
- goya_reset_link_through_bridge(hdev);
-
- rc = pci_enable_device_mem(pdev);
- if (rc) {
- dev_err(hdev->dev, "can't enable PCI device\n");
+ rc = hl_pci_init(hdev, 39);
+ if (rc)
return rc;
- }
-
- pci_set_master(pdev);
-
- rc = goya_init_iatu(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to initialize iATU\n");
- goto disable_device;
- }
-
- rc = goya_pci_bars_map(hdev);
- if (rc) {
- dev_err(hdev->dev, "Failed to initialize PCI BARS\n");
- goto disable_device;
- }
if (!hdev->pldm) {
val = RREG32(mmPSOC_GLOBAL_CONF_BOOT_STRAP_PINS);
@@ -737,12 +487,6 @@ static int goya_early_init(struct hl_device *hdev)
}
return 0;
-
-disable_device:
- pci_clear_master(pdev);
- pci_disable_device(pdev);
-
- return rc;
}
/*
@@ -755,14 +499,33 @@ disable_device:
*/
static int goya_early_fini(struct hl_device *hdev)
{
- goya_pci_bars_unmap(hdev);
-
- pci_clear_master(hdev->pdev);
- pci_disable_device(hdev->pdev);
+ hl_pci_fini(hdev);
return 0;
}
+static void goya_mmu_prepare_reg(struct hl_device *hdev, u64 reg, u32 asid)
+{
+ /* mask to zero the MMBP and ASID bits */
+ WREG32_AND(reg, ~0x7FF);
+ WREG32_OR(reg, asid);
+}
+
+static void goya_qman0_set_security(struct hl_device *hdev, bool secure)
+{
+ struct goya_device *goya = hdev->asic_specific;
+
+ if (!(goya->hw_cap_initialized & HW_CAP_MMU))
+ return;
+
+ if (secure)
+ WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
+ else
+ WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);
+
+ RREG32(mmDMA_QM_0_GLBL_PROT);
+}
+
/*
* goya_fetch_psoc_frequency - Fetch PSOC frequency values
*
@@ -789,10 +552,9 @@ static void goya_fetch_psoc_frequency(struct hl_device *hdev)
static int goya_late_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct goya_device *goya = hdev->asic_specific;
int rc;
- rc = goya->armcp_info_get(hdev);
+ rc = goya_armcp_info_get(hdev);
if (rc) {
dev_err(hdev->dev, "Failed to get armcp info\n");
return rc;
@@ -804,7 +566,7 @@ static int goya_late_init(struct hl_device *hdev)
*/
WREG32(mmMMU_LOG2_DDR_SIZE, ilog2(prop->dram_size));
- rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
+ rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS);
if (rc) {
dev_err(hdev->dev, "Failed to enable PCI access from CPU\n");
return rc;
@@ -830,7 +592,7 @@ static int goya_late_init(struct hl_device *hdev)
return 0;
disable_pci_access:
- goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
+ hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
return rc;
}
@@ -879,9 +641,6 @@ static int goya_sw_init(struct hl_device *hdev)
if (!goya)
return -ENOMEM;
- goya->test_cpu_queue = goya_test_cpu_queue;
- goya->armcp_info_get = goya_armcp_info_get;
-
/* according to goya_init_iatu */
goya->ddr_bar_cur_addr = DRAM_PHYS_BASE;
@@ -889,6 +648,9 @@ static int goya_sw_init(struct hl_device *hdev)
goya->tpc_clk = GOYA_PLL_FREQ_LOW;
goya->ic_clk = GOYA_PLL_FREQ_LOW;
+ goya->mmu_prepare_reg = goya_mmu_prepare_reg;
+ goya->qman0_set_security = goya_qman0_set_security;
+
hdev->asic_specific = goya;
/* Create DMA pool for small allocations */
@@ -902,19 +664,16 @@ static int goya_sw_init(struct hl_device *hdev)
hdev->cpu_accessible_dma_mem =
hdev->asic_funcs->dma_alloc_coherent(hdev,
- CPU_ACCESSIBLE_MEM_SIZE,
+ HL_CPU_ACCESSIBLE_MEM_SIZE,
&hdev->cpu_accessible_dma_address,
GFP_KERNEL | __GFP_ZERO);
if (!hdev->cpu_accessible_dma_mem) {
- dev_err(hdev->dev,
- "failed to allocate %d of dma memory for CPU accessible memory space\n",
- CPU_ACCESSIBLE_MEM_SIZE);
rc = -ENOMEM;
goto free_dma_pool;
}
- hdev->cpu_accessible_dma_pool = gen_pool_create(CPU_PKT_SHIFT, -1);
+ hdev->cpu_accessible_dma_pool = gen_pool_create(HL_CPU_PKT_SHIFT, -1);
if (!hdev->cpu_accessible_dma_pool) {
dev_err(hdev->dev,
"Failed to create CPU accessible DMA pool\n");
@@ -924,7 +683,7 @@ static int goya_sw_init(struct hl_device *hdev)
rc = gen_pool_add(hdev->cpu_accessible_dma_pool,
(uintptr_t) hdev->cpu_accessible_dma_mem,
- CPU_ACCESSIBLE_MEM_SIZE, -1);
+ HL_CPU_ACCESSIBLE_MEM_SIZE, -1);
if (rc) {
dev_err(hdev->dev,
"Failed to add memory to CPU accessible DMA pool\n");
@@ -939,7 +698,7 @@ static int goya_sw_init(struct hl_device *hdev)
free_cpu_pq_pool:
gen_pool_destroy(hdev->cpu_accessible_dma_pool);
free_cpu_pq_dma_mem:
- hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE,
+ hdev->asic_funcs->dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
hdev->cpu_accessible_dma_mem,
hdev->cpu_accessible_dma_address);
free_dma_pool:
@@ -962,7 +721,7 @@ static int goya_sw_fini(struct hl_device *hdev)
gen_pool_destroy(hdev->cpu_accessible_dma_pool);
- hdev->asic_funcs->dma_free_coherent(hdev, CPU_ACCESSIBLE_MEM_SIZE,
+ hdev->asic_funcs->dma_free_coherent(hdev, HL_CPU_ACCESSIBLE_MEM_SIZE,
hdev->cpu_accessible_dma_mem,
hdev->cpu_accessible_dma_address);
@@ -1242,7 +1001,7 @@ static int goya_init_cpu_queues(struct hl_device *hdev)
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_5, HL_QUEUE_SIZE_IN_BYTES);
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_4, HL_EQ_SIZE_IN_BYTES);
- WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10, CPU_ACCESSIBLE_MEM_SIZE);
+ WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_10, HL_CPU_ACCESSIBLE_MEM_SIZE);
/* Used for EQ CI */
WREG32(mmPSOC_GLOBAL_CONF_SCRATCHPAD_6, 0);
@@ -1696,6 +1455,8 @@ static void goya_init_golden_registers(struct hl_device *hdev)
WREG32(mmDMA_CH_0_CFG0, 0x0fff0010);
WREG32(mmDMA_CH_1_CFG0, 0x0fff00F0);
+ WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
+
goya->hw_cap_initialized |= HW_CAP_GOLDEN;
}
@@ -2223,10 +1984,10 @@ static int goya_enable_msix(struct hl_device *hdev)
}
}
- irq = pci_irq_vector(hdev->pdev, EVENT_QUEUE_MSIX_IDX);
+ irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
rc = request_irq(irq, hl_irq_handler_eq, 0,
- goya_irq_name[EVENT_QUEUE_MSIX_IDX],
+ goya_irq_name[GOYA_EVENT_QUEUE_MSIX_IDX],
&hdev->event_queue);
if (rc) {
dev_err(hdev->dev, "Failed to request IRQ %d", irq);
@@ -2257,7 +2018,7 @@ static void goya_sync_irqs(struct hl_device *hdev)
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
synchronize_irq(pci_irq_vector(hdev->pdev, i));
- synchronize_irq(pci_irq_vector(hdev->pdev, EVENT_QUEUE_MSIX_IDX));
+ synchronize_irq(pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX));
}
static void goya_disable_msix(struct hl_device *hdev)
@@ -2270,7 +2031,7 @@ static void goya_disable_msix(struct hl_device *hdev)
goya_sync_irqs(hdev);
- irq = pci_irq_vector(hdev->pdev, EVENT_QUEUE_MSIX_IDX);
+ irq = pci_irq_vector(hdev->pdev, GOYA_EVENT_QUEUE_MSIX_IDX);
free_irq(irq, &hdev->event_queue);
for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) {
@@ -2330,67 +2091,45 @@ static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
}
/*
- * goya_push_fw_to_device - Push FW code to device
- *
- * @hdev: pointer to hl_device structure
+ * goya_push_uboot_to_device() - Push u-boot FW code to device.
+ * @hdev: Pointer to hl_device structure.
*
- * Copy fw code from firmware file to device memory.
- * Returns 0 on success
+ * Copy u-boot fw code from firmware file to SRAM BAR.
*
+ * Return: 0 on success, non-zero for failure.
*/
-static int goya_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
- void __iomem *dst)
+static int goya_push_uboot_to_device(struct hl_device *hdev)
{
- const struct firmware *fw;
- const u64 *fw_data;
- size_t fw_size, i;
- int rc;
-
- rc = request_firmware(&fw, fw_name, hdev->dev);
-
- if (rc) {
- dev_err(hdev->dev, "Failed to request %s\n", fw_name);
- goto out;
- }
-
- fw_size = fw->size;
- if ((fw_size % 4) != 0) {
- dev_err(hdev->dev, "illegal %s firmware size %zu\n",
- fw_name, fw_size);
- rc = -EINVAL;
- goto out;
- }
-
- dev_dbg(hdev->dev, "%s firmware size == %zu\n", fw_name, fw_size);
-
- fw_data = (const u64 *) fw->data;
+ char fw_name[200];
+ void __iomem *dst;
- if ((fw->size % 8) != 0)
- fw_size -= 8;
+ snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin");
+ dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET;
- for (i = 0 ; i < fw_size ; i += 8, fw_data++, dst += 8) {
- if (!(i & (0x80000 - 1))) {
- dev_dbg(hdev->dev,
- "copied so far %zu out of %zu for %s firmware",
- i, fw_size, fw_name);
- usleep_range(20, 100);
- }
+ return hl_fw_push_fw_to_device(hdev, fw_name, dst);
+}
- writeq(*fw_data, dst);
- }
+/*
+ * goya_push_linux_to_device() - Push LINUX FW code to device.
+ * @hdev: Pointer to hl_device structure.
+ *
+ * Copy LINUX fw code from firmware file to HBM BAR.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+static int goya_push_linux_to_device(struct hl_device *hdev)
+{
+ char fw_name[200];
+ void __iomem *dst;
- if ((fw->size % 8) != 0)
- writel(*(const u32 *) fw_data, dst);
+ snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb");
+ dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
-out:
- release_firmware(fw);
- return rc;
+ return hl_fw_push_fw_to_device(hdev, fw_name, dst);
}
static int goya_pldm_init_cpu(struct hl_device *hdev)
{
- char fw_name[200];
- void __iomem *dst;
u32 val, unit_rst_val;
int rc;
@@ -2408,15 +2147,11 @@ static int goya_pldm_init_cpu(struct hl_device *hdev)
WREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N, unit_rst_val);
val = RREG32(mmPSOC_GLOBAL_CONF_UNIT_RST_N);
- snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-u-boot.bin");
- dst = hdev->pcie_bar[SRAM_CFG_BAR_ID] + UBOOT_FW_OFFSET;
- rc = goya_push_fw_to_device(hdev, fw_name, dst);
+ rc = goya_push_uboot_to_device(hdev);
if (rc)
return rc;
- snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb");
- dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
- rc = goya_push_fw_to_device(hdev, fw_name, dst);
+ rc = goya_push_linux_to_device(hdev);
if (rc)
return rc;
@@ -2478,8 +2213,6 @@ static void goya_read_device_fw_version(struct hl_device *hdev,
static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
{
struct goya_device *goya = hdev->asic_specific;
- char fw_name[200];
- void __iomem *dst;
u32 status;
int rc;
@@ -2550,6 +2283,11 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
"ARM status %d - DDR initialization failed\n",
status);
break;
+ case CPU_BOOT_STATUS_UBOOT_NOT_READY:
+ dev_err(hdev->dev,
+ "ARM status %d - u-boot stopped by user\n",
+ status);
+ break;
default:
dev_err(hdev->dev,
"ARM status %d - Invalid status code\n",
@@ -2571,9 +2309,7 @@ static int goya_init_cpu(struct hl_device *hdev, u32 cpu_timeout)
goto out;
}
- snprintf(fw_name, sizeof(fw_name), "habanalabs/goya/goya-fit.itb");
- dst = hdev->pcie_bar[DDR_BAR_ID] + LINUX_FW_OFFSET;
- rc = goya_push_fw_to_device(hdev, fw_name, dst);
+ rc = goya_push_linux_to_device(hdev);
if (rc)
return rc;
@@ -2606,6 +2342,38 @@ out:
return 0;
}
+static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
+ u64 phys_addr)
+{
+ u32 status, timeout_usec;
+ int rc;
+
+ if (hdev->pldm)
+ timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
+ else
+ timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
+
+ WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
+ WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
+ WREG32(MMU_ASID_BUSY, 0x80000000 | asid);
+
+ rc = hl_poll_timeout(
+ hdev,
+ MMU_ASID_BUSY,
+ status,
+ !(status & 0x80000000),
+ 1000,
+ timeout_usec);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Timeout during MMU hop0 config of asid %d\n", asid);
+ return rc;
+ }
+
+ return 0;
+}
+
static int goya_mmu_init(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -2729,28 +2497,16 @@ static int goya_hw_init(struct hl_device *hdev)
goto disable_msix;
}
- /* CPU initialization is finished, we can now move to 48 bit DMA mask */
- rc = pci_set_dma_mask(hdev->pdev, DMA_BIT_MASK(48));
- if (rc) {
- dev_warn(hdev->dev, "Unable to set pci dma mask to 48 bits\n");
- rc = pci_set_dma_mask(hdev->pdev, DMA_BIT_MASK(32));
- if (rc) {
- dev_err(hdev->dev,
- "Unable to set pci dma mask to 32 bits\n");
- goto disable_pci_access;
- }
- }
-
- rc = pci_set_consistent_dma_mask(hdev->pdev, DMA_BIT_MASK(48));
- if (rc) {
- dev_warn(hdev->dev,
- "Unable to set pci consistent dma mask to 48 bits\n");
- rc = pci_set_consistent_dma_mask(hdev->pdev, DMA_BIT_MASK(32));
- if (rc) {
- dev_err(hdev->dev,
- "Unable to set pci consistent dma mask to 32 bits\n");
+ /*
+ * Check if we managed to set the DMA mask to more then 32 bits. If so,
+ * let's try to increase it again because in Goya we set the initial
+ * dma mask to less then 39 bits so that the allocation of the memory
+ * area for the device's cpu will be under 39 bits
+ */
+ if (hdev->dma_mask > 32) {
+ rc = hl_pci_set_dma_mask(hdev, 48);
+ if (rc)
goto disable_pci_access;
- }
}
/* Perform read from the device to flush all MSI-X configuration */
@@ -2759,7 +2515,7 @@ static int goya_hw_init(struct hl_device *hdev)
return 0;
disable_pci_access:
- goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
+ hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
disable_msix:
goya_disable_msix(hdev);
disable_queues:
@@ -2866,7 +2622,7 @@ int goya_suspend(struct hl_device *hdev)
{
int rc;
- rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
+ rc = hl_fw_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS);
if (rc)
dev_err(hdev->dev, "Failed to disable PCI access from CPU\n");
@@ -3067,6 +2823,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
dma_addr_t fence_dma_addr;
struct hl_cb *cb;
u32 tmp, timeout;
+ char buf[16] = {};
int rc;
if (hdev->pldm)
@@ -3074,9 +2831,10 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
else
timeout = HL_DEVICE_TIMEOUT_USEC;
- if (!hdev->asic_funcs->is_device_idle(hdev)) {
+ if (!hdev->asic_funcs->is_device_idle(hdev, buf, sizeof(buf))) {
dev_err_ratelimited(hdev->dev,
- "Can't send KMD job on QMAN0 if device is not idle\n");
+ "Can't send KMD job on QMAN0 because %s is busy\n",
+ buf);
return -EBUSY;
}
@@ -3090,10 +2848,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
*fence_ptr = 0;
- if (goya->hw_cap_initialized & HW_CAP_MMU) {
- WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_FULLY_TRUSTED);
- RREG32(mmDMA_QM_0_GLBL_PROT);
- }
+ goya->qman0_set_security(hdev, true);
/*
* goya cs parser saves space for 2xpacket_msg_prot at end of CB. For
@@ -3135,10 +2890,7 @@ free_fence_ptr:
hdev->asic_funcs->dma_pool_free(hdev, (void *) fence_ptr,
fence_dma_addr);
- if (goya->hw_cap_initialized & HW_CAP_MMU) {
- WREG32(mmDMA_QM_0_GLBL_PROT, QMAN_DMA_PARTLY_TRUSTED);
- RREG32(mmDMA_QM_0_GLBL_PROT);
- }
+ goya->qman0_set_security(hdev, false);
return rc;
}
@@ -3147,10 +2899,6 @@ int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
u32 timeout, long *result)
{
struct goya_device *goya = hdev->asic_specific;
- struct armcp_packet *pkt;
- dma_addr_t pkt_dma_addr;
- u32 tmp;
- int rc = 0;
if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q)) {
if (result)
@@ -3158,74 +2906,8 @@ int goya_send_cpu_message(struct hl_device *hdev, u32 *msg, u16 len,
return 0;
}
- if (len > CPU_CB_SIZE) {
- dev_err(hdev->dev, "Invalid CPU message size of %d bytes\n",
- len);
- return -ENOMEM;
- }
-
- pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
- &pkt_dma_addr);
- if (!pkt) {
- dev_err(hdev->dev,
- "Failed to allocate DMA memory for packet to CPU\n");
- return -ENOMEM;
- }
-
- memcpy(pkt, msg, len);
-
- mutex_lock(&hdev->send_cpu_message_lock);
-
- if (hdev->disabled)
- goto out;
-
- if (hdev->device_cpu_disabled) {
- rc = -EIO;
- goto out;
- }
-
- rc = hl_hw_queue_send_cb_no_cmpl(hdev, GOYA_QUEUE_ID_CPU_PQ, len,
- pkt_dma_addr);
- if (rc) {
- dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
- goto out;
- }
-
- rc = hl_poll_timeout_memory(hdev, (u64) (uintptr_t) &pkt->fence,
- timeout, &tmp);
-
- hl_hw_queue_inc_ci_kernel(hdev, GOYA_QUEUE_ID_CPU_PQ);
-
- if (rc == -ETIMEDOUT) {
- dev_err(hdev->dev, "Timeout while waiting for device CPU\n");
- hdev->device_cpu_disabled = true;
- goto out;
- }
-
- if (tmp == ARMCP_PACKET_FENCE_VAL) {
- u32 ctl = le32_to_cpu(pkt->ctl);
-
- rc = (ctl & ARMCP_PKT_CTL_RC_MASK) >> ARMCP_PKT_CTL_RC_SHIFT;
- if (rc) {
- dev_err(hdev->dev,
- "F/W ERROR %d for CPU packet %d\n",
- rc, (ctl & ARMCP_PKT_CTL_OPCODE_MASK)
- >> ARMCP_PKT_CTL_OPCODE_SHIFT);
- rc = -EINVAL;
- } else if (result) {
- *result = (long) le64_to_cpu(pkt->result);
- }
- } else {
- dev_err(hdev->dev, "CPU packet wrong fence value\n");
- rc = -EINVAL;
- }
-
-out:
- mutex_unlock(&hdev->send_cpu_message_lock);
-
- hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, len, pkt);
-
- return rc;
+ return hl_fw_send_cpu_message(hdev, GOYA_QUEUE_ID_CPU_PQ, msg, len,
+ timeout, result);
}
int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id)
@@ -3303,38 +2985,20 @@ free_fence_ptr:
int goya_test_cpu_queue(struct hl_device *hdev)
{
- struct armcp_packet test_pkt;
- long result;
- int rc;
-
- /* cpu_queues_enable flag is always checked in send cpu message */
-
- memset(&test_pkt, 0, sizeof(test_pkt));
-
- test_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- test_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &test_pkt,
- sizeof(test_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
+ struct goya_device *goya = hdev->asic_specific;
- if (!rc) {
- if (result == ARMCP_PACKET_FENCE_VAL)
- dev_info(hdev->dev,
- "queue test on CPU queue succeeded\n");
- else
- dev_err(hdev->dev,
- "CPU queue test failed (0x%08lX)\n", result);
- } else {
- dev_err(hdev->dev, "CPU queue test failed, error %d\n", rc);
- }
+ /*
+ * check capability here as send_cpu_message() won't update the result
+ * value if no capability
+ */
+ if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
+ return 0;
- return rc;
+ return hl_fw_test_cpu_queue(hdev);
}
-static int goya_test_queues(struct hl_device *hdev)
+int goya_test_queues(struct hl_device *hdev)
{
- struct goya_device *goya = hdev->asic_specific;
int i, rc, ret_val = 0;
for (i = 0 ; i < NUMBER_OF_EXT_HW_QUEUES ; i++) {
@@ -3344,7 +3008,7 @@ static int goya_test_queues(struct hl_device *hdev)
}
if (hdev->cpu_queues_enable) {
- rc = goya->test_cpu_queue(hdev);
+ rc = goya_test_cpu_queue(hdev);
if (rc)
ret_val = -EINVAL;
}
@@ -3367,30 +3031,16 @@ static void goya_dma_pool_free(struct hl_device *hdev, void *vaddr,
dma_pool_free(hdev->dma_pool, vaddr, dma_addr);
}
-static void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev,
- size_t size, dma_addr_t *dma_handle)
+void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
+ dma_addr_t *dma_handle)
{
- u64 kernel_addr;
-
- /* roundup to CPU_PKT_SIZE */
- size = (size + (CPU_PKT_SIZE - 1)) & CPU_PKT_MASK;
-
- kernel_addr = gen_pool_alloc(hdev->cpu_accessible_dma_pool, size);
-
- *dma_handle = hdev->cpu_accessible_dma_address +
- (kernel_addr - (u64) (uintptr_t) hdev->cpu_accessible_dma_mem);
-
- return (void *) (uintptr_t) kernel_addr;
+ return hl_fw_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle);
}
-static void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev,
- size_t size, void *vaddr)
+void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
+ void *vaddr)
{
- /* roundup to CPU_PKT_SIZE */
- size = (size + (CPU_PKT_SIZE - 1)) & CPU_PKT_MASK;
-
- gen_pool_free(hdev->cpu_accessible_dma_pool, (u64) (uintptr_t) vaddr,
- size);
+ hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr);
}
static int goya_dma_map_sg(struct hl_device *hdev, struct scatterlist *sg,
@@ -4408,6 +4058,9 @@ static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
{
struct goya_device *goya = hdev->asic_specific;
+ if (hdev->hard_reset_pending)
+ return U64_MAX;
+
return readq(hdev->pcie_bar[DDR_BAR_ID] +
(addr - goya->ddr_bar_cur_addr));
}
@@ -4416,6 +4069,9 @@ static void goya_write_pte(struct hl_device *hdev, u64 addr, u64 val)
{
struct goya_device *goya = hdev->asic_specific;
+ if (hdev->hard_reset_pending)
+ return;
+
writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
(addr - goya->ddr_bar_cur_addr));
}
@@ -4605,8 +4261,8 @@ static int goya_unmask_irq_arr(struct hl_device *hdev, u32 *irq_arr,
pkt->armcp_pkt.ctl = cpu_to_le32(ARMCP_PACKET_UNMASK_RAZWI_IRQ_ARRAY <<
ARMCP_PKT_CTL_OPCODE_SHIFT);
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) pkt,
- total_pkt_size, HL_DEVICE_TIMEOUT_USEC, &result);
+ rc = goya_send_cpu_message(hdev, (u32 *) pkt, total_pkt_size,
+ HL_DEVICE_TIMEOUT_USEC, &result);
if (rc)
dev_err(hdev->dev, "failed to unmask IRQ array\n");
@@ -4622,8 +4278,8 @@ static int goya_soft_reset_late_init(struct hl_device *hdev)
* Unmask all IRQs since some could have been received
* during the soft reset
*/
- return goya_unmask_irq_arr(hdev, goya_non_fatal_events,
- sizeof(goya_non_fatal_events));
+ return goya_unmask_irq_arr(hdev, goya_all_events,
+ sizeof(goya_all_events));
}
static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
@@ -4638,7 +4294,7 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
ARMCP_PKT_CTL_OPCODE_SHIFT);
pkt.value = cpu_to_le64(event_type);
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ rc = goya_send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
HL_DEVICE_TIMEOUT_USEC, &result);
if (rc)
@@ -4854,12 +4510,13 @@ static int goya_context_switch(struct hl_device *hdev, u32 asid)
return rc;
}
+ WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
goya_mmu_prepare(hdev, asid);
return 0;
}
-static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
+int goya_mmu_clear_pgt_range(struct hl_device *hdev)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
struct goya_device *goya = hdev->asic_specific;
@@ -4873,7 +4530,7 @@ static int goya_mmu_clear_pgt_range(struct hl_device *hdev)
return goya_memset_device_memory(hdev, addr, size, 0, true);
}
-static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
+int goya_mmu_set_dram_default_page(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
u64 addr = hdev->asic_prop.mmu_dram_default_page_addr;
@@ -4886,7 +4543,7 @@ static int goya_mmu_set_dram_default_page(struct hl_device *hdev)
return goya_memset_device_memory(hdev, addr, size, val, true);
}
-static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
+void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
{
struct goya_device *goya = hdev->asic_specific;
int i;
@@ -4900,10 +4557,8 @@ static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
}
/* zero the MMBP and ASID bits and then set the ASID */
- for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++) {
- WREG32_AND(goya_mmu_regs[i], ~0x7FF);
- WREG32_OR(goya_mmu_regs[i], asid);
- }
+ for (i = 0 ; i < GOYA_MMU_REGS_NUM ; i++)
+ goya->mmu_prepare_reg(hdev, goya_mmu_regs[i], asid);
}
static void goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard)
@@ -4994,107 +4649,29 @@ static void goya_mmu_invalidate_cache_range(struct hl_device *hdev,
"Timeout when waiting for MMU cache invalidation\n");
}
-static int goya_mmu_update_asid_hop0_addr(struct hl_device *hdev, u32 asid,
- u64 phys_addr)
-{
- u32 status, timeout_usec;
- int rc;
-
- if (hdev->pldm)
- timeout_usec = GOYA_PLDM_MMU_TIMEOUT_USEC;
- else
- timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
-
- WREG32(MMU_HOP0_PA43_12, phys_addr >> MMU_HOP0_PA43_12_SHIFT);
- WREG32(MMU_HOP0_PA49_44, phys_addr >> MMU_HOP0_PA49_44_SHIFT);
- WREG32(MMU_ASID_BUSY, 0x80000000 | asid);
-
- rc = hl_poll_timeout(
- hdev,
- MMU_ASID_BUSY,
- status,
- !(status & 0x80000000),
- 1000,
- timeout_usec);
-
- if (rc) {
- dev_err(hdev->dev,
- "Timeout during MMU hop0 config of asid %d\n", asid);
- return rc;
- }
-
- return 0;
-}
-
int goya_send_heartbeat(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
- struct armcp_packet hb_pkt;
- long result;
- int rc;
if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
return 0;
- memset(&hb_pkt, 0, sizeof(hb_pkt));
-
- hb_pkt.ctl = cpu_to_le32(ARMCP_PACKET_TEST <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- hb_pkt.value = cpu_to_le64(ARMCP_PACKET_FENCE_VAL);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &hb_pkt,
- sizeof(hb_pkt), HL_DEVICE_TIMEOUT_USEC, &result);
-
- if ((rc) || (result != ARMCP_PACKET_FENCE_VAL))
- rc = -EIO;
-
- return rc;
+ return hl_fw_send_heartbeat(hdev);
}
-static int goya_armcp_info_get(struct hl_device *hdev)
+int goya_armcp_info_get(struct hl_device *hdev)
{
struct goya_device *goya = hdev->asic_specific;
struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct armcp_packet pkt;
- void *armcp_info_cpu_addr;
- dma_addr_t armcp_info_dma_addr;
u64 dram_size;
- long result;
int rc;
if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
return 0;
- armcp_info_cpu_addr =
- hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
- sizeof(struct armcp_info), &armcp_info_dma_addr);
- if (!armcp_info_cpu_addr) {
- dev_err(hdev->dev,
- "Failed to allocate DMA memory for ArmCP info packet\n");
- return -ENOMEM;
- }
-
- memset(armcp_info_cpu_addr, 0, sizeof(struct armcp_info));
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.addr = cpu_to_le64(armcp_info_dma_addr +
- prop->host_phys_base_address);
- pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info));
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- GOYA_ARMCP_INFO_TIMEOUT, &result);
-
- if (rc) {
- dev_err(hdev->dev,
- "Failed to send armcp info pkt, error %d\n", rc);
- goto out;
- }
-
- memcpy(&prop->armcp_info, armcp_info_cpu_addr,
- sizeof(prop->armcp_info));
+ rc = hl_fw_armcp_info_get(hdev);
+ if (rc)
+ return rc;
dram_size = le64_to_cpu(prop->armcp_info.dram_size);
if (dram_size) {
@@ -5110,32 +4687,10 @@ static int goya_armcp_info_get(struct hl_device *hdev)
prop->dram_end_address = prop->dram_base_address + dram_size;
}
- rc = hl_build_hwmon_channel_info(hdev, prop->armcp_info.sensors);
- if (rc) {
- dev_err(hdev->dev,
- "Failed to build hwmon channel info, error %d\n", rc);
- rc = -EFAULT;
- goto out;
- }
-
-out:
- hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
- sizeof(struct armcp_info), armcp_info_cpu_addr);
-
- return rc;
-}
-
-static void goya_init_clock_gating(struct hl_device *hdev)
-{
-
-}
-
-static void goya_disable_clock_gating(struct hl_device *hdev)
-{
-
+ return 0;
}
-static bool goya_is_device_idle(struct hl_device *hdev)
+static bool goya_is_device_idle(struct hl_device *hdev, char *buf, size_t size)
{
u64 offset, dma_qm_reg, tpc_qm_reg, tpc_cmdq_reg, tpc_cfg_reg;
int i;
@@ -5147,7 +4702,7 @@ static bool goya_is_device_idle(struct hl_device *hdev)
if ((RREG32(dma_qm_reg) & DMA_QM_IDLE_MASK) !=
DMA_QM_IDLE_MASK)
- return false;
+ return HL_ENG_BUSY(buf, size, "DMA%d_QM", i);
}
offset = mmTPC1_QM_GLBL_STS0 - mmTPC0_QM_GLBL_STS0;
@@ -5159,31 +4714,31 @@ static bool goya_is_device_idle(struct hl_device *hdev)
if ((RREG32(tpc_qm_reg) & TPC_QM_IDLE_MASK) !=
TPC_QM_IDLE_MASK)
- return false;
+ return HL_ENG_BUSY(buf, size, "TPC%d_QM", i);
if ((RREG32(tpc_cmdq_reg) & TPC_CMDQ_IDLE_MASK) !=
TPC_CMDQ_IDLE_MASK)
- return false;
+ return HL_ENG_BUSY(buf, size, "TPC%d_CMDQ", i);
if ((RREG32(tpc_cfg_reg) & TPC_CFG_IDLE_MASK) !=
TPC_CFG_IDLE_MASK)
- return false;
+ return HL_ENG_BUSY(buf, size, "TPC%d_CFG", i);
}
if ((RREG32(mmMME_QM_GLBL_STS0) & MME_QM_IDLE_MASK) !=
MME_QM_IDLE_MASK)
- return false;
+ return HL_ENG_BUSY(buf, size, "MME_QM");
if ((RREG32(mmMME_CMDQ_GLBL_STS0) & MME_CMDQ_IDLE_MASK) !=
MME_CMDQ_IDLE_MASK)
- return false;
+ return HL_ENG_BUSY(buf, size, "MME_CMDQ");
if ((RREG32(mmMME_ARCH_STATUS) & MME_ARCH_IDLE_MASK) !=
MME_ARCH_IDLE_MASK)
- return false;
+ return HL_ENG_BUSY(buf, size, "MME_ARCH");
if (RREG32(mmMME_SHADOW_0_STATUS) & MME_SHADOW_IDLE_MASK)
- return false;
+ return HL_ENG_BUSY(buf, size, "MME");
return true;
}
@@ -5211,52 +4766,11 @@ static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
size_t max_size)
{
struct goya_device *goya = hdev->asic_specific;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct armcp_packet pkt;
- void *eeprom_info_cpu_addr;
- dma_addr_t eeprom_info_dma_addr;
- long result;
- int rc;
if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
return 0;
- eeprom_info_cpu_addr =
- hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev,
- max_size, &eeprom_info_dma_addr);
- if (!eeprom_info_cpu_addr) {
- dev_err(hdev->dev,
- "Failed to allocate DMA memory for EEPROM info packet\n");
- return -ENOMEM;
- }
-
- memset(eeprom_info_cpu_addr, 0, max_size);
-
- memset(&pkt, 0, sizeof(pkt));
-
- pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET <<
- ARMCP_PKT_CTL_OPCODE_SHIFT);
- pkt.addr = cpu_to_le64(eeprom_info_dma_addr +
- prop->host_phys_base_address);
- pkt.data_max_size = cpu_to_le32(max_size);
-
- rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
- GOYA_ARMCP_EEPROM_TIMEOUT, &result);
-
- if (rc) {
- dev_err(hdev->dev,
- "Failed to send armcp EEPROM pkt, error %d\n", rc);
- goto out;
- }
-
- /* result contains the actual size */
- memcpy(data, eeprom_info_cpu_addr, min((size_t)result, max_size));
-
-out:
- hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev, max_size,
- eeprom_info_cpu_addr);
-
- return rc;
+ return hl_fw_get_eeprom_data(hdev, data, max_size);
}
static enum hl_device_hw_state goya_get_hw_state(struct hl_device *hdev)
@@ -5306,8 +4820,7 @@ static const struct hl_asic_funcs goya_funcs = {
.mmu_invalidate_cache = goya_mmu_invalidate_cache,
.mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range,
.send_heartbeat = goya_send_heartbeat,
- .enable_clock_gating = goya_init_clock_gating,
- .disable_clock_gating = goya_disable_clock_gating,
+ .debug_coresight = goya_debug_coresight,
.is_device_idle = goya_is_device_idle,
.soft_reset_late_init = goya_soft_reset_late_init,
.hw_queues_lock = goya_hw_queues_lock,
@@ -5315,7 +4828,10 @@ static const struct hl_asic_funcs goya_funcs = {
.get_pci_id = goya_get_pci_id,
.get_eeprom_data = goya_get_eeprom_data,
.send_cpu_message = goya_send_cpu_message,
- .get_hw_state = goya_get_hw_state
+ .get_hw_state = goya_get_hw_state,
+ .pci_bars_map = goya_pci_bars_map,
+ .set_dram_bar_base = goya_set_ddr_bar_base,
+ .init_iatu = goya_init_iatu
};
/*
diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h
index 830551b6b062..b572e0263ac5 100644
--- a/drivers/misc/habanalabs/goya/goyaP.h
+++ b/drivers/misc/habanalabs/goya/goyaP.h
@@ -39,9 +39,13 @@
#error "Number of MSIX interrupts must be smaller or equal to GOYA_MSIX_ENTRIES"
#endif
-#define QMAN_FENCE_TIMEOUT_USEC 10000 /* 10 ms */
+#define QMAN_FENCE_TIMEOUT_USEC 10000 /* 10 ms */
-#define QMAN_STOP_TIMEOUT_USEC 100000 /* 100 ms */
+#define QMAN_STOP_TIMEOUT_USEC 100000 /* 100 ms */
+
+#define CORESIGHT_TIMEOUT_USEC 100000 /* 100 ms */
+
+#define GOYA_CPU_TIMEOUT_USEC 10000000 /* 10s */
#define TPC_ENABLED_MASK 0xFF
@@ -49,19 +53,14 @@
#define MAX_POWER_DEFAULT 200000 /* 200W */
-#define GOYA_ARMCP_INFO_TIMEOUT 10000000 /* 10s */
-#define GOYA_ARMCP_EEPROM_TIMEOUT 10000000 /* 10s */
-
#define DRAM_PHYS_DEFAULT_SIZE 0x100000000ull /* 4GB */
/* DRAM Memory Map */
#define CPU_FW_IMAGE_SIZE 0x10000000 /* 256MB */
-#define MMU_PAGE_TABLES_SIZE 0x0DE00000 /* 222MB */
+#define MMU_PAGE_TABLES_SIZE 0x0FC00000 /* 252MB */
#define MMU_DRAM_DEFAULT_PAGE_SIZE 0x00200000 /* 2MB */
#define MMU_CACHE_MNG_SIZE 0x00001000 /* 4KB */
-#define CPU_PQ_PKT_SIZE 0x00001000 /* 4KB */
-#define CPU_PQ_DATA_SIZE 0x01FFE000 /* 32MB - 8KB */
#define CPU_FW_IMAGE_ADDR DRAM_PHYS_BASE
#define MMU_PAGE_TABLES_ADDR (CPU_FW_IMAGE_ADDR + CPU_FW_IMAGE_SIZE)
@@ -69,13 +68,13 @@
MMU_PAGE_TABLES_SIZE)
#define MMU_CACHE_MNG_ADDR (MMU_DRAM_DEFAULT_PAGE_ADDR + \
MMU_DRAM_DEFAULT_PAGE_SIZE)
-#define CPU_PQ_PKT_ADDR (MMU_CACHE_MNG_ADDR + \
+#define DRAM_KMD_END_ADDR (MMU_CACHE_MNG_ADDR + \
MMU_CACHE_MNG_SIZE)
-#define CPU_PQ_DATA_ADDR (CPU_PQ_PKT_ADDR + CPU_PQ_PKT_SIZE)
-#define DRAM_BASE_ADDR_USER (CPU_PQ_DATA_ADDR + CPU_PQ_DATA_SIZE)
-#if (DRAM_BASE_ADDR_USER != 0x20000000)
-#error "KMD must reserve 512MB"
+#define DRAM_BASE_ADDR_USER 0x20000000
+
+#if (DRAM_KMD_END_ADDR > DRAM_BASE_ADDR_USER)
+#error "KMD must reserve no more than 512MB"
#endif
/*
@@ -142,21 +141,14 @@
#define HW_CAP_GOLDEN 0x00000400
#define HW_CAP_TPC 0x00000800
-#define CPU_PKT_SHIFT 5
-#define CPU_PKT_SIZE (1 << CPU_PKT_SHIFT)
-#define CPU_PKT_MASK (~((1 << CPU_PKT_SHIFT) - 1))
-#define CPU_MAX_PKTS_IN_CB 32
-#define CPU_CB_SIZE (CPU_PKT_SIZE * CPU_MAX_PKTS_IN_CB)
-#define CPU_ACCESSIBLE_MEM_SIZE (HL_QUEUE_LENGTH * CPU_CB_SIZE)
-
enum goya_fw_component {
FW_COMP_UBOOT,
FW_COMP_PREBOOT
};
struct goya_device {
- int (*test_cpu_queue)(struct hl_device *hdev);
- int (*armcp_info_get)(struct hl_device *hdev);
+ void (*mmu_prepare_reg)(struct hl_device *hdev, u64 reg, u32 asid);
+ void (*qman0_set_security)(struct hl_device *hdev, bool secure);
/* TODO: remove hw_queues_lock after moving to scheduler code */
spinlock_t hw_queues_lock;
@@ -188,11 +180,16 @@ void goya_debugfs_led_set(struct hl_device *hdev, u8 led, u8 state);
void goya_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
void goya_add_device_attr(struct hl_device *hdev,
struct attribute_group *dev_attr_grp);
+int goya_armcp_info_get(struct hl_device *hdev);
void goya_init_security(struct hl_device *hdev);
+int goya_debug_coresight(struct hl_device *hdev, void *data);
u64 goya_get_max_power(struct hl_device *hdev);
void goya_set_max_power(struct hl_device *hdev, u64 value);
+int goya_test_queues(struct hl_device *hdev);
+void goya_mmu_prepare(struct hl_device *hdev, u32 asid);
+int goya_mmu_clear_pgt_range(struct hl_device *hdev);
+int goya_mmu_set_dram_default_page(struct hl_device *hdev);
-int goya_send_pci_access_msg(struct hl_device *hdev, u32 opcode);
void goya_late_fini(struct hl_device *hdev);
int goya_suspend(struct hl_device *hdev);
int goya_resume(struct hl_device *hdev);
@@ -207,5 +204,9 @@ void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id,
u32 goya_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt);
int goya_test_queue(struct hl_device *hdev, u32 hw_queue_id);
int goya_send_heartbeat(struct hl_device *hdev);
+void *goya_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
+ dma_addr_t *dma_handle);
+void goya_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
+ void *vaddr);
#endif /* GOYAP_H_ */
diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c
new file mode 100644
index 000000000000..68726fb4c56a
--- /dev/null
+++ b/drivers/misc/habanalabs/goya/goya_coresight.c
@@ -0,0 +1,620 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "goyaP.h"
+#include "include/goya/goya_coresight.h"
+#include "include/goya/asic_reg/goya_regs.h"
+
+#include <uapi/misc/habanalabs.h>
+
+#include <linux/coresight.h>
+
+#define GOYA_PLDM_CORESIGHT_TIMEOUT_USEC (CORESIGHT_TIMEOUT_USEC * 100)
+
+static u64 debug_stm_regs[GOYA_STM_LAST + 1] = {
+ [GOYA_STM_CPU] = mmCPU_STM_BASE,
+ [GOYA_STM_DMA_CH_0_CS] = mmDMA_CH_0_CS_STM_BASE,
+ [GOYA_STM_DMA_CH_1_CS] = mmDMA_CH_1_CS_STM_BASE,
+ [GOYA_STM_DMA_CH_2_CS] = mmDMA_CH_2_CS_STM_BASE,
+ [GOYA_STM_DMA_CH_3_CS] = mmDMA_CH_3_CS_STM_BASE,
+ [GOYA_STM_DMA_CH_4_CS] = mmDMA_CH_4_CS_STM_BASE,
+ [GOYA_STM_DMA_MACRO_CS] = mmDMA_MACRO_CS_STM_BASE,
+ [GOYA_STM_MME1_SBA] = mmMME1_SBA_STM_BASE,
+ [GOYA_STM_MME3_SBB] = mmMME3_SBB_STM_BASE,
+ [GOYA_STM_MME4_WACS2] = mmMME4_WACS2_STM_BASE,
+ [GOYA_STM_MME4_WACS] = mmMME4_WACS_STM_BASE,
+ [GOYA_STM_MMU_CS] = mmMMU_CS_STM_BASE,
+ [GOYA_STM_PCIE] = mmPCIE_STM_BASE,
+ [GOYA_STM_PSOC] = mmPSOC_STM_BASE,
+ [GOYA_STM_TPC0_EML] = mmTPC0_EML_STM_BASE,
+ [GOYA_STM_TPC1_EML] = mmTPC1_EML_STM_BASE,
+ [GOYA_STM_TPC2_EML] = mmTPC2_EML_STM_BASE,
+ [GOYA_STM_TPC3_EML] = mmTPC3_EML_STM_BASE,
+ [GOYA_STM_TPC4_EML] = mmTPC4_EML_STM_BASE,
+ [GOYA_STM_TPC5_EML] = mmTPC5_EML_STM_BASE,
+ [GOYA_STM_TPC6_EML] = mmTPC6_EML_STM_BASE,
+ [GOYA_STM_TPC7_EML] = mmTPC7_EML_STM_BASE
+};
+
+static u64 debug_etf_regs[GOYA_ETF_LAST + 1] = {
+ [GOYA_ETF_CPU_0] = mmCPU_ETF_0_BASE,
+ [GOYA_ETF_CPU_1] = mmCPU_ETF_1_BASE,
+ [GOYA_ETF_CPU_TRACE] = mmCPU_ETF_TRACE_BASE,
+ [GOYA_ETF_DMA_CH_0_CS] = mmDMA_CH_0_CS_ETF_BASE,
+ [GOYA_ETF_DMA_CH_1_CS] = mmDMA_CH_1_CS_ETF_BASE,
+ [GOYA_ETF_DMA_CH_2_CS] = mmDMA_CH_2_CS_ETF_BASE,
+ [GOYA_ETF_DMA_CH_3_CS] = mmDMA_CH_3_CS_ETF_BASE,
+ [GOYA_ETF_DMA_CH_4_CS] = mmDMA_CH_4_CS_ETF_BASE,
+ [GOYA_ETF_DMA_MACRO_CS] = mmDMA_MACRO_CS_ETF_BASE,
+ [GOYA_ETF_MME1_SBA] = mmMME1_SBA_ETF_BASE,
+ [GOYA_ETF_MME3_SBB] = mmMME3_SBB_ETF_BASE,
+ [GOYA_ETF_MME4_WACS2] = mmMME4_WACS2_ETF_BASE,
+ [GOYA_ETF_MME4_WACS] = mmMME4_WACS_ETF_BASE,
+ [GOYA_ETF_MMU_CS] = mmMMU_CS_ETF_BASE,
+ [GOYA_ETF_PCIE] = mmPCIE_ETF_BASE,
+ [GOYA_ETF_PSOC] = mmPSOC_ETF_BASE,
+ [GOYA_ETF_TPC0_EML] = mmTPC0_EML_ETF_BASE,
+ [GOYA_ETF_TPC1_EML] = mmTPC1_EML_ETF_BASE,
+ [GOYA_ETF_TPC2_EML] = mmTPC2_EML_ETF_BASE,
+ [GOYA_ETF_TPC3_EML] = mmTPC3_EML_ETF_BASE,
+ [GOYA_ETF_TPC4_EML] = mmTPC4_EML_ETF_BASE,
+ [GOYA_ETF_TPC5_EML] = mmTPC5_EML_ETF_BASE,
+ [GOYA_ETF_TPC6_EML] = mmTPC6_EML_ETF_BASE,
+ [GOYA_ETF_TPC7_EML] = mmTPC7_EML_ETF_BASE
+};
+
+static u64 debug_funnel_regs[GOYA_FUNNEL_LAST + 1] = {
+ [GOYA_FUNNEL_CPU] = mmCPU_FUNNEL_BASE,
+ [GOYA_FUNNEL_DMA_CH_6_1] = mmDMA_CH_FUNNEL_6_1_BASE,
+ [GOYA_FUNNEL_DMA_MACRO_3_1] = mmDMA_MACRO_FUNNEL_3_1_BASE,
+ [GOYA_FUNNEL_MME0_RTR] = mmMME0_RTR_FUNNEL_BASE,
+ [GOYA_FUNNEL_MME1_RTR] = mmMME1_RTR_FUNNEL_BASE,
+ [GOYA_FUNNEL_MME2_RTR] = mmMME2_RTR_FUNNEL_BASE,
+ [GOYA_FUNNEL_MME3_RTR] = mmMME3_RTR_FUNNEL_BASE,
+ [GOYA_FUNNEL_MME4_RTR] = mmMME4_RTR_FUNNEL_BASE,
+ [GOYA_FUNNEL_MME5_RTR] = mmMME5_RTR_FUNNEL_BASE,
+ [GOYA_FUNNEL_PCIE] = mmPCIE_FUNNEL_BASE,
+ [GOYA_FUNNEL_PSOC] = mmPSOC_FUNNEL_BASE,
+ [GOYA_FUNNEL_TPC0_EML] = mmTPC0_EML_FUNNEL_BASE,
+ [GOYA_FUNNEL_TPC1_EML] = mmTPC1_EML_FUNNEL_BASE,
+ [GOYA_FUNNEL_TPC1_RTR] = mmTPC1_RTR_FUNNEL_BASE,
+ [GOYA_FUNNEL_TPC2_EML] = mmTPC2_EML_FUNNEL_BASE,
+ [GOYA_FUNNEL_TPC2_RTR] = mmTPC2_RTR_FUNNEL_BASE,
+ [GOYA_FUNNEL_TPC3_EML] = mmTPC3_EML_FUNNEL_BASE,
+ [GOYA_FUNNEL_TPC3_RTR] = mmTPC3_RTR_FUNNEL_BASE,
+ [GOYA_FUNNEL_TPC4_EML] = mmTPC4_EML_FUNNEL_BASE,
+ [GOYA_FUNNEL_TPC4_RTR] = mmTPC4_RTR_FUNNEL_BASE,
+ [GOYA_FUNNEL_TPC5_EML] = mmTPC5_EML_FUNNEL_BASE,
+ [GOYA_FUNNEL_TPC5_RTR] = mmTPC5_RTR_FUNNEL_BASE,
+ [GOYA_FUNNEL_TPC6_EML] = mmTPC6_EML_FUNNEL_BASE,
+ [GOYA_FUNNEL_TPC6_RTR] = mmTPC6_RTR_FUNNEL_BASE,
+ [GOYA_FUNNEL_TPC7_EML] = mmTPC7_EML_FUNNEL_BASE
+};
+
+static u64 debug_bmon_regs[GOYA_BMON_LAST + 1] = {
+ [GOYA_BMON_CPU_RD] = mmCPU_RD_BMON_BASE,
+ [GOYA_BMON_CPU_WR] = mmCPU_WR_BMON_BASE,
+ [GOYA_BMON_DMA_CH_0_0] = mmDMA_CH_0_BMON_0_BASE,
+ [GOYA_BMON_DMA_CH_0_1] = mmDMA_CH_0_BMON_1_BASE,
+ [GOYA_BMON_DMA_CH_1_0] = mmDMA_CH_1_BMON_0_BASE,
+ [GOYA_BMON_DMA_CH_1_1] = mmDMA_CH_1_BMON_1_BASE,
+ [GOYA_BMON_DMA_CH_2_0] = mmDMA_CH_2_BMON_0_BASE,
+ [GOYA_BMON_DMA_CH_2_1] = mmDMA_CH_2_BMON_1_BASE,
+ [GOYA_BMON_DMA_CH_3_0] = mmDMA_CH_3_BMON_0_BASE,
+ [GOYA_BMON_DMA_CH_3_1] = mmDMA_CH_3_BMON_1_BASE,
+ [GOYA_BMON_DMA_CH_4_0] = mmDMA_CH_4_BMON_0_BASE,
+ [GOYA_BMON_DMA_CH_4_1] = mmDMA_CH_4_BMON_1_BASE,
+ [GOYA_BMON_DMA_MACRO_0] = mmDMA_MACRO_BMON_0_BASE,
+ [GOYA_BMON_DMA_MACRO_1] = mmDMA_MACRO_BMON_1_BASE,
+ [GOYA_BMON_DMA_MACRO_2] = mmDMA_MACRO_BMON_2_BASE,
+ [GOYA_BMON_DMA_MACRO_3] = mmDMA_MACRO_BMON_3_BASE,
+ [GOYA_BMON_DMA_MACRO_4] = mmDMA_MACRO_BMON_4_BASE,
+ [GOYA_BMON_DMA_MACRO_5] = mmDMA_MACRO_BMON_5_BASE,
+ [GOYA_BMON_DMA_MACRO_6] = mmDMA_MACRO_BMON_6_BASE,
+ [GOYA_BMON_DMA_MACRO_7] = mmDMA_MACRO_BMON_7_BASE,
+ [GOYA_BMON_MME1_SBA_0] = mmMME1_SBA_BMON0_BASE,
+ [GOYA_BMON_MME1_SBA_1] = mmMME1_SBA_BMON1_BASE,
+ [GOYA_BMON_MME3_SBB_0] = mmMME3_SBB_BMON0_BASE,
+ [GOYA_BMON_MME3_SBB_1] = mmMME3_SBB_BMON1_BASE,
+ [GOYA_BMON_MME4_WACS2_0] = mmMME4_WACS2_BMON0_BASE,
+ [GOYA_BMON_MME4_WACS2_1] = mmMME4_WACS2_BMON1_BASE,
+ [GOYA_BMON_MME4_WACS2_2] = mmMME4_WACS2_BMON2_BASE,
+ [GOYA_BMON_MME4_WACS_0] = mmMME4_WACS_BMON0_BASE,
+ [GOYA_BMON_MME4_WACS_1] = mmMME4_WACS_BMON1_BASE,
+ [GOYA_BMON_MME4_WACS_2] = mmMME4_WACS_BMON2_BASE,
+ [GOYA_BMON_MME4_WACS_3] = mmMME4_WACS_BMON3_BASE,
+ [GOYA_BMON_MME4_WACS_4] = mmMME4_WACS_BMON4_BASE,
+ [GOYA_BMON_MME4_WACS_5] = mmMME4_WACS_BMON5_BASE,
+ [GOYA_BMON_MME4_WACS_6] = mmMME4_WACS_BMON6_BASE,
+ [GOYA_BMON_MMU_0] = mmMMU_BMON_0_BASE,
+ [GOYA_BMON_MMU_1] = mmMMU_BMON_1_BASE,
+ [GOYA_BMON_PCIE_MSTR_RD] = mmPCIE_BMON_MSTR_RD_BASE,
+ [GOYA_BMON_PCIE_MSTR_WR] = mmPCIE_BMON_MSTR_WR_BASE,
+ [GOYA_BMON_PCIE_SLV_RD] = mmPCIE_BMON_SLV_RD_BASE,
+ [GOYA_BMON_PCIE_SLV_WR] = mmPCIE_BMON_SLV_WR_BASE,
+ [GOYA_BMON_TPC0_EML_0] = mmTPC0_EML_BUSMON_0_BASE,
+ [GOYA_BMON_TPC0_EML_1] = mmTPC0_EML_BUSMON_1_BASE,
+ [GOYA_BMON_TPC0_EML_2] = mmTPC0_EML_BUSMON_2_BASE,
+ [GOYA_BMON_TPC0_EML_3] = mmTPC0_EML_BUSMON_3_BASE,
+ [GOYA_BMON_TPC1_EML_0] = mmTPC1_EML_BUSMON_0_BASE,
+ [GOYA_BMON_TPC1_EML_1] = mmTPC1_EML_BUSMON_1_BASE,
+ [GOYA_BMON_TPC1_EML_2] = mmTPC1_EML_BUSMON_2_BASE,
+ [GOYA_BMON_TPC1_EML_3] = mmTPC1_EML_BUSMON_3_BASE,
+ [GOYA_BMON_TPC2_EML_0] = mmTPC2_EML_BUSMON_0_BASE,
+ [GOYA_BMON_TPC2_EML_1] = mmTPC2_EML_BUSMON_1_BASE,
+ [GOYA_BMON_TPC2_EML_2] = mmTPC2_EML_BUSMON_2_BASE,
+ [GOYA_BMON_TPC2_EML_3] = mmTPC2_EML_BUSMON_3_BASE,
+ [GOYA_BMON_TPC3_EML_0] = mmTPC3_EML_BUSMON_0_BASE,
+ [GOYA_BMON_TPC3_EML_1] = mmTPC3_EML_BUSMON_1_BASE,
+ [GOYA_BMON_TPC3_EML_2] = mmTPC3_EML_BUSMON_2_BASE,
+ [GOYA_BMON_TPC3_EML_3] = mmTPC3_EML_BUSMON_3_BASE,
+ [GOYA_BMON_TPC4_EML_0] = mmTPC4_EML_BUSMON_0_BASE,
+ [GOYA_BMON_TPC4_EML_1] = mmTPC4_EML_BUSMON_1_BASE,
+ [GOYA_BMON_TPC4_EML_2] = mmTPC4_EML_BUSMON_2_BASE,
+ [GOYA_BMON_TPC4_EML_3] = mmTPC4_EML_BUSMON_3_BASE,
+ [GOYA_BMON_TPC5_EML_0] = mmTPC5_EML_BUSMON_0_BASE,
+ [GOYA_BMON_TPC5_EML_1] = mmTPC5_EML_BUSMON_1_BASE,
+ [GOYA_BMON_TPC5_EML_2] = mmTPC5_EML_BUSMON_2_BASE,
+ [GOYA_BMON_TPC5_EML_3] = mmTPC5_EML_BUSMON_3_BASE,
+ [GOYA_BMON_TPC6_EML_0] = mmTPC6_EML_BUSMON_0_BASE,
+ [GOYA_BMON_TPC6_EML_1] = mmTPC6_EML_BUSMON_1_BASE,
+ [GOYA_BMON_TPC6_EML_2] = mmTPC6_EML_BUSMON_2_BASE,
+ [GOYA_BMON_TPC6_EML_3] = mmTPC6_EML_BUSMON_3_BASE,
+ [GOYA_BMON_TPC7_EML_0] = mmTPC7_EML_BUSMON_0_BASE,
+ [GOYA_BMON_TPC7_EML_1] = mmTPC7_EML_BUSMON_1_BASE,
+ [GOYA_BMON_TPC7_EML_2] = mmTPC7_EML_BUSMON_2_BASE,
+ [GOYA_BMON_TPC7_EML_3] = mmTPC7_EML_BUSMON_3_BASE
+};
+
+static u64 debug_spmu_regs[GOYA_SPMU_LAST + 1] = {
+ [GOYA_SPMU_DMA_CH_0_CS] = mmDMA_CH_0_CS_SPMU_BASE,
+ [GOYA_SPMU_DMA_CH_1_CS] = mmDMA_CH_1_CS_SPMU_BASE,
+ [GOYA_SPMU_DMA_CH_2_CS] = mmDMA_CH_2_CS_SPMU_BASE,
+ [GOYA_SPMU_DMA_CH_3_CS] = mmDMA_CH_3_CS_SPMU_BASE,
+ [GOYA_SPMU_DMA_CH_4_CS] = mmDMA_CH_4_CS_SPMU_BASE,
+ [GOYA_SPMU_DMA_MACRO_CS] = mmDMA_MACRO_CS_SPMU_BASE,
+ [GOYA_SPMU_MME1_SBA] = mmMME1_SBA_SPMU_BASE,
+ [GOYA_SPMU_MME3_SBB] = mmMME3_SBB_SPMU_BASE,
+ [GOYA_SPMU_MME4_WACS2] = mmMME4_WACS2_SPMU_BASE,
+ [GOYA_SPMU_MME4_WACS] = mmMME4_WACS_SPMU_BASE,
+ [GOYA_SPMU_MMU_CS] = mmMMU_CS_SPMU_BASE,
+ [GOYA_SPMU_PCIE] = mmPCIE_SPMU_BASE,
+ [GOYA_SPMU_TPC0_EML] = mmTPC0_EML_SPMU_BASE,
+ [GOYA_SPMU_TPC1_EML] = mmTPC1_EML_SPMU_BASE,
+ [GOYA_SPMU_TPC2_EML] = mmTPC2_EML_SPMU_BASE,
+ [GOYA_SPMU_TPC3_EML] = mmTPC3_EML_SPMU_BASE,
+ [GOYA_SPMU_TPC4_EML] = mmTPC4_EML_SPMU_BASE,
+ [GOYA_SPMU_TPC5_EML] = mmTPC5_EML_SPMU_BASE,
+ [GOYA_SPMU_TPC6_EML] = mmTPC6_EML_SPMU_BASE,
+ [GOYA_SPMU_TPC7_EML] = mmTPC7_EML_SPMU_BASE
+};
+
+static int goya_coresight_timeout(struct hl_device *hdev, u64 addr,
+ int position, bool up)
+{
+ int rc;
+ u32 val, timeout_usec;
+
+ if (hdev->pldm)
+ timeout_usec = GOYA_PLDM_CORESIGHT_TIMEOUT_USEC;
+ else
+ timeout_usec = CORESIGHT_TIMEOUT_USEC;
+
+ rc = hl_poll_timeout(
+ hdev,
+ addr,
+ val,
+ up ? val & BIT(position) : !(val & BIT(position)),
+ 1000,
+ timeout_usec);
+
+ if (rc) {
+ dev_err(hdev->dev,
+ "Timeout while waiting for coresight, addr: 0x%llx, position: %d, up: %d\n",
+ addr, position, up);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int goya_config_stm(struct hl_device *hdev,
+ struct hl_debug_params *params)
+{
+ struct hl_debug_params_stm *input;
+ u64 base_reg = debug_stm_regs[params->reg_idx] - CFG_BASE;
+ int rc;
+
+ WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
+
+ if (params->enable) {
+ input = params->input;
+
+ if (!input)
+ return -EINVAL;
+
+ WREG32(base_reg + 0xE80, 0x80004);
+ WREG32(base_reg + 0xD64, 7);
+ WREG32(base_reg + 0xD60, 0);
+ WREG32(base_reg + 0xD00, lower_32_bits(input->he_mask));
+ WREG32(base_reg + 0xD20, lower_32_bits(input->sp_mask));
+ WREG32(base_reg + 0xD60, 1);
+ WREG32(base_reg + 0xD00, upper_32_bits(input->he_mask));
+ WREG32(base_reg + 0xD20, upper_32_bits(input->sp_mask));
+ WREG32(base_reg + 0xE70, 0x10);
+ WREG32(base_reg + 0xE60, 0);
+ WREG32(base_reg + 0xE64, 0x420000);
+ WREG32(base_reg + 0xE00, 0xFFFFFFFF);
+ WREG32(base_reg + 0xE20, 0xFFFFFFFF);
+ WREG32(base_reg + 0xEF4, input->id);
+ WREG32(base_reg + 0xDF4, 0x80);
+ WREG32(base_reg + 0xE8C, input->frequency);
+ WREG32(base_reg + 0xE90, 0x7FF);
+ WREG32(base_reg + 0xE80, 0x7 | (input->id << 16));
+ } else {
+ WREG32(base_reg + 0xE80, 4);
+ WREG32(base_reg + 0xD64, 0);
+ WREG32(base_reg + 0xD60, 1);
+ WREG32(base_reg + 0xD00, 0);
+ WREG32(base_reg + 0xD20, 0);
+ WREG32(base_reg + 0xD60, 0);
+ WREG32(base_reg + 0xE20, 0);
+ WREG32(base_reg + 0xE00, 0);
+ WREG32(base_reg + 0xDF4, 0x80);
+ WREG32(base_reg + 0xE70, 0);
+ WREG32(base_reg + 0xE60, 0);
+ WREG32(base_reg + 0xE64, 0);
+ WREG32(base_reg + 0xE8C, 0);
+
+ rc = goya_coresight_timeout(hdev, base_reg + 0xE80, 23, false);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to disable STM on timeout, error %d\n",
+ rc);
+ return rc;
+ }
+
+ WREG32(base_reg + 0xE80, 4);
+ }
+
+ return 0;
+}
+
+static int goya_config_etf(struct hl_device *hdev,
+ struct hl_debug_params *params)
+{
+ struct hl_debug_params_etf *input;
+ u64 base_reg = debug_etf_regs[params->reg_idx] - CFG_BASE;
+ u32 val;
+ int rc;
+
+ WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
+
+ val = RREG32(base_reg + 0x304);
+ val |= 0x1000;
+ WREG32(base_reg + 0x304, val);
+ val |= 0x40;
+ WREG32(base_reg + 0x304, val);
+
+ rc = goya_coresight_timeout(hdev, base_reg + 0x304, 6, false);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to %s ETF on timeout, error %d\n",
+ params->enable ? "enable" : "disable", rc);
+ return rc;
+ }
+
+ rc = goya_coresight_timeout(hdev, base_reg + 0xC, 2, true);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to %s ETF on timeout, error %d\n",
+ params->enable ? "enable" : "disable", rc);
+ return rc;
+ }
+
+ WREG32(base_reg + 0x20, 0);
+
+ if (params->enable) {
+ input = params->input;
+
+ if (!input)
+ return -EINVAL;
+
+ WREG32(base_reg + 0x34, 0x3FFC);
+ WREG32(base_reg + 0x28, input->sink_mode);
+ WREG32(base_reg + 0x304, 0x4001);
+ WREG32(base_reg + 0x308, 0xA);
+ WREG32(base_reg + 0x20, 1);
+ } else {
+ WREG32(base_reg + 0x34, 0);
+ WREG32(base_reg + 0x28, 0);
+ WREG32(base_reg + 0x304, 0);
+ }
+
+ return 0;
+}
+
+static int goya_etr_validate_address(struct hl_device *hdev, u64 addr,
+ u32 size)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 range_start, range_end;
+
+ if (hdev->mmu_enable) {
+ range_start = prop->va_space_dram_start_address;
+ range_end = prop->va_space_dram_end_address;
+ } else {
+ range_start = prop->dram_user_base_address;
+ range_end = prop->dram_end_address;
+ }
+
+ return hl_mem_area_inside_range(addr, size, range_start, range_end);
+}
+
+static int goya_config_etr(struct hl_device *hdev,
+ struct hl_debug_params *params)
+{
+ struct hl_debug_params_etr *input;
+ u64 base_reg = mmPSOC_ETR_BASE - CFG_BASE;
+ u32 val;
+ int rc;
+
+ WREG32(base_reg + 0xFB0, CORESIGHT_UNLOCK);
+
+ val = RREG32(base_reg + 0x304);
+ val |= 0x1000;
+ WREG32(base_reg + 0x304, val);
+ val |= 0x40;
+ WREG32(base_reg + 0x304, val);
+
+ rc = goya_coresight_timeout(hdev, base_reg + 0x304, 6, false);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to %s ETR on timeout, error %d\n",
+ params->enable ? "enable" : "disable", rc);
+ return rc;
+ }
+
+ rc = goya_coresight_timeout(hdev, base_reg + 0xC, 2, true);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to %s ETR on timeout, error %d\n",
+ params->enable ? "enable" : "disable", rc);
+ return rc;
+ }
+
+ WREG32(base_reg + 0x20, 0);
+
+ if (params->enable) {
+ input = params->input;
+
+ if (!input)
+ return -EINVAL;
+
+ if (input->buffer_size == 0) {
+ dev_err(hdev->dev,
+ "ETR buffer size should be bigger than 0\n");
+ return -EINVAL;
+ }
+
+ if (!goya_etr_validate_address(hdev,
+ input->buffer_address, input->buffer_size)) {
+ dev_err(hdev->dev, "buffer address is not valid\n");
+ return -EINVAL;
+ }
+
+ WREG32(base_reg + 0x34, 0x3FFC);
+ WREG32(base_reg + 0x4, input->buffer_size);
+ WREG32(base_reg + 0x28, input->sink_mode);
+ WREG32(base_reg + 0x110, 0x700);
+ WREG32(base_reg + 0x118,
+ lower_32_bits(input->buffer_address));
+ WREG32(base_reg + 0x11C,
+ upper_32_bits(input->buffer_address));
+ WREG32(base_reg + 0x304, 3);
+ WREG32(base_reg + 0x308, 0xA);
+ WREG32(base_reg + 0x20, 1);
+ } else {
+ WREG32(base_reg + 0x34, 0);
+ WREG32(base_reg + 0x4, 0x400);
+ WREG32(base_reg + 0x118, 0);
+ WREG32(base_reg + 0x11C, 0);
+ WREG32(base_reg + 0x308, 0);
+ WREG32(base_reg + 0x28, 0);
+ WREG32(base_reg + 0x304, 0);
+
+ if (params->output_size >= sizeof(u32))
+ *(u32 *) params->output = RREG32(base_reg + 0x18);
+ }
+
+ return 0;
+}
+
+static int goya_config_funnel(struct hl_device *hdev,
+ struct hl_debug_params *params)
+{
+ WREG32(debug_funnel_regs[params->reg_idx] - CFG_BASE + 0xFB0,
+ CORESIGHT_UNLOCK);
+
+ WREG32(debug_funnel_regs[params->reg_idx] - CFG_BASE,
+ params->enable ? 0x33F : 0);
+
+ return 0;
+}
+
+static int goya_config_bmon(struct hl_device *hdev,
+ struct hl_debug_params *params)
+{
+ struct hl_debug_params_bmon *input;
+ u64 base_reg = debug_bmon_regs[params->reg_idx] - CFG_BASE;
+ u32 pcie_base = 0;
+
+ WREG32(base_reg + 0x104, 1);
+
+ if (params->enable) {
+ input = params->input;
+
+ if (!input)
+ return -EINVAL;
+
+ WREG32(base_reg + 0x208, lower_32_bits(input->addr_range0));
+ WREG32(base_reg + 0x20C, upper_32_bits(input->addr_range0));
+ WREG32(base_reg + 0x248, lower_32_bits(input->addr_range1));
+ WREG32(base_reg + 0x24C, upper_32_bits(input->addr_range1));
+ WREG32(base_reg + 0x224, 0);
+ WREG32(base_reg + 0x234, 0);
+ WREG32(base_reg + 0x30C, input->bw_win);
+ WREG32(base_reg + 0x308, input->win_capture);
+
+ /* PCIE IF BMON bug WA */
+ if (params->reg_idx != GOYA_BMON_PCIE_MSTR_RD &&
+ params->reg_idx != GOYA_BMON_PCIE_MSTR_WR &&
+ params->reg_idx != GOYA_BMON_PCIE_SLV_RD &&
+ params->reg_idx != GOYA_BMON_PCIE_SLV_WR)
+ pcie_base = 0xA000000;
+
+ WREG32(base_reg + 0x700, pcie_base | 0xB00 | (input->id << 12));
+ WREG32(base_reg + 0x708, pcie_base | 0xA00 | (input->id << 12));
+ WREG32(base_reg + 0x70C, pcie_base | 0xC00 | (input->id << 12));
+
+ WREG32(base_reg + 0x100, 0x11);
+ WREG32(base_reg + 0x304, 0x1);
+ } else {
+ WREG32(base_reg + 0x208, 0xFFFFFFFF);
+ WREG32(base_reg + 0x20C, 0xFFFFFFFF);
+ WREG32(base_reg + 0x248, 0xFFFFFFFF);
+ WREG32(base_reg + 0x24C, 0xFFFFFFFF);
+ WREG32(base_reg + 0x224, 0xFFFFFFFF);
+ WREG32(base_reg + 0x234, 0x1070F);
+ WREG32(base_reg + 0x30C, 0);
+ WREG32(base_reg + 0x308, 0xFFFF);
+ WREG32(base_reg + 0x700, 0xA000B00);
+ WREG32(base_reg + 0x708, 0xA000A00);
+ WREG32(base_reg + 0x70C, 0xA000C00);
+ WREG32(base_reg + 0x100, 1);
+ WREG32(base_reg + 0x304, 0);
+ WREG32(base_reg + 0x104, 0);
+ }
+
+ return 0;
+}
+
+static int goya_config_spmu(struct hl_device *hdev,
+ struct hl_debug_params *params)
+{
+ u64 base_reg = debug_spmu_regs[params->reg_idx] - CFG_BASE;
+ struct hl_debug_params_spmu *input = params->input;
+ u64 *output;
+ u32 output_arr_len;
+ u32 events_num;
+ u32 overflow_idx;
+ u32 cycle_cnt_idx;
+ int i;
+
+ if (params->enable) {
+ input = params->input;
+
+ if (!input)
+ return -EINVAL;
+
+ if (input->event_types_num < 3) {
+ dev_err(hdev->dev,
+ "not enough values for SPMU enable\n");
+ return -EINVAL;
+ }
+
+ WREG32(base_reg + 0xE04, 0x41013046);
+ WREG32(base_reg + 0xE04, 0x41013040);
+
+ for (i = 0 ; i < input->event_types_num ; i++)
+ WREG32(base_reg + 0x400 + i * 4, input->event_types[i]);
+
+ WREG32(base_reg + 0xE04, 0x41013041);
+ WREG32(base_reg + 0xC00, 0x8000003F);
+ } else {
+ output = params->output;
+ output_arr_len = params->output_size / 8;
+ events_num = output_arr_len - 2;
+ overflow_idx = output_arr_len - 2;
+ cycle_cnt_idx = output_arr_len - 1;
+
+ if (!output)
+ return -EINVAL;
+
+ if (output_arr_len < 3) {
+ dev_err(hdev->dev,
+ "not enough values for SPMU disable\n");
+ return -EINVAL;
+ }
+
+ WREG32(base_reg + 0xE04, 0x41013040);
+
+ for (i = 0 ; i < events_num ; i++)
+ output[i] = RREG32(base_reg + i * 8);
+
+ output[overflow_idx] = RREG32(base_reg + 0xCC0);
+
+ output[cycle_cnt_idx] = RREG32(base_reg + 0xFC);
+ output[cycle_cnt_idx] <<= 32;
+ output[cycle_cnt_idx] |= RREG32(base_reg + 0xF8);
+
+ WREG32(base_reg + 0xCC0, 0);
+ }
+
+ return 0;
+}
+
+static int goya_config_timestamp(struct hl_device *hdev,
+ struct hl_debug_params *params)
+{
+ WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
+ if (params->enable) {
+ WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0xC, 0);
+ WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE + 0x8, 0);
+ WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 1);
+ }
+
+ return 0;
+}
+
+int goya_debug_coresight(struct hl_device *hdev, void *data)
+{
+ struct hl_debug_params *params = data;
+ u32 val;
+ int rc;
+
+ switch (params->op) {
+ case HL_DEBUG_OP_STM:
+ rc = goya_config_stm(hdev, params);
+ break;
+ case HL_DEBUG_OP_ETF:
+ rc = goya_config_etf(hdev, params);
+ break;
+ case HL_DEBUG_OP_ETR:
+ rc = goya_config_etr(hdev, params);
+ break;
+ case HL_DEBUG_OP_FUNNEL:
+ rc = goya_config_funnel(hdev, params);
+ break;
+ case HL_DEBUG_OP_BMON:
+ rc = goya_config_bmon(hdev, params);
+ break;
+ case HL_DEBUG_OP_SPMU:
+ rc = goya_config_spmu(hdev, params);
+ break;
+ case HL_DEBUG_OP_TIMESTAMP:
+ rc = goya_config_timestamp(hdev, params);
+ break;
+
+ default:
+ dev_err(hdev->dev, "Unknown coresight id %d\n", params->op);
+ return -EINVAL;
+ }
+
+ /* Perform read from the device to flush all configuration */
+ val = RREG32(mmPCIE_DBI_DEVICE_ID_VENDOR_ID_REG);
+
+ return rc;
+}
diff --git a/drivers/misc/habanalabs/goya/goya_security.c b/drivers/misc/habanalabs/goya/goya_security.c
index 575003238401..d95d1b2f860d 100644
--- a/drivers/misc/habanalabs/goya/goya_security.c
+++ b/drivers/misc/habanalabs/goya/goya_security.c
@@ -6,6 +6,7 @@
*/
#include "goyaP.h"
+#include "include/goya/asic_reg/goya_regs.h"
/*
* goya_set_block_as_protected - set the given block as protected
@@ -2159,6 +2160,8 @@ static void goya_init_protection_bits(struct hl_device *hdev)
* Bits 7-11 represents the word offset inside the 128 bytes.
* Bits 2-6 represents the bit location inside the word.
*/
+ u32 pb_addr, mask;
+ u8 word_offset;
goya_pb_set_block(hdev, mmPCI_NRTR_BASE);
goya_pb_set_block(hdev, mmPCI_RD_REGULATOR_BASE);
@@ -2237,6 +2240,14 @@ static void goya_init_protection_bits(struct hl_device *hdev)
goya_pb_set_block(hdev, mmPCIE_AUX_BASE);
goya_pb_set_block(hdev, mmPCIE_DB_RSV_BASE);
goya_pb_set_block(hdev, mmPCIE_PHY_BASE);
+ goya_pb_set_block(hdev, mmTPC0_NRTR_BASE);
+ goya_pb_set_block(hdev, mmTPC_PLL_BASE);
+
+ pb_addr = (mmTPC_PLL_CLK_RLX_0 & ~0xFFF) + PROT_BITS_OFFS;
+ word_offset = ((mmTPC_PLL_CLK_RLX_0 & PROT_BITS_OFFS) >> 7) << 2;
+ mask = 1 << ((mmTPC_PLL_CLK_RLX_0 & 0x7C) >> 2);
+
+ WREG32(pb_addr + word_offset, mask);
goya_init_mme_protection_bits(hdev);
@@ -2294,8 +2305,8 @@ void goya_init_security(struct hl_device *hdev)
u32 lbw_rng10_base = 0xFCC60000 & DMA_MACRO_LBW_RANGE_BASE_R_MASK;
u32 lbw_rng10_mask = 0xFFFE0000 & DMA_MACRO_LBW_RANGE_BASE_R_MASK;
- u32 lbw_rng11_base = 0xFCE00000 & DMA_MACRO_LBW_RANGE_BASE_R_MASK;
- u32 lbw_rng11_mask = 0xFFFFC000 & DMA_MACRO_LBW_RANGE_BASE_R_MASK;
+ u32 lbw_rng11_base = 0xFCE02000 & DMA_MACRO_LBW_RANGE_BASE_R_MASK;
+ u32 lbw_rng11_mask = 0xFFFFE000 & DMA_MACRO_LBW_RANGE_BASE_R_MASK;
u32 lbw_rng12_base = 0xFE484000 & DMA_MACRO_LBW_RANGE_BASE_R_MASK;
u32 lbw_rng12_mask = 0xFFFFF000 & DMA_MACRO_LBW_RANGE_BASE_R_MASK;
diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h
index a8ee52c880cd..2f02bb55f66a 100644
--- a/drivers/misc/habanalabs/habanalabs.h
+++ b/drivers/misc/habanalabs/habanalabs.h
@@ -33,6 +33,9 @@
#define HL_PLL_LOW_JOB_FREQ_USEC 5000000 /* 5 s */
+#define HL_ARMCP_INFO_TIMEOUT_USEC 10000000 /* 10s */
+#define HL_ARMCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */
+
#define HL_MAX_QUEUES 128
#define HL_MAX_JOBS_PER_CS 64
@@ -48,8 +51,9 @@
/**
* struct pgt_info - MMU hop page info.
- * @node: hash linked-list node for the pgts hash of pgts.
- * @addr: physical address of the pgt.
+ * @node: hash linked-list node for the pgts shadow hash of pgts.
+ * @phys_addr: physical address of the pgt.
+ * @shadow_addr: shadow hop in the host.
* @ctx: pointer to the owner ctx.
* @num_of_ptes: indicates how many ptes are used in the pgt.
*
@@ -59,10 +63,11 @@
* page, it is freed with its pgt_info structure.
*/
struct pgt_info {
- struct hlist_node node;
- u64 addr;
- struct hl_ctx *ctx;
- int num_of_ptes;
+ struct hlist_node node;
+ u64 phys_addr;
+ u64 shadow_addr;
+ struct hl_ctx *ctx;
+ int num_of_ptes;
};
struct hl_device;
@@ -145,6 +150,8 @@ enum hl_device_hw_state {
* mapping DRAM memory.
* @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
* fault.
+ * @pcie_dbi_base_address: Base address of the PCIE_DBI block.
+ * @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register.
* @mmu_pgt_addr: base physical address in DRAM of MMU page tables.
* @mmu_dram_default_page_addr: DRAM default page physical address.
* @mmu_pgt_size: MMU page tables total size.
@@ -186,6 +193,8 @@ struct asic_fixed_properties {
u64 va_space_dram_start_address;
u64 va_space_dram_end_address;
u64 dram_size_for_default_page_mapping;
+ u64 pcie_dbi_base_address;
+ u64 pcie_aux_dbi_reg_addr;
u64 mmu_pgt_addr;
u64 mmu_dram_default_page_addr;
u32 mmu_pgt_size;
@@ -381,14 +390,12 @@ struct hl_eq {
/**
* enum hl_asic_type - supported ASIC types.
- * @ASIC_AUTO_DETECT: ASIC type will be automatically set.
- * @ASIC_GOYA: Goya device.
* @ASIC_INVALID: Invalid ASIC type.
+ * @ASIC_GOYA: Goya device.
*/
enum hl_asic_type {
- ASIC_AUTO_DETECT,
- ASIC_GOYA,
- ASIC_INVALID
+ ASIC_INVALID,
+ ASIC_GOYA
};
struct hl_cs_parser;
@@ -472,8 +479,7 @@ enum hl_pll_frequency {
* @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with
* ASID-VA-size mask.
* @send_heartbeat: send is-alive packet to ArmCP and verify response.
- * @enable_clock_gating: enable clock gating for reducing power consumption.
- * @disable_clock_gating: disable clock for accessing registers on HBW.
+ * @debug_coresight: perform certain actions on Coresight for debugging.
* @is_device_idle: return true if device is idle, false otherwise.
* @soft_reset_late_init: perform certain actions needed after soft reset.
* @hw_queues_lock: acquire H/W queues lock.
@@ -482,6 +488,9 @@ enum hl_pll_frequency {
* @get_eeprom_data: retrieve EEPROM data from F/W.
* @send_cpu_message: send buffer to ArmCP.
* @get_hw_state: retrieve the H/W state
+ * @pci_bars_map: Map PCI BARs.
+ * @set_dram_bar_base: Set DRAM BAR to map specific device address.
+ * @init_iatu: Initialize the iATU unit inside the PCI controller.
*/
struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@@ -543,9 +552,8 @@ struct hl_asic_funcs {
void (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
u32 asid, u64 va, u64 size);
int (*send_heartbeat)(struct hl_device *hdev);
- void (*enable_clock_gating)(struct hl_device *hdev);
- void (*disable_clock_gating)(struct hl_device *hdev);
- bool (*is_device_idle)(struct hl_device *hdev);
+ int (*debug_coresight)(struct hl_device *hdev, void *data);
+ bool (*is_device_idle)(struct hl_device *hdev, char *buf, size_t size);
int (*soft_reset_late_init)(struct hl_device *hdev);
void (*hw_queues_lock)(struct hl_device *hdev);
void (*hw_queues_unlock)(struct hl_device *hdev);
@@ -555,6 +563,9 @@ struct hl_asic_funcs {
int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
u16 len, u32 timeout, long *result);
enum hl_device_hw_state (*get_hw_state)(struct hl_device *hdev);
+ int (*pci_bars_map)(struct hl_device *hdev);
+ int (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
+ int (*init_iatu)(struct hl_device *hdev);
};
@@ -582,7 +593,8 @@ struct hl_va_range {
* struct hl_ctx - user/kernel context.
* @mem_hash: holds mapping from virtual address to virtual memory area
* descriptor (hl_vm_phys_pg_list or hl_userptr).
- * @mmu_hash: holds a mapping from virtual address to pgt_info structure.
+ * @mmu_phys_hash: holds a mapping from physical address to pgt_info structure.
+ * @mmu_shadow_hash: holds a mapping from shadow address to pgt_info structure.
* @hpriv: pointer to the private (KMD) data of the process (fd).
* @hdev: pointer to the device structure.
* @refcount: reference counter for the context. Context is released only when
@@ -611,7 +623,8 @@ struct hl_va_range {
*/
struct hl_ctx {
DECLARE_HASHTABLE(mem_hash, MEM_HASH_TABLE_BITS);
- DECLARE_HASHTABLE(mmu_hash, MMU_HASH_TABLE_BITS);
+ DECLARE_HASHTABLE(mmu_phys_hash, MMU_HASH_TABLE_BITS);
+ DECLARE_HASHTABLE(mmu_shadow_hash, MMU_HASH_TABLE_BITS);
struct hl_fpriv *hpriv;
struct hl_device *hdev;
struct kref refcount;
@@ -850,6 +863,29 @@ struct hl_vm {
u8 init_done;
};
+
+/*
+ * DEBUG, PROFILING STRUCTURE
+ */
+
+/**
+ * struct hl_debug_params - Coresight debug parameters.
+ * @input: pointer to component specific input parameters.
+ * @output: pointer to component specific output parameters.
+ * @output_size: size of output buffer.
+ * @reg_idx: relevant register ID.
+ * @op: component operation to execute.
+ * @enable: true if to enable component debugging, false otherwise.
+ */
+struct hl_debug_params {
+ void *input;
+ void *output;
+ u32 output_size;
+ u32 reg_idx;
+ u32 op;
+ bool enable;
+};
+
/*
* FILE PRIVATE STRUCTURE
*/
@@ -997,6 +1033,12 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | \
(val) << REG_FIELD_SHIFT(reg, field))
+#define HL_ENG_BUSY(buf, size, fmt, ...) ({ \
+ if (buf) \
+ snprintf(buf, size, fmt, ##__VA_ARGS__); \
+ false; \
+ })
+
struct hwmon_chip_info;
/**
@@ -1047,7 +1089,8 @@ struct hl_device_reset_work {
* @asic_specific: ASIC specific information to use only from ASIC files.
* @mmu_pgt_pool: pool of available MMU hops.
* @vm: virtual memory manager for MMU.
- * @mmu_cache_lock: protects MMU cache invalidation as it can serve one context
+ * @mmu_cache_lock: protects MMU cache invalidation as it can serve one context.
+ * @mmu_shadow_hop0: shadow mapping of the MMU hop 0 zone.
* @hwmon_dev: H/W monitor device.
* @pm_mng_profile: current power management profile.
* @hl_chip_info: ASIC's sensors information.
@@ -1082,6 +1125,7 @@ struct hl_device_reset_work {
* @init_done: is the initialization of the device done.
* @mmu_enable: is MMU enabled.
* @device_cpu_disabled: is the device CPU disabled (due to timeouts)
+ * @dma_mask: the dma mask that was set for this device
*/
struct hl_device {
struct pci_dev *pdev;
@@ -1117,6 +1161,7 @@ struct hl_device {
struct gen_pool *mmu_pgt_pool;
struct hl_vm vm;
struct mutex mmu_cache_lock;
+ void *mmu_shadow_hop0;
struct device *hwmon_dev;
enum hl_pm_mng_profile pm_mng_profile;
struct hwmon_chip_info *hl_chip_info;
@@ -1151,6 +1196,7 @@ struct hl_device {
u8 dram_default_page_mapping;
u8 init_done;
u8 device_cpu_disabled;
+ u8 dma_mask;
/* Parameters for bring-up */
u8 mmu_enable;
@@ -1245,6 +1291,7 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
int hl_device_open(struct inode *inode, struct file *filp);
bool hl_device_disabled_or_in_reset(struct hl_device *hdev);
+enum hl_device_status hl_device_status(struct hl_device *hdev);
int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
enum hl_asic_type asic_type, int minor);
void destroy_hdev(struct hl_device *hdev);
@@ -1351,6 +1398,31 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size);
void hl_mmu_swap_out(struct hl_ctx *ctx);
void hl_mmu_swap_in(struct hl_ctx *ctx);
+int hl_fw_push_fw_to_device(struct hl_device *hdev, const char *fw_name,
+ void __iomem *dst);
+int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode);
+int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
+ u16 len, u32 timeout, long *result);
+int hl_fw_test_cpu_queue(struct hl_device *hdev);
+void *hl_fw_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size,
+ dma_addr_t *dma_handle);
+void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
+ void *vaddr);
+int hl_fw_send_heartbeat(struct hl_device *hdev);
+int hl_fw_armcp_info_get(struct hl_device *hdev);
+int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
+
+int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
+ bool is_wc[3]);
+int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
+int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
+ u64 addr);
+int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
+ u64 dram_base_address, u64 host_phys_size);
+int hl_pci_init(struct hl_device *hdev, u8 dma_mask);
+void hl_pci_fini(struct hl_device *hdev);
+int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask);
+
long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr);
void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq);
long hl_get_temperature(struct hl_device *hdev, int sensor_index, u32 attr);
diff --git a/drivers/misc/habanalabs/habanalabs_drv.c b/drivers/misc/habanalabs/habanalabs_drv.c
index 748601463f11..1667df7ca64c 100644
--- a/drivers/misc/habanalabs/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/habanalabs_drv.c
@@ -218,7 +218,7 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
hdev->disabled = true;
hdev->pdev = pdev; /* can be NULL in case of simulator device */
- if (asic_type == ASIC_AUTO_DETECT) {
+ if (pdev) {
hdev->asic_type = get_asic_type(pdev->device);
if (hdev->asic_type == ASIC_INVALID) {
dev_err(&pdev->dev, "Unsupported ASIC\n");
@@ -229,6 +229,9 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
hdev->asic_type = asic_type;
}
+ /* Set default DMA mask to 32 bits */
+ hdev->dma_mask = 32;
+
mutex_lock(&hl_devs_idr_lock);
if (minor == -1) {
@@ -334,7 +337,7 @@ static int hl_pci_probe(struct pci_dev *pdev,
" device found [%04x:%04x] (rev %x)\n",
(int)pdev->vendor, (int)pdev->device, (int)pdev->revision);
- rc = create_hdev(&hdev, pdev, ASIC_AUTO_DETECT, -1);
+ rc = create_hdev(&hdev, pdev, ASIC_INVALID, -1);
if (rc)
return rc;
diff --git a/drivers/misc/habanalabs/habanalabs_ioctl.c b/drivers/misc/habanalabs/habanalabs_ioctl.c
index 2c2739a3c5ec..eeefb22023e9 100644
--- a/drivers/misc/habanalabs/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/habanalabs_ioctl.c
@@ -12,6 +12,32 @@
#include <linux/uaccess.h>
#include <linux/slab.h>
+static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
+ [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
+ [HL_DEBUG_OP_ETF] = sizeof(struct hl_debug_params_etf),
+ [HL_DEBUG_OP_STM] = sizeof(struct hl_debug_params_stm),
+ [HL_DEBUG_OP_FUNNEL] = 0,
+ [HL_DEBUG_OP_BMON] = sizeof(struct hl_debug_params_bmon),
+ [HL_DEBUG_OP_SPMU] = sizeof(struct hl_debug_params_spmu),
+ [HL_DEBUG_OP_TIMESTAMP] = 0
+
+};
+
+static int device_status_info(struct hl_device *hdev, struct hl_info_args *args)
+{
+ struct hl_info_device_status dev_stat = {0};
+ u32 size = args->return_size;
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+ if ((!size) || (!out))
+ return -EINVAL;
+
+ dev_stat.status = hl_device_status(hdev);
+
+ return copy_to_user(out, &dev_stat,
+ min((size_t)size, sizeof(dev_stat))) ? -EFAULT : 0;
+}
+
static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
{
struct hl_info_hw_ip_info hw_ip = {0};
@@ -93,21 +119,91 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;
- hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev);
+ hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev, NULL, 0);
return copy_to_user(out, &hw_idle,
min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
}
+static int debug_coresight(struct hl_device *hdev, struct hl_debug_args *args)
+{
+ struct hl_debug_params *params;
+ void *input = NULL, *output = NULL;
+ int rc;
+
+ params = kzalloc(sizeof(*params), GFP_KERNEL);
+ if (!params)
+ return -ENOMEM;
+
+ params->reg_idx = args->reg_idx;
+ params->enable = args->enable;
+ params->op = args->op;
+
+ if (args->input_ptr && args->input_size) {
+ input = memdup_user((const void __user *) args->input_ptr,
+ args->input_size);
+ if (IS_ERR(input)) {
+ rc = PTR_ERR(input);
+ input = NULL;
+ dev_err(hdev->dev,
+ "error %d when copying input debug data\n", rc);
+ goto out;
+ }
+
+ params->input = input;
+ }
+
+ if (args->output_ptr && args->output_size) {
+ output = kzalloc(args->output_size, GFP_KERNEL);
+ if (!output) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ params->output = output;
+ params->output_size = args->output_size;
+ }
+
+ rc = hdev->asic_funcs->debug_coresight(hdev, params);
+ if (rc) {
+ dev_err(hdev->dev,
+ "debug coresight operation failed %d\n", rc);
+ goto out;
+ }
+
+ if (output) {
+ if (copy_to_user((void __user *) (uintptr_t) args->output_ptr,
+ output,
+ args->output_size)) {
+ dev_err(hdev->dev,
+ "copy to user failed in debug ioctl\n");
+ rc = -EFAULT;
+ goto out;
+ }
+ }
+
+out:
+ kfree(params);
+ kfree(output);
+ kfree(input);
+
+ return rc;
+}
+
static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
{
struct hl_info_args *args = data;
struct hl_device *hdev = hpriv->hdev;
int rc;
+ /* We want to return device status even if it disabled or in reset */
+ if (args->op == HL_INFO_DEVICE_STATUS)
+ return device_status_info(hdev, args);
+
if (hl_device_disabled_or_in_reset(hdev)) {
- dev_err(hdev->dev,
- "Device is disabled or in reset. Can't execute INFO IOCTL\n");
+ dev_warn_ratelimited(hdev->dev,
+ "Device is %s. Can't execute INFO IOCTL\n",
+ atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
return -EBUSY;
}
@@ -137,6 +233,40 @@ static int hl_info_ioctl(struct hl_fpriv *hpriv, void *data)
return rc;
}
+static int hl_debug_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+ struct hl_debug_args *args = data;
+ struct hl_device *hdev = hpriv->hdev;
+ int rc = 0;
+
+ if (hl_device_disabled_or_in_reset(hdev)) {
+ dev_warn_ratelimited(hdev->dev,
+ "Device is %s. Can't execute DEBUG IOCTL\n",
+ atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+ return -EBUSY;
+ }
+
+ switch (args->op) {
+ case HL_DEBUG_OP_ETR:
+ case HL_DEBUG_OP_ETF:
+ case HL_DEBUG_OP_STM:
+ case HL_DEBUG_OP_FUNNEL:
+ case HL_DEBUG_OP_BMON:
+ case HL_DEBUG_OP_SPMU:
+ case HL_DEBUG_OP_TIMESTAMP:
+ args->input_size =
+ min(args->input_size, hl_debug_struct_size[args->op]);
+ rc = debug_coresight(hdev, args);
+ break;
+ default:
+ dev_err(hdev->dev, "Invalid request %d\n", args->op);
+ rc = -ENOTTY;
+ break;
+ }
+
+ return rc;
+}
+
#define HL_IOCTL_DEF(ioctl, _func) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func}
@@ -145,7 +275,8 @@ static const struct hl_ioctl_desc hl_ioctls[] = {
HL_IOCTL_DEF(HL_IOCTL_CB, hl_cb_ioctl),
HL_IOCTL_DEF(HL_IOCTL_CS, hl_cs_ioctl),
HL_IOCTL_DEF(HL_IOCTL_WAIT_CS, hl_cs_wait_ioctl),
- HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl)
+ HL_IOCTL_DEF(HL_IOCTL_MEMORY, hl_mem_ioctl),
+ HL_IOCTL_DEF(HL_IOCTL_DEBUG, hl_debug_ioctl)
};
#define HL_CORE_IOCTL_COUNT ARRAY_SIZE(hl_ioctls)
diff --git a/drivers/misc/habanalabs/include/armcp_if.h b/drivers/misc/habanalabs/include/armcp_if.h
index 9dddb917e72c..c8f28cadc335 100644
--- a/drivers/misc/habanalabs/include/armcp_if.h
+++ b/drivers/misc/habanalabs/include/armcp_if.h
@@ -32,8 +32,6 @@ struct hl_eq_entry {
#define EQ_CTL_EVENT_TYPE_SHIFT 16
#define EQ_CTL_EVENT_TYPE_MASK 0x03FF0000
-#define EVENT_QUEUE_MSIX_IDX 5
-
enum pq_init_status {
PQ_INIT_STATUS_NA = 0,
PQ_INIT_STATUS_READY_FOR_CP,
@@ -302,6 +300,14 @@ enum armcp_pwm_attributes {
armcp_pwm_enable
};
+#define HL_CPU_PKT_SHIFT 5
+#define HL_CPU_PKT_SIZE (1 << HL_CPU_PKT_SHIFT)
+#define HL_CPU_PKT_MASK (~((1 << HL_CPU_PKT_SHIFT) - 1))
+#define HL_CPU_MAX_PKTS_IN_CB 32
+#define HL_CPU_CB_SIZE (HL_CPU_PKT_SIZE * \
+ HL_CPU_MAX_PKTS_IN_CB)
+#define HL_CPU_ACCESSIBLE_MEM_SIZE (HL_QUEUE_LENGTH * HL_CPU_CB_SIZE)
+
/* Event Queue Packets */
struct eq_generic_event {
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_masks.h
index 2cf5c46b6e8e..4e0dbbbbde20 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_masks.h
@@ -188,4 +188,3 @@
#define CPU_CA53_CFG_ARM_PMU_EVENT_MASK 0x3FFFFFFF
#endif /* ASIC_REG_CPU_CA53_CFG_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_regs.h
index 840ccffa1081..f3faf1aad91a 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/cpu_ca53_cfg_regs.h
@@ -58,4 +58,3 @@
#define mmCPU_CA53_CFG_ARM_PMU_1 0x441214
#endif /* ASIC_REG_CPU_CA53_CFG_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/cpu_if_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/cpu_if_regs.h
index f23cb3e41c30..cf657918962a 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/cpu_if_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/cpu_if_regs.h
@@ -46,4 +46,3 @@
#define mmCPU_IF_AXI_SPLIT_INTR 0x442130
#endif /* ASIC_REG_CPU_IF_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/cpu_pll_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/cpu_pll_regs.h
index 8fc97f838ada..8c8f9726d4b9 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/cpu_pll_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/cpu_pll_regs.h
@@ -102,4 +102,3 @@
#define mmCPU_PLL_FREQ_CALC_EN 0x4A2440
#endif /* ASIC_REG_CPU_PLL_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_regs.h
index 61c8cd9ce58b..0b246fe6ad04 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_0_regs.h
@@ -206,4 +206,3 @@
#define mmDMA_CH_0_MEM_INIT_BUSY 0x4011FC
#endif /* ASIC_REG_DMA_CH_0_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_1_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_1_regs.h
index 92960ef5e308..5449031722f2 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_1_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_1_regs.h
@@ -206,4 +206,3 @@
#define mmDMA_CH_1_MEM_INIT_BUSY 0x4091FC
#endif /* ASIC_REG_DMA_CH_1_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_2_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_2_regs.h
index 4e37871a51bb..a4768521d18a 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_2_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_2_regs.h
@@ -206,4 +206,3 @@
#define mmDMA_CH_2_MEM_INIT_BUSY 0x4111FC
#endif /* ASIC_REG_DMA_CH_2_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_3_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_3_regs.h
index a2d6aeb32a18..619d01897ff8 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_3_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_3_regs.h
@@ -206,4 +206,3 @@
#define mmDMA_CH_3_MEM_INIT_BUSY 0x4191FC
#endif /* ASIC_REG_DMA_CH_3_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_4_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_4_regs.h
index 400d6fd3acf5..038617e163f1 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_4_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_ch_4_regs.h
@@ -206,4 +206,3 @@
#define mmDMA_CH_4_MEM_INIT_BUSY 0x4211FC
#endif /* ASIC_REG_DMA_CH_4_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_masks.h
index 8d965443c51e..f43b564af1be 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_masks.h
@@ -102,4 +102,3 @@
#define DMA_MACRO_RAZWI_HBW_RD_ID_R_MASK 0x1FFFFFFF
#endif /* ASIC_REG_DMA_MACRO_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_regs.h
index 8bfcb001189d..c3bfc1b8e3fd 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_macro_regs.h
@@ -178,4 +178,3 @@
#define mmDMA_MACRO_RAZWI_HBW_RD_ID 0x4B0158
#endif /* ASIC_REG_DMA_MACRO_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_masks.h
index 9f33f351a3c1..bc977488c072 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_masks.h
@@ -206,4 +206,3 @@
#define DMA_NRTR_NON_LIN_SCRAMB_EN_MASK 0x1
#endif /* ASIC_REG_DMA_NRTR_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_regs.h
index d8293745a02b..c4abc7ff1fc6 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_nrtr_regs.h
@@ -224,4 +224,3 @@
#define mmDMA_NRTR_NON_LIN_SCRAMB 0x1C0604
#endif /* ASIC_REG_DMA_NRTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_masks.h
index 10619dbb9b17..b17f72c31ab6 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_masks.h
@@ -462,4 +462,3 @@
#define DMA_QM_0_CQ_BUF_RDATA_VAL_MASK 0xFFFFFFFF
#endif /* ASIC_REG_DMA_QM_0_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_regs.h
index c693bc5dcb22..bf360b301154 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_0_regs.h
@@ -176,4 +176,3 @@
#define mmDMA_QM_0_CQ_BUF_RDATA 0x40030C
#endif /* ASIC_REG_DMA_QM_0_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_1_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_1_regs.h
index da928390f89c..51d432d05ac4 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_1_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_1_regs.h
@@ -176,4 +176,3 @@
#define mmDMA_QM_1_CQ_BUF_RDATA 0x40830C
#endif /* ASIC_REG_DMA_QM_1_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_2_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_2_regs.h
index b4f06e9b71d6..18fc0c2b6cc2 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_2_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_2_regs.h
@@ -176,4 +176,3 @@
#define mmDMA_QM_2_CQ_BUF_RDATA 0x41030C
#endif /* ASIC_REG_DMA_QM_2_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_3_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_3_regs.h
index 53e3cd78a06b..6cf7204bf5cc 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_3_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_3_regs.h
@@ -176,4 +176,3 @@
#define mmDMA_QM_3_CQ_BUF_RDATA 0x41830C
#endif /* ASIC_REG_DMA_QM_3_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_4_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_4_regs.h
index e0eb5f260201..36fef2682875 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_4_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/dma_qm_4_regs.h
@@ -176,4 +176,3 @@
#define mmDMA_QM_4_CQ_BUF_RDATA 0x42030C
#endif /* ASIC_REG_DMA_QM_4_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h
index a161ecfe74de..8618891d5afa 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h
@@ -189,18 +189,6 @@
1 << CPU_CA53_CFG_ARM_RST_CONTROL_NL2RESET_SHIFT |\
1 << CPU_CA53_CFG_ARM_RST_CONTROL_NMBISTRESET_SHIFT)
-/* PCI CONFIGURATION SPACE */
-#define mmPCI_CONFIG_ELBI_ADDR 0xFF0
-#define mmPCI_CONFIG_ELBI_DATA 0xFF4
-#define mmPCI_CONFIG_ELBI_CTRL 0xFF8
-#define PCI_CONFIG_ELBI_CTRL_WRITE (1 << 31)
-
-#define mmPCI_CONFIG_ELBI_STS 0xFFC
-#define PCI_CONFIG_ELBI_STS_ERR (1 << 30)
-#define PCI_CONFIG_ELBI_STS_DONE (1 << 31)
-#define PCI_CONFIG_ELBI_STS_MASK (PCI_CONFIG_ELBI_STS_ERR | \
- PCI_CONFIG_ELBI_STS_DONE)
-
#define GOYA_IRQ_HBW_ID_MASK 0x1FFF
#define GOYA_IRQ_HBW_ID_SHIFT 0
#define GOYA_IRQ_HBW_INTERNAL_ID_MASK 0xE000
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
index 6cb0b6e54d41..506e71e201e1 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/goya_regs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0
*
- * Copyright 2016-2018 HabanaLabs, Ltd.
+ * Copyright 2016-2019 HabanaLabs, Ltd.
* All Rights Reserved.
*
*/
@@ -12,6 +12,7 @@
#include "stlb_regs.h"
#include "mmu_regs.h"
#include "pcie_aux_regs.h"
+#include "pcie_wrap_regs.h"
#include "psoc_global_conf_regs.h"
#include "psoc_spi_regs.h"
#include "psoc_mme_pll_regs.h"
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/ic_pll_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/ic_pll_regs.h
index 0a743817aad7..4ae7fed8b18c 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/ic_pll_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/ic_pll_regs.h
@@ -102,4 +102,3 @@
#define mmIC_PLL_FREQ_CALC_EN 0x4A3440
#endif /* ASIC_REG_IC_PLL_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mc_pll_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mc_pll_regs.h
index 4408188aa067..6d35d852798b 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/mc_pll_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/mc_pll_regs.h
@@ -102,4 +102,3 @@
#define mmMC_PLL_FREQ_CALC_EN 0x4A1440
#endif /* ASIC_REG_MC_PLL_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_masks.h
index 687bca5c5fe3..6c23f8b96e7e 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_masks.h
@@ -650,4 +650,3 @@
#define MME1_RTR_NON_LIN_SCRAMB_EN_MASK 0x1
#endif /* ASIC_REG_MME1_RTR_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_regs.h
index c248339a1cbe..122e9d529939 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme1_rtr_regs.h
@@ -328,4 +328,3 @@
#define mmMME1_RTR_NON_LIN_SCRAMB 0x40604
#endif /* ASIC_REG_MME1_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme2_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme2_rtr_regs.h
index 7a2b777bdc4f..00ce2252bbfb 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/mme2_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme2_rtr_regs.h
@@ -328,4 +328,3 @@
#define mmMME2_RTR_NON_LIN_SCRAMB 0x80604
#endif /* ASIC_REG_MME2_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme3_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme3_rtr_regs.h
index b78f8bc387fc..8e3eb7fd2070 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/mme3_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme3_rtr_regs.h
@@ -328,4 +328,3 @@
#define mmMME3_RTR_NON_LIN_SCRAMB 0xC0604
#endif /* ASIC_REG_MME3_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme4_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme4_rtr_regs.h
index d9a4a02cefa3..79b67bbc8567 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/mme4_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme4_rtr_regs.h
@@ -328,4 +328,3 @@
#define mmMME4_RTR_NON_LIN_SCRAMB 0x100604
#endif /* ASIC_REG_MME4_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme5_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme5_rtr_regs.h
index 205adc988407..0ac3c37ce47f 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/mme5_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme5_rtr_regs.h
@@ -328,4 +328,3 @@
#define mmMME5_RTR_NON_LIN_SCRAMB 0x140604
#endif /* ASIC_REG_MME5_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme6_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme6_rtr_regs.h
index fcec68388278..50c49cce72a6 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/mme6_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme6_rtr_regs.h
@@ -328,4 +328,3 @@
#define mmMME6_RTR_NON_LIN_SCRAMB 0x180604
#endif /* ASIC_REG_MME6_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_masks.h
index a0d4382fbbd0..fe7d95bdcef9 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_masks.h
@@ -370,4 +370,3 @@
#define MME_CMDQ_CQ_BUF_RDATA_VAL_MASK 0xFFFFFFFF
#endif /* ASIC_REG_MME_CMDQ_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_regs.h
index 5c2f6b870a58..5f8b85d2b4b1 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme_cmdq_regs.h
@@ -136,4 +136,3 @@
#define mmMME_CMDQ_CQ_BUF_RDATA 0xD930C
#endif /* ASIC_REG_MME_CMDQ_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme_masks.h
index c7b1b0bb3384..1882c413cbe0 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/mme_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme_masks.h
@@ -1534,4 +1534,3 @@
#define MME_SHADOW_3_E_BUBBLES_PER_SPLIT_ID_MASK 0xFF000000
#endif /* ASIC_REG_MME_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_masks.h
index d4bfa58dce19..e464e381555c 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_masks.h
@@ -462,4 +462,3 @@
#define MME_QM_CQ_BUF_RDATA_VAL_MASK 0xFFFFFFFF
#endif /* ASIC_REG_MME_QM_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_regs.h
index b5b1c776f6c3..538708beffc9 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme_qm_regs.h
@@ -176,4 +176,3 @@
#define mmMME_QM_CQ_BUF_RDATA 0xD830C
#endif /* ASIC_REG_MME_QM_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mme_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mme_regs.h
index 9436b1e2705a..0396cbfd5c89 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/mme_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/mme_regs.h
@@ -1150,4 +1150,3 @@
#define mmMME_SHADOW_3_E_BUBBLES_PER_SPLIT 0xD0BAC
#endif /* ASIC_REG_MME_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mmu_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/mmu_masks.h
index 3a78078d3c4c..c3e69062b135 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/mmu_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/mmu_masks.h
@@ -140,4 +140,3 @@
#define MMU_ACCESS_ERROR_CAPTURE_VA_VA_31_0_MASK 0xFFFFFFFF
#endif /* ASIC_REG_MMU_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/mmu_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/mmu_regs.h
index bec6c014135c..7ec81f12031e 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/mmu_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/mmu_regs.h
@@ -50,4 +50,3 @@
#define mmMMU_ACCESS_ERROR_CAPTURE_VA 0x480040
#endif /* ASIC_REG_MMU_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_masks.h
index 209e41402a11..ceb59f2e28b3 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_masks.h
@@ -206,4 +206,3 @@
#define PCI_NRTR_NON_LIN_SCRAMB_EN_MASK 0x1
#endif /* ASIC_REG_PCI_NRTR_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_regs.h
index 447e5d4e7dc8..dd067f301ac2 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/pci_nrtr_regs.h
@@ -224,4 +224,3 @@
#define mmPCI_NRTR_NON_LIN_SCRAMB 0x604
#endif /* ASIC_REG_PCI_NRTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/pcie_aux_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/pcie_aux_regs.h
index daaf5d9079dc..35b1d8ac6f63 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/pcie_aux_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/pcie_aux_regs.h
@@ -240,4 +240,3 @@
#define mmPCIE_AUX_PERST 0xC079B8
#endif /* ASIC_REG_PCIE_AUX_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/pcie_wrap_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/pcie_wrap_regs.h
new file mode 100644
index 000000000000..d1e55aace4a0
--- /dev/null
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/pcie_wrap_regs.h
@@ -0,0 +1,306 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+/************************************
+ ** This is an auto-generated file **
+ ** DO NOT EDIT BELOW **
+ ************************************/
+
+#ifndef ASIC_REG_PCIE_WRAP_REGS_H_
+#define ASIC_REG_PCIE_WRAP_REGS_H_
+
+/*
+ *****************************************
+ * PCIE_WRAP (Prototype: PCIE_WRAP)
+ *****************************************
+ */
+
+#define mmPCIE_WRAP_PHY_RST_N 0xC01300
+
+#define mmPCIE_WRAP_OUTSTAND_TRANS 0xC01400
+
+#define mmPCIE_WRAP_MASK_REQ 0xC01404
+
+#define mmPCIE_WRAP_IND_AWADDR_L 0xC01500
+
+#define mmPCIE_WRAP_IND_AWADDR_H 0xC01504
+
+#define mmPCIE_WRAP_IND_AWLEN 0xC01508
+
+#define mmPCIE_WRAP_IND_AWSIZE 0xC0150C
+
+#define mmPCIE_WRAP_IND_AWBURST 0xC01510
+
+#define mmPCIE_WRAP_IND_AWLOCK 0xC01514
+
+#define mmPCIE_WRAP_IND_AWCACHE 0xC01518
+
+#define mmPCIE_WRAP_IND_AWPROT 0xC0151C
+
+#define mmPCIE_WRAP_IND_AWVALID 0xC01520
+
+#define mmPCIE_WRAP_IND_WDATA_0 0xC01524
+
+#define mmPCIE_WRAP_IND_WDATA_1 0xC01528
+
+#define mmPCIE_WRAP_IND_WDATA_2 0xC0152C
+
+#define mmPCIE_WRAP_IND_WDATA_3 0xC01530
+
+#define mmPCIE_WRAP_IND_WSTRB 0xC01544
+
+#define mmPCIE_WRAP_IND_WLAST 0xC01548
+
+#define mmPCIE_WRAP_IND_WVALID 0xC0154C
+
+#define mmPCIE_WRAP_IND_BRESP 0xC01550
+
+#define mmPCIE_WRAP_IND_BVALID 0xC01554
+
+#define mmPCIE_WRAP_IND_ARADDR_0 0xC01558
+
+#define mmPCIE_WRAP_IND_ARADDR_1 0xC0155C
+
+#define mmPCIE_WRAP_IND_ARLEN 0xC01560
+
+#define mmPCIE_WRAP_IND_ARSIZE 0xC01564
+
+#define mmPCIE_WRAP_IND_ARBURST 0xC01568
+
+#define mmPCIE_WRAP_IND_ARLOCK 0xC0156C
+
+#define mmPCIE_WRAP_IND_ARCACHE 0xC01570
+
+#define mmPCIE_WRAP_IND_ARPROT 0xC01574
+
+#define mmPCIE_WRAP_IND_ARVALID 0xC01578
+
+#define mmPCIE_WRAP_IND_RDATA_0 0xC0157C
+
+#define mmPCIE_WRAP_IND_RDATA_1 0xC01580
+
+#define mmPCIE_WRAP_IND_RDATA_2 0xC01584
+
+#define mmPCIE_WRAP_IND_RDATA_3 0xC01588
+
+#define mmPCIE_WRAP_IND_RLAST 0xC0159C
+
+#define mmPCIE_WRAP_IND_RRESP 0xC015A0
+
+#define mmPCIE_WRAP_IND_RVALID 0xC015A4
+
+#define mmPCIE_WRAP_IND_AWMISC_INFO 0xC015A8
+
+#define mmPCIE_WRAP_IND_AWMISC_INFO_HDR_34DW_0 0xC015AC
+
+#define mmPCIE_WRAP_IND_AWMISC_INFO_HDR_34DW_1 0xC015B0
+
+#define mmPCIE_WRAP_IND_AWMISC_INFO_P_TAG 0xC015B4
+
+#define mmPCIE_WRAP_IND_AWMISC_INFO_ATU_BYPAS 0xC015B8
+
+#define mmPCIE_WRAP_IND_AWMISC_INFO_FUNC_NUM 0xC015BC
+
+#define mmPCIE_WRAP_IND_AWMISC_INFO_VFUNC_ACT 0xC015C0
+
+#define mmPCIE_WRAP_IND_AWMISC_INFO_VFUNC_NUM 0xC015C4
+
+#define mmPCIE_WRAP_IND_AWMISC_INFO_TLPPRFX 0xC015C8
+
+#define mmPCIE_WRAP_IND_ARMISC_INFO 0xC015CC
+
+#define mmPCIE_WRAP_IND_ARMISC_INFO_TLPPRFX 0xC015D0
+
+#define mmPCIE_WRAP_IND_ARMISC_INFO_ATU_BYP 0xC015D4
+
+#define mmPCIE_WRAP_IND_ARMISC_INFO_FUNC_NUM 0xC015D8
+
+#define mmPCIE_WRAP_IND_ARMISC_INFO_VFUNC_ACT 0xC015DC
+
+#define mmPCIE_WRAP_IND_ARMISC_INFO_VFUNC_NUM 0xC015E0
+
+#define mmPCIE_WRAP_SLV_AWMISC_INFO 0xC01800
+
+#define mmPCIE_WRAP_SLV_AWMISC_INFO_HDR_34DW_0 0xC01804
+
+#define mmPCIE_WRAP_SLV_AWMISC_INFO_HDR_34DW_1 0xC01808
+
+#define mmPCIE_WRAP_SLV_AWMISC_INFO_P_TAG 0xC0180C
+
+#define mmPCIE_WRAP_SLV_AWMISC_INFO_ATU_BYPAS 0xC01810
+
+#define mmPCIE_WRAP_SLV_AWMISC_INFO_FUNC_NUM 0xC01814
+
+#define mmPCIE_WRAP_SLV_AWMISC_INFO_VFUNC_ACT 0xC01818
+
+#define mmPCIE_WRAP_SLV_AWMISC_INFO_VFUNC_NUM 0xC0181C
+
+#define mmPCIE_WRAP_SLV_AWMISC_INFO_TLPPRFX 0xC01820
+
+#define mmPCIE_WRAP_SLV_ARMISC_INFO 0xC01824
+
+#define mmPCIE_WRAP_SLV_ARMISC_INFO_TLPPRFX 0xC01828
+
+#define mmPCIE_WRAP_SLV_ARMISC_INFO_ATU_BYP 0xC0182C
+
+#define mmPCIE_WRAP_SLV_ARMISC_INFO_FUNC_NUM 0xC01830
+
+#define mmPCIE_WRAP_SLV_ARMISC_INFO_VFUNC_ACT 0xC01834
+
+#define mmPCIE_WRAP_SLV_ARMISC_INFO_VFUNC_NUM 0xC01838
+
+#define mmPCIE_WRAP_MAX_QID 0xC01900
+
+#define mmPCIE_WRAP_DB_BASE_ADDR_L_0 0xC01910
+
+#define mmPCIE_WRAP_DB_BASE_ADDR_L_1 0xC01914
+
+#define mmPCIE_WRAP_DB_BASE_ADDR_L_2 0xC01918
+
+#define mmPCIE_WRAP_DB_BASE_ADDR_L_3 0xC0191C
+
+#define mmPCIE_WRAP_DB_BASE_ADDR_H_0 0xC01920
+
+#define mmPCIE_WRAP_DB_BASE_ADDR_H_1 0xC01924
+
+#define mmPCIE_WRAP_DB_BASE_ADDR_H_2 0xC01928
+
+#define mmPCIE_WRAP_DB_BASE_ADDR_H_3 0xC0192C
+
+#define mmPCIE_WRAP_DB_MASK 0xC01940
+
+#define mmPCIE_WRAP_SQ_BASE_ADDR_H 0xC01A00
+
+#define mmPCIE_WRAP_SQ_BASE_ADDR_L 0xC01A04
+
+#define mmPCIE_WRAP_SQ_STRIDE_ACCRESS 0xC01A08
+
+#define mmPCIE_WRAP_SQ_POP_CMD 0xC01A10
+
+#define mmPCIE_WRAP_SQ_POP_DATA 0xC01A14
+
+#define mmPCIE_WRAP_DB_INTR_0 0xC01A20
+
+#define mmPCIE_WRAP_DB_INTR_1 0xC01A24
+
+#define mmPCIE_WRAP_DB_INTR_2 0xC01A28
+
+#define mmPCIE_WRAP_DB_INTR_3 0xC01A2C
+
+#define mmPCIE_WRAP_DB_INTR_4 0xC01A30
+
+#define mmPCIE_WRAP_DB_INTR_5 0xC01A34
+
+#define mmPCIE_WRAP_DB_INTR_6 0xC01A38
+
+#define mmPCIE_WRAP_DB_INTR_7 0xC01A3C
+
+#define mmPCIE_WRAP_MMU_BYPASS_DMA 0xC01A80
+
+#define mmPCIE_WRAP_MMU_BYPASS_NON_DMA 0xC01A84
+
+#define mmPCIE_WRAP_ASID_NON_DMA 0xC01A90
+
+#define mmPCIE_WRAP_ASID_DMA_0 0xC01AA0
+
+#define mmPCIE_WRAP_ASID_DMA_1 0xC01AA4
+
+#define mmPCIE_WRAP_ASID_DMA_2 0xC01AA8
+
+#define mmPCIE_WRAP_ASID_DMA_3 0xC01AAC
+
+#define mmPCIE_WRAP_ASID_DMA_4 0xC01AB0
+
+#define mmPCIE_WRAP_ASID_DMA_5 0xC01AB4
+
+#define mmPCIE_WRAP_ASID_DMA_6 0xC01AB8
+
+#define mmPCIE_WRAP_ASID_DMA_7 0xC01ABC
+
+#define mmPCIE_WRAP_CPU_HOT_RST 0xC01AE0
+
+#define mmPCIE_WRAP_AXI_PROT_OVR 0xC01AE4
+
+#define mmPCIE_WRAP_CACHE_OVR 0xC01B00
+
+#define mmPCIE_WRAP_LOCK_OVR 0xC01B04
+
+#define mmPCIE_WRAP_PROT_OVR 0xC01B08
+
+#define mmPCIE_WRAP_ARUSER_OVR 0xC01B0C
+
+#define mmPCIE_WRAP_AWUSER_OVR 0xC01B10
+
+#define mmPCIE_WRAP_ARUSER_OVR_EN 0xC01B14
+
+#define mmPCIE_WRAP_AWUSER_OVR_EN 0xC01B18
+
+#define mmPCIE_WRAP_MAX_OUTSTAND 0xC01B20
+
+#define mmPCIE_WRAP_MST_IN 0xC01B24
+
+#define mmPCIE_WRAP_RSP_OK 0xC01B28
+
+#define mmPCIE_WRAP_LBW_CACHE_OVR 0xC01B40
+
+#define mmPCIE_WRAP_LBW_LOCK_OVR 0xC01B44
+
+#define mmPCIE_WRAP_LBW_PROT_OVR 0xC01B48
+
+#define mmPCIE_WRAP_LBW_ARUSER_OVR 0xC01B4C
+
+#define mmPCIE_WRAP_LBW_AWUSER_OVR 0xC01B50
+
+#define mmPCIE_WRAP_LBW_ARUSER_OVR_EN 0xC01B58
+
+#define mmPCIE_WRAP_LBW_AWUSER_OVR_EN 0xC01B5C
+
+#define mmPCIE_WRAP_LBW_MAX_OUTSTAND 0xC01B60
+
+#define mmPCIE_WRAP_LBW_MST_IN 0xC01B64
+
+#define mmPCIE_WRAP_LBW_RSP_OK 0xC01B68
+
+#define mmPCIE_WRAP_QUEUE_INIT 0xC01C00
+
+#define mmPCIE_WRAP_AXI_SPLIT_INTR_0 0xC01C10
+
+#define mmPCIE_WRAP_AXI_SPLIT_INTR_1 0xC01C14
+
+#define mmPCIE_WRAP_DB_AWUSER 0xC01D00
+
+#define mmPCIE_WRAP_DB_ARUSER 0xC01D04
+
+#define mmPCIE_WRAP_PCIE_AWUSER 0xC01D08
+
+#define mmPCIE_WRAP_PCIE_ARUSER 0xC01D0C
+
+#define mmPCIE_WRAP_PSOC_AWUSER 0xC01D10
+
+#define mmPCIE_WRAP_PSOC_ARUSER 0xC01D14
+
+#define mmPCIE_WRAP_SCH_Q_AWUSER 0xC01D18
+
+#define mmPCIE_WRAP_SCH_Q_ARUSER 0xC01D1C
+
+#define mmPCIE_WRAP_PSOC2PCI_AWUSER 0xC01D40
+
+#define mmPCIE_WRAP_PSOC2PCI_ARUSER 0xC01D44
+
+#define mmPCIE_WRAP_DRAIN_TIMEOUT 0xC01D50
+
+#define mmPCIE_WRAP_DRAIN_CFG 0xC01D54
+
+#define mmPCIE_WRAP_DB_AXI_ERR 0xC01DE0
+
+#define mmPCIE_WRAP_SPMU_INTR 0xC01DE4
+
+#define mmPCIE_WRAP_AXI_INTR 0xC01DE8
+
+#define mmPCIE_WRAP_E2E_CTRL 0xC01DF0
+
+#endif /* ASIC_REG_PCIE_WRAP_REGS_H_ */
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_emmc_pll_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_emmc_pll_regs.h
index 8eda4de58788..9271ea95ebe9 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_emmc_pll_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_emmc_pll_regs.h
@@ -102,4 +102,3 @@
#define mmPSOC_EMMC_PLL_FREQ_CALC_EN 0xC70440
#endif /* ASIC_REG_PSOC_EMMC_PLL_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_masks.h
index d4bf0e1db4df..324266653c9a 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_masks.h
@@ -444,4 +444,3 @@
#define PSOC_GLOBAL_CONF_PAD_SEL_VAL_MASK 0x3
#endif /* ASIC_REG_PSOC_GLOBAL_CONF_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_regs.h
index cfbdd2c9c5c7..8141f422e712 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_global_conf_regs.h
@@ -742,4 +742,3 @@
#define mmPSOC_GLOBAL_CONF_PAD_SEL_81 0xC4BA44
#endif /* ASIC_REG_PSOC_GLOBAL_CONF_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_mme_pll_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_mme_pll_regs.h
index 6723d8f76f30..4789ebb9c337 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_mme_pll_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_mme_pll_regs.h
@@ -102,4 +102,3 @@
#define mmPSOC_MME_PLL_FREQ_CALC_EN 0xC71440
#endif /* ASIC_REG_PSOC_MME_PLL_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_pci_pll_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_pci_pll_regs.h
index abcded0531c9..27a296ea6c3d 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_pci_pll_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_pci_pll_regs.h
@@ -102,4 +102,3 @@
#define mmPSOC_PCI_PLL_FREQ_CALC_EN 0xC72440
#endif /* ASIC_REG_PSOC_PCI_PLL_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_spi_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_spi_regs.h
index 5925c7477c25..66aee7fa6b1e 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/psoc_spi_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/psoc_spi_regs.h
@@ -140,4 +140,3 @@
#define mmPSOC_SPI_RSVD_2 0xC430FC
#endif /* ASIC_REG_PSOC_SPI_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x0_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x0_rtr_regs.h
index d56c9fa0e7ba..2ea1770b078f 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x0_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x0_rtr_regs.h
@@ -80,4 +80,3 @@
#define mmSRAM_Y0_X0_RTR_DBG_L_ARB_MAX 0x201330
#endif /* ASIC_REG_SRAM_Y0_X0_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x1_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x1_rtr_regs.h
index 5624544303ca..37e0713efa73 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x1_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x1_rtr_regs.h
@@ -80,4 +80,3 @@
#define mmSRAM_Y0_X1_RTR_DBG_L_ARB_MAX 0x205330
#endif /* ASIC_REG_SRAM_Y0_X1_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x2_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x2_rtr_regs.h
index 3322bc0bd1df..d2572279a2b9 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x2_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x2_rtr_regs.h
@@ -80,4 +80,3 @@
#define mmSRAM_Y0_X2_RTR_DBG_L_ARB_MAX 0x209330
#endif /* ASIC_REG_SRAM_Y0_X2_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x3_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x3_rtr_regs.h
index 81e393db2027..68c5b402c506 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x3_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x3_rtr_regs.h
@@ -80,4 +80,3 @@
#define mmSRAM_Y0_X3_RTR_DBG_L_ARB_MAX 0x20D330
#endif /* ASIC_REG_SRAM_Y0_X3_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x4_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x4_rtr_regs.h
index b2e11b1de385..a42f1ba06d28 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x4_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/sram_y0_x4_rtr_regs.h
@@ -80,4 +80,3 @@
#define mmSRAM_Y0_X4_RTR_DBG_L_ARB_MAX 0x211330
#endif /* ASIC_REG_SRAM_Y0_X4_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/stlb_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/stlb_masks.h
index b4ea8cae2757..94f2ed4a36bd 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/stlb_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/stlb_masks.h
@@ -114,4 +114,3 @@
#define STLB_SRAM_INIT_BUSY_DATA_MASK 0x10
#endif /* ASIC_REG_STLB_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/stlb_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/stlb_regs.h
index 0f5281d3e65b..35013f65acd2 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/stlb_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/stlb_regs.h
@@ -52,4 +52,3 @@
#define mmSTLB_SRAM_INIT 0x49004C
#endif /* ASIC_REG_STLB_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_masks.h
index e5587b49eecd..89c9507a512f 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_masks.h
@@ -1604,4 +1604,3 @@
#define TPC0_CFG_FUNC_MBIST_MEM_LAST_FAILED_PATTERN_MASK 0x70000000
#endif /* ASIC_REG_TPC0_CFG_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_regs.h
index 2be28a63c50a..7d71c4b73a5e 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cfg_regs.h
@@ -884,4 +884,3 @@
#define mmTPC0_CFG_FUNC_MBIST_MEM_9 0xE06E2C
#endif /* ASIC_REG_TPC0_CFG_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_masks.h
index 9aa2d8b53207..9395f2458771 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_masks.h
@@ -370,4 +370,3 @@
#define TPC0_CMDQ_CQ_BUF_RDATA_VAL_MASK 0xFFFFFFFF
#endif /* ASIC_REG_TPC0_CMDQ_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_regs.h
index 3572752ba66e..bc51df573bf0 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_cmdq_regs.h
@@ -136,4 +136,3 @@
#define mmTPC0_CMDQ_CQ_BUF_RDATA 0xE0930C
#endif /* ASIC_REG_TPC0_CMDQ_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_masks.h
index ed866d93c440..553c6b6bd5ec 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_masks.h
@@ -344,4 +344,3 @@
#define TPC0_EML_CFG_DBG_INST_INSERT_CTL_INSERT_MASK 0x1
#endif /* ASIC_REG_TPC0_EML_CFG_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_regs.h
index f1a1b4fa4841..8495479c3659 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_eml_cfg_regs.h
@@ -310,4 +310,3 @@
#define mmTPC0_EML_CFG_DBG_INST_INSERT_CTL 0x3040334
#endif /* ASIC_REG_TPC0_EML_CFG_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_masks.h
index 7f86621179a5..43fafcf01041 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_masks.h
@@ -206,4 +206,3 @@
#define TPC0_NRTR_NON_LIN_SCRAMB_EN_MASK 0x1
#endif /* ASIC_REG_TPC0_NRTR_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_regs.h
index dc280f4e6608..ce3346dd2042 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_nrtr_regs.h
@@ -224,4 +224,3 @@
#define mmTPC0_NRTR_NON_LIN_SCRAMB 0xE00604
#endif /* ASIC_REG_TPC0_NRTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_masks.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_masks.h
index 80d97ee3d8d6..2e4b45947944 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_masks.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_masks.h
@@ -462,4 +462,3 @@
#define TPC0_QM_CQ_BUF_RDATA_VAL_MASK 0xFFFFFFFF
#endif /* ASIC_REG_TPC0_QM_MASKS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_regs.h
index 7552d4ba61fe..4fa09eb88878 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc0_qm_regs.h
@@ -176,4 +176,3 @@
#define mmTPC0_QM_CQ_BUF_RDATA 0xE0830C
#endif /* ASIC_REG_TPC0_QM_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cfg_regs.h
index 19894413474a..928eef1808ae 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cfg_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cfg_regs.h
@@ -884,4 +884,3 @@
#define mmTPC1_CFG_FUNC_MBIST_MEM_9 0xE46E2C
#endif /* ASIC_REG_TPC1_CFG_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cmdq_regs.h
index 9099ebd7ab23..30ae0f307328 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cmdq_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_cmdq_regs.h
@@ -136,4 +136,3 @@
#define mmTPC1_CMDQ_CQ_BUF_RDATA 0xE4930C
#endif /* ASIC_REG_TPC1_CMDQ_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_qm_regs.h
index bc8b9a10391f..b95de4f95ba9 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_qm_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_qm_regs.h
@@ -176,4 +176,3 @@
#define mmTPC1_QM_CQ_BUF_RDATA 0xE4830C
#endif /* ASIC_REG_TPC1_QM_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_rtr_regs.h
index ae267f8f457e..0f91e307879e 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc1_rtr_regs.h
@@ -320,4 +320,3 @@
#define mmTPC1_RTR_NON_LIN_SCRAMB 0xE40604
#endif /* ASIC_REG_TPC1_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cfg_regs.h
index 9c33fc039036..73421227f35b 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cfg_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cfg_regs.h
@@ -884,4 +884,3 @@
#define mmTPC2_CFG_FUNC_MBIST_MEM_9 0xE86E2C
#endif /* ASIC_REG_TPC2_CFG_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cmdq_regs.h
index 7a643887d6e1..27b66bf2da9f 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cmdq_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_cmdq_regs.h
@@ -136,4 +136,3 @@
#define mmTPC2_CMDQ_CQ_BUF_RDATA 0xE8930C
#endif /* ASIC_REG_TPC2_CMDQ_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_qm_regs.h
index f3e32c018064..31e5b2f53905 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_qm_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_qm_regs.h
@@ -176,4 +176,3 @@
#define mmTPC2_QM_CQ_BUF_RDATA 0xE8830C
#endif /* ASIC_REG_TPC2_QM_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_rtr_regs.h
index 0eb0cd1fbd19..4eddeaa15d94 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc2_rtr_regs.h
@@ -320,4 +320,3 @@
#define mmTPC2_RTR_NON_LIN_SCRAMB 0xE80604
#endif /* ASIC_REG_TPC2_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cfg_regs.h
index 0baf63c69b25..ce573a1a8361 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cfg_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cfg_regs.h
@@ -884,4 +884,3 @@
#define mmTPC3_CFG_FUNC_MBIST_MEM_9 0xEC6E2C
#endif /* ASIC_REG_TPC3_CFG_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cmdq_regs.h
index 82a5261e852f..11d81fca0a0f 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cmdq_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_cmdq_regs.h
@@ -136,4 +136,3 @@
#define mmTPC3_CMDQ_CQ_BUF_RDATA 0xEC930C
#endif /* ASIC_REG_TPC3_CMDQ_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_qm_regs.h
index b05b1e18e664..e41595a19e69 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_qm_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_qm_regs.h
@@ -176,4 +176,3 @@
#define mmTPC3_QM_CQ_BUF_RDATA 0xEC830C
#endif /* ASIC_REG_TPC3_QM_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_rtr_regs.h
index 5a2fd7652650..34a438b1efe5 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc3_rtr_regs.h
@@ -320,4 +320,3 @@
#define mmTPC3_RTR_NON_LIN_SCRAMB 0xEC0604
#endif /* ASIC_REG_TPC3_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cfg_regs.h
index d64a100075f2..d44caf0fc1bb 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cfg_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cfg_regs.h
@@ -884,4 +884,3 @@
#define mmTPC4_CFG_FUNC_MBIST_MEM_9 0xF06E2C
#endif /* ASIC_REG_TPC4_CFG_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cmdq_regs.h
index 565b42885b0d..f13a6532961f 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cmdq_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_cmdq_regs.h
@@ -136,4 +136,3 @@
#define mmTPC4_CMDQ_CQ_BUF_RDATA 0xF0930C
#endif /* ASIC_REG_TPC4_CMDQ_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_qm_regs.h
index 196da3f12710..db081fc17cfc 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_qm_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_qm_regs.h
@@ -176,4 +176,3 @@
#define mmTPC4_QM_CQ_BUF_RDATA 0xF0830C
#endif /* ASIC_REG_TPC4_QM_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_rtr_regs.h
index 8b54041d144a..8c5372303b28 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc4_rtr_regs.h
@@ -320,4 +320,3 @@
#define mmTPC4_RTR_NON_LIN_SCRAMB 0xF00604
#endif /* ASIC_REG_TPC4_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cfg_regs.h
index 3f00954fcdba..5139fde71011 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cfg_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cfg_regs.h
@@ -884,4 +884,3 @@
#define mmTPC5_CFG_FUNC_MBIST_MEM_9 0xF46E2C
#endif /* ASIC_REG_TPC5_CFG_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cmdq_regs.h
index d8e72a8e18d7..1e7cd6e1e888 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cmdq_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_cmdq_regs.h
@@ -136,4 +136,3 @@
#define mmTPC5_CMDQ_CQ_BUF_RDATA 0xF4930C
#endif /* ASIC_REG_TPC5_CMDQ_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_qm_regs.h
index be2e68624709..ac0d3820cd6b 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_qm_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_qm_regs.h
@@ -176,4 +176,3 @@
#define mmTPC5_QM_CQ_BUF_RDATA 0xF4830C
#endif /* ASIC_REG_TPC5_QM_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_rtr_regs.h
index 6f301c7bbc2f..57f83bc3b17d 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc5_rtr_regs.h
@@ -320,4 +320,3 @@
#define mmTPC5_RTR_NON_LIN_SCRAMB 0xF40604
#endif /* ASIC_REG_TPC5_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cfg_regs.h
index 1e1168601c41..94e0191c06c1 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cfg_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cfg_regs.h
@@ -884,4 +884,3 @@
#define mmTPC6_CFG_FUNC_MBIST_MEM_9 0xF86E2C
#endif /* ASIC_REG_TPC6_CFG_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cmdq_regs.h
index fbca6b47284e..7a1a0e87b225 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cmdq_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_cmdq_regs.h
@@ -136,4 +136,3 @@
#define mmTPC6_CMDQ_CQ_BUF_RDATA 0xF8930C
#endif /* ASIC_REG_TPC6_CMDQ_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_qm_regs.h
index bf32465dabcb..80fa0fe0f60f 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_qm_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_qm_regs.h
@@ -176,4 +176,3 @@
#define mmTPC6_QM_CQ_BUF_RDATA 0xF8830C
#endif /* ASIC_REG_TPC6_QM_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_rtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_rtr_regs.h
index 609bb90e1046..d6cae8b8af66 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_rtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc6_rtr_regs.h
@@ -320,4 +320,3 @@
#define mmTPC6_RTR_NON_LIN_SCRAMB 0xF80604
#endif /* ASIC_REG_TPC6_RTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cfg_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cfg_regs.h
index bf2fd0f73906..234147adb779 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cfg_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cfg_regs.h
@@ -884,4 +884,3 @@
#define mmTPC7_CFG_FUNC_MBIST_MEM_9 0xFC6E2C
#endif /* ASIC_REG_TPC7_CFG_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cmdq_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cmdq_regs.h
index 65d83043bf63..4c160632fe7d 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cmdq_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_cmdq_regs.h
@@ -136,4 +136,3 @@
#define mmTPC7_CMDQ_CQ_BUF_RDATA 0xFC930C
#endif /* ASIC_REG_TPC7_CMDQ_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_nrtr_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_nrtr_regs.h
index 3d5848d87304..0c13d4d167aa 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_nrtr_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_nrtr_regs.h
@@ -224,4 +224,3 @@
#define mmTPC7_NRTR_NON_LIN_SCRAMB 0xFC0604
#endif /* ASIC_REG_TPC7_NRTR_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_qm_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_qm_regs.h
index 25f5095f68fb..cbe11425bfb0 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_qm_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc7_qm_regs.h
@@ -176,4 +176,3 @@
#define mmTPC7_QM_CQ_BUF_RDATA 0xFC830C
#endif /* ASIC_REG_TPC7_QM_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/asic_reg/tpc_pll_regs.h b/drivers/misc/habanalabs/include/goya/asic_reg/tpc_pll_regs.h
index 920231d0afa5..e25e19660a9d 100644
--- a/drivers/misc/habanalabs/include/goya/asic_reg/tpc_pll_regs.h
+++ b/drivers/misc/habanalabs/include/goya/asic_reg/tpc_pll_regs.h
@@ -102,4 +102,3 @@
#define mmTPC_PLL_FREQ_CALC_EN 0xE01440
#endif /* ASIC_REG_TPC_PLL_REGS_H_ */
-
diff --git a/drivers/misc/habanalabs/include/goya/goya.h b/drivers/misc/habanalabs/include/goya/goya.h
index 614149efa412..3f02a52ba4ce 100644
--- a/drivers/misc/habanalabs/include/goya/goya.h
+++ b/drivers/misc/habanalabs/include/goya/goya.h
@@ -8,10 +8,6 @@
#ifndef GOYA_H
#define GOYA_H
-#include "asic_reg/goya_regs.h"
-
-#include <linux/types.h>
-
#define SRAM_CFG_BAR_ID 0
#define MSIX_BAR_ID 2
#define DDR_BAR_ID 4
diff --git a/drivers/misc/habanalabs/include/goya/goya_async_events.h b/drivers/misc/habanalabs/include/goya/goya_async_events.h
index 497937a17ee9..bb7a1aa3279e 100644
--- a/drivers/misc/habanalabs/include/goya/goya_async_events.h
+++ b/drivers/misc/habanalabs/include/goya/goya_async_events.h
@@ -9,7 +9,9 @@
#define __GOYA_ASYNC_EVENTS_H_
enum goya_async_event_id {
+ GOYA_ASYNC_EVENT_ID_PCIE_CORE = 32,
GOYA_ASYNC_EVENT_ID_PCIE_IF = 33,
+ GOYA_ASYNC_EVENT_ID_PCIE_PHY = 34,
GOYA_ASYNC_EVENT_ID_TPC0_ECC = 36,
GOYA_ASYNC_EVENT_ID_TPC1_ECC = 39,
GOYA_ASYNC_EVENT_ID_TPC2_ECC = 42,
@@ -23,6 +25,8 @@ enum goya_async_event_id {
GOYA_ASYNC_EVENT_ID_MMU_ECC = 63,
GOYA_ASYNC_EVENT_ID_DMA_MACRO = 64,
GOYA_ASYNC_EVENT_ID_DMA_ECC = 66,
+ GOYA_ASYNC_EVENT_ID_DDR0_PARITY = 69,
+ GOYA_ASYNC_EVENT_ID_DDR1_PARITY = 72,
GOYA_ASYNC_EVENT_ID_CPU_IF_ECC = 75,
GOYA_ASYNC_EVENT_ID_PSOC_MEM = 78,
GOYA_ASYNC_EVENT_ID_PSOC_CORESIGHT = 79,
@@ -72,6 +76,7 @@ enum goya_async_event_id {
GOYA_ASYNC_EVENT_ID_MME_WACSD = 142,
GOYA_ASYNC_EVENT_ID_PLL0 = 143,
GOYA_ASYNC_EVENT_ID_PLL1 = 144,
+ GOYA_ASYNC_EVENT_ID_PLL2 = 145,
GOYA_ASYNC_EVENT_ID_PLL3 = 146,
GOYA_ASYNC_EVENT_ID_PLL4 = 147,
GOYA_ASYNC_EVENT_ID_PLL5 = 148,
@@ -81,6 +86,7 @@ enum goya_async_event_id {
GOYA_ASYNC_EVENT_ID_PSOC = 160,
GOYA_ASYNC_EVENT_ID_PCIE_FLR = 171,
GOYA_ASYNC_EVENT_ID_PCIE_HOT_RESET = 172,
+ GOYA_ASYNC_EVENT_ID_PCIE_PERST = 173,
GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG0 = 174,
GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG1 = 175,
GOYA_ASYNC_EVENT_ID_PCIE_QID0_ENG2 = 176,
@@ -144,8 +150,11 @@ enum goya_async_event_id {
GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_0 = 330,
GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_1 = 331,
GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_2 = 332,
+ GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_3 = 333,
+ GOYA_ASYNC_EVENT_ID_PSOC_GPIO_U16_4 = 334,
GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET = 356,
GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT = 361,
+ GOYA_ASYNC_EVENT_ID_FAN = 425,
GOYA_ASYNC_EVENT_ID_TPC0_CMDQ = 430,
GOYA_ASYNC_EVENT_ID_TPC1_CMDQ = 431,
GOYA_ASYNC_EVENT_ID_TPC2_CMDQ = 432,
diff --git a/drivers/misc/habanalabs/include/goya/goya_coresight.h b/drivers/misc/habanalabs/include/goya/goya_coresight.h
new file mode 100644
index 000000000000..6e933c0ca5cd
--- /dev/null
+++ b/drivers/misc/habanalabs/include/goya/goya_coresight.h
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2018 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef GOYA_CORESIGHT_H
+#define GOYA_CORESIGHT_H
+
+enum goya_debug_stm_regs_index {
+ GOYA_STM_FIRST = 0,
+ GOYA_STM_CPU = GOYA_STM_FIRST,
+ GOYA_STM_DMA_CH_0_CS,
+ GOYA_STM_DMA_CH_1_CS,
+ GOYA_STM_DMA_CH_2_CS,
+ GOYA_STM_DMA_CH_3_CS,
+ GOYA_STM_DMA_CH_4_CS,
+ GOYA_STM_DMA_MACRO_CS,
+ GOYA_STM_MME1_SBA,
+ GOYA_STM_MME3_SBB,
+ GOYA_STM_MME4_WACS2,
+ GOYA_STM_MME4_WACS,
+ GOYA_STM_MMU_CS,
+ GOYA_STM_PCIE,
+ GOYA_STM_PSOC,
+ GOYA_STM_TPC0_EML,
+ GOYA_STM_TPC1_EML,
+ GOYA_STM_TPC2_EML,
+ GOYA_STM_TPC3_EML,
+ GOYA_STM_TPC4_EML,
+ GOYA_STM_TPC5_EML,
+ GOYA_STM_TPC6_EML,
+ GOYA_STM_TPC7_EML,
+ GOYA_STM_LAST = GOYA_STM_TPC7_EML
+};
+
+enum goya_debug_etf_regs_index {
+ GOYA_ETF_FIRST = 0,
+ GOYA_ETF_CPU_0 = GOYA_ETF_FIRST,
+ GOYA_ETF_CPU_1,
+ GOYA_ETF_CPU_TRACE,
+ GOYA_ETF_DMA_CH_0_CS,
+ GOYA_ETF_DMA_CH_1_CS,
+ GOYA_ETF_DMA_CH_2_CS,
+ GOYA_ETF_DMA_CH_3_CS,
+ GOYA_ETF_DMA_CH_4_CS,
+ GOYA_ETF_DMA_MACRO_CS,
+ GOYA_ETF_MME1_SBA,
+ GOYA_ETF_MME3_SBB,
+ GOYA_ETF_MME4_WACS2,
+ GOYA_ETF_MME4_WACS,
+ GOYA_ETF_MMU_CS,
+ GOYA_ETF_PCIE,
+ GOYA_ETF_PSOC,
+ GOYA_ETF_TPC0_EML,
+ GOYA_ETF_TPC1_EML,
+ GOYA_ETF_TPC2_EML,
+ GOYA_ETF_TPC3_EML,
+ GOYA_ETF_TPC4_EML,
+ GOYA_ETF_TPC5_EML,
+ GOYA_ETF_TPC6_EML,
+ GOYA_ETF_TPC7_EML,
+ GOYA_ETF_LAST = GOYA_ETF_TPC7_EML
+};
+
+enum goya_debug_funnel_regs_index {
+ GOYA_FUNNEL_FIRST = 0,
+ GOYA_FUNNEL_CPU = GOYA_FUNNEL_FIRST,
+ GOYA_FUNNEL_DMA_CH_6_1,
+ GOYA_FUNNEL_DMA_MACRO_3_1,
+ GOYA_FUNNEL_MME0_RTR,
+ GOYA_FUNNEL_MME1_RTR,
+ GOYA_FUNNEL_MME2_RTR,
+ GOYA_FUNNEL_MME3_RTR,
+ GOYA_FUNNEL_MME4_RTR,
+ GOYA_FUNNEL_MME5_RTR,
+ GOYA_FUNNEL_PCIE,
+ GOYA_FUNNEL_PSOC,
+ GOYA_FUNNEL_TPC0_EML,
+ GOYA_FUNNEL_TPC1_EML,
+ GOYA_FUNNEL_TPC1_RTR,
+ GOYA_FUNNEL_TPC2_EML,
+ GOYA_FUNNEL_TPC2_RTR,
+ GOYA_FUNNEL_TPC3_EML,
+ GOYA_FUNNEL_TPC3_RTR,
+ GOYA_FUNNEL_TPC4_EML,
+ GOYA_FUNNEL_TPC4_RTR,
+ GOYA_FUNNEL_TPC5_EML,
+ GOYA_FUNNEL_TPC5_RTR,
+ GOYA_FUNNEL_TPC6_EML,
+ GOYA_FUNNEL_TPC6_RTR,
+ GOYA_FUNNEL_TPC7_EML,
+ GOYA_FUNNEL_LAST = GOYA_FUNNEL_TPC7_EML
+};
+
+enum goya_debug_bmon_regs_index {
+ GOYA_BMON_FIRST = 0,
+ GOYA_BMON_CPU_RD = GOYA_BMON_FIRST,
+ GOYA_BMON_CPU_WR,
+ GOYA_BMON_DMA_CH_0_0,
+ GOYA_BMON_DMA_CH_0_1,
+ GOYA_BMON_DMA_CH_1_0,
+ GOYA_BMON_DMA_CH_1_1,
+ GOYA_BMON_DMA_CH_2_0,
+ GOYA_BMON_DMA_CH_2_1,
+ GOYA_BMON_DMA_CH_3_0,
+ GOYA_BMON_DMA_CH_3_1,
+ GOYA_BMON_DMA_CH_4_0,
+ GOYA_BMON_DMA_CH_4_1,
+ GOYA_BMON_DMA_MACRO_0,
+ GOYA_BMON_DMA_MACRO_1,
+ GOYA_BMON_DMA_MACRO_2,
+ GOYA_BMON_DMA_MACRO_3,
+ GOYA_BMON_DMA_MACRO_4,
+ GOYA_BMON_DMA_MACRO_5,
+ GOYA_BMON_DMA_MACRO_6,
+ GOYA_BMON_DMA_MACRO_7,
+ GOYA_BMON_MME1_SBA_0,
+ GOYA_BMON_MME1_SBA_1,
+ GOYA_BMON_MME3_SBB_0,
+ GOYA_BMON_MME3_SBB_1,
+ GOYA_BMON_MME4_WACS2_0,
+ GOYA_BMON_MME4_WACS2_1,
+ GOYA_BMON_MME4_WACS2_2,
+ GOYA_BMON_MME4_WACS_0,
+ GOYA_BMON_MME4_WACS_1,
+ GOYA_BMON_MME4_WACS_2,
+ GOYA_BMON_MME4_WACS_3,
+ GOYA_BMON_MME4_WACS_4,
+ GOYA_BMON_MME4_WACS_5,
+ GOYA_BMON_MME4_WACS_6,
+ GOYA_BMON_MMU_0,
+ GOYA_BMON_MMU_1,
+ GOYA_BMON_PCIE_MSTR_RD,
+ GOYA_BMON_PCIE_MSTR_WR,
+ GOYA_BMON_PCIE_SLV_RD,
+ GOYA_BMON_PCIE_SLV_WR,
+ GOYA_BMON_TPC0_EML_0,
+ GOYA_BMON_TPC0_EML_1,
+ GOYA_BMON_TPC0_EML_2,
+ GOYA_BMON_TPC0_EML_3,
+ GOYA_BMON_TPC1_EML_0,
+ GOYA_BMON_TPC1_EML_1,
+ GOYA_BMON_TPC1_EML_2,
+ GOYA_BMON_TPC1_EML_3,
+ GOYA_BMON_TPC2_EML_0,
+ GOYA_BMON_TPC2_EML_1,
+ GOYA_BMON_TPC2_EML_2,
+ GOYA_BMON_TPC2_EML_3,
+ GOYA_BMON_TPC3_EML_0,
+ GOYA_BMON_TPC3_EML_1,
+ GOYA_BMON_TPC3_EML_2,
+ GOYA_BMON_TPC3_EML_3,
+ GOYA_BMON_TPC4_EML_0,
+ GOYA_BMON_TPC4_EML_1,
+ GOYA_BMON_TPC4_EML_2,
+ GOYA_BMON_TPC4_EML_3,
+ GOYA_BMON_TPC5_EML_0,
+ GOYA_BMON_TPC5_EML_1,
+ GOYA_BMON_TPC5_EML_2,
+ GOYA_BMON_TPC5_EML_3,
+ GOYA_BMON_TPC6_EML_0,
+ GOYA_BMON_TPC6_EML_1,
+ GOYA_BMON_TPC6_EML_2,
+ GOYA_BMON_TPC6_EML_3,
+ GOYA_BMON_TPC7_EML_0,
+ GOYA_BMON_TPC7_EML_1,
+ GOYA_BMON_TPC7_EML_2,
+ GOYA_BMON_TPC7_EML_3,
+ GOYA_BMON_LAST = GOYA_BMON_TPC7_EML_3
+};
+
+enum goya_debug_spmu_regs_index {
+ GOYA_SPMU_FIRST = 0,
+ GOYA_SPMU_DMA_CH_0_CS = GOYA_SPMU_FIRST,
+ GOYA_SPMU_DMA_CH_1_CS,
+ GOYA_SPMU_DMA_CH_2_CS,
+ GOYA_SPMU_DMA_CH_3_CS,
+ GOYA_SPMU_DMA_CH_4_CS,
+ GOYA_SPMU_DMA_MACRO_CS,
+ GOYA_SPMU_MME1_SBA,
+ GOYA_SPMU_MME3_SBB,
+ GOYA_SPMU_MME4_WACS2,
+ GOYA_SPMU_MME4_WACS,
+ GOYA_SPMU_MMU_CS,
+ GOYA_SPMU_PCIE,
+ GOYA_SPMU_TPC0_EML,
+ GOYA_SPMU_TPC1_EML,
+ GOYA_SPMU_TPC2_EML,
+ GOYA_SPMU_TPC3_EML,
+ GOYA_SPMU_TPC4_EML,
+ GOYA_SPMU_TPC5_EML,
+ GOYA_SPMU_TPC6_EML,
+ GOYA_SPMU_TPC7_EML,
+ GOYA_SPMU_LAST = GOYA_SPMU_TPC7_EML
+};
+
+#endif /* GOYA_CORESIGHT_H */
diff --git a/drivers/misc/habanalabs/include/goya/goya_fw_if.h b/drivers/misc/habanalabs/include/goya/goya_fw_if.h
index a9920cb4a07b..0fa80fe9f6cc 100644
--- a/drivers/misc/habanalabs/include/goya/goya_fw_if.h
+++ b/drivers/misc/habanalabs/include/goya/goya_fw_if.h
@@ -8,6 +8,8 @@
#ifndef GOYA_FW_IF_H
#define GOYA_FW_IF_H
+#define GOYA_EVENT_QUEUE_MSIX_IDX 5
+
#define CPU_BOOT_ADDR 0x7FF8040000ull
#define UBOOT_FW_OFFSET 0x100000 /* 1MB in SRAM */
diff --git a/drivers/misc/habanalabs/include/hl_boot_if.h b/drivers/misc/habanalabs/include/hl_boot_if.h
index 7475732b9996..4cd04c090285 100644
--- a/drivers/misc/habanalabs/include/hl_boot_if.h
+++ b/drivers/misc/habanalabs/include/hl_boot_if.h
@@ -18,7 +18,8 @@ enum cpu_boot_status {
CPU_BOOT_STATUS_IN_SPL,
CPU_BOOT_STATUS_IN_UBOOT,
CPU_BOOT_STATUS_DRAM_INIT_FAIL,
- CPU_BOOT_STATUS_FIT_CORRUPTED
+ CPU_BOOT_STATUS_FIT_CORRUPTED,
+ CPU_BOOT_STATUS_UBOOT_NOT_READY,
};
enum kmd_msg {
diff --git a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
index b680052ee3f0..71ea3c3e8ba3 100644
--- a/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
+++ b/drivers/misc/habanalabs/include/hw_ip/mmu/mmu_general.h
@@ -14,16 +14,16 @@
#define PAGE_SIZE_4KB (_AC(1, UL) << PAGE_SHIFT_4KB)
#define PAGE_MASK_2MB (~(PAGE_SIZE_2MB - 1))
-#define PAGE_PRESENT_MASK 0x0000000000001
-#define SWAP_OUT_MASK 0x0000000000004
-#define LAST_MASK 0x0000000000800
-#define PHYS_ADDR_MASK 0x3FFFFFFFFF000ull
+#define PAGE_PRESENT_MASK 0x0000000000001ull
+#define SWAP_OUT_MASK 0x0000000000004ull
+#define LAST_MASK 0x0000000000800ull
+#define PHYS_ADDR_MASK 0xFFFFFFFFFFFFF000ull
#define HOP0_MASK 0x3000000000000ull
#define HOP1_MASK 0x0FF8000000000ull
#define HOP2_MASK 0x0007FC0000000ull
-#define HOP3_MASK 0x000003FE00000
-#define HOP4_MASK 0x00000001FF000
-#define OFFSET_MASK 0x0000000000FFF
+#define HOP3_MASK 0x000003FE00000ull
+#define HOP4_MASK 0x00000001FF000ull
+#define OFFSET_MASK 0x0000000000FFFull
#define HOP0_SHIFT 48
#define HOP1_SHIFT 39
@@ -32,7 +32,7 @@
#define HOP4_SHIFT 12
#define PTE_PHYS_ADDR_SHIFT 12
-#define PTE_PHYS_ADDR_MASK ~0xFFF
+#define PTE_PHYS_ADDR_MASK ~OFFSET_MASK
#define HL_PTE_SIZE sizeof(u64)
#define HOP_TABLE_SIZE PAGE_SIZE_4KB
diff --git a/drivers/misc/habanalabs/include/hw_ip/pci/pci_general.h b/drivers/misc/habanalabs/include/hw_ip/pci/pci_general.h
new file mode 100644
index 000000000000..d232081d4e0f
--- /dev/null
+++ b/drivers/misc/habanalabs/include/hw_ip/pci/pci_general.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ *
+ */
+
+#ifndef INCLUDE_PCI_GENERAL_H_
+#define INCLUDE_PCI_GENERAL_H_
+
+/* PCI CONFIGURATION SPACE */
+#define mmPCI_CONFIG_ELBI_ADDR 0xFF0
+#define mmPCI_CONFIG_ELBI_DATA 0xFF4
+#define mmPCI_CONFIG_ELBI_CTRL 0xFF8
+#define PCI_CONFIG_ELBI_CTRL_WRITE (1 << 31)
+
+#define mmPCI_CONFIG_ELBI_STS 0xFFC
+#define PCI_CONFIG_ELBI_STS_ERR (1 << 30)
+#define PCI_CONFIG_ELBI_STS_DONE (1 << 31)
+#define PCI_CONFIG_ELBI_STS_MASK (PCI_CONFIG_ELBI_STS_ERR | \
+ PCI_CONFIG_ELBI_STS_DONE)
+
+#endif /* INCLUDE_PCI_GENERAL_H_ */
diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c
index ce1fda40a8b8..43ef3ad8438a 100644
--- a/drivers/misc/habanalabs/memory.c
+++ b/drivers/misc/habanalabs/memory.c
@@ -109,7 +109,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
page_size);
if (!phys_pg_pack->pages[i]) {
dev_err(hdev->dev,
- "ioctl failed to allocate page\n");
+ "Failed to allocate device memory (out of memory)\n");
rc = -ENOMEM;
goto page_err;
}
@@ -1046,10 +1046,17 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr)
mutex_lock(&ctx->mmu_lock);
- for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size)
+ for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
if (hl_mmu_unmap(ctx, next_vaddr, page_size))
dev_warn_ratelimited(hdev->dev,
- "unmap failed for vaddr: 0x%llx\n", next_vaddr);
+ "unmap failed for vaddr: 0x%llx\n", next_vaddr);
+
+ /* unmapping on Palladium can be really long, so avoid a CPU
+ * soft lockup bug by sleeping a little between unmapping pages
+ */
+ if (hdev->pldm)
+ usleep_range(500, 1000);
+ }
hdev->asic_funcs->mmu_invalidate_cache(hdev, true);
@@ -1083,6 +1090,64 @@ vm_type_err:
return rc;
}
+static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_ctx *ctx = hpriv->ctx;
+ u64 device_addr = 0;
+ u32 handle = 0;
+ int rc;
+
+ switch (args->in.op) {
+ case HL_MEM_OP_ALLOC:
+ if (args->in.alloc.mem_size == 0) {
+ dev_err(hdev->dev,
+ "alloc size must be larger than 0\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ /* Force contiguous as there are no real MMU
+ * translations to overcome physical memory gaps
+ */
+ args->in.flags |= HL_MEM_CONTIGUOUS;
+ rc = alloc_device_memory(ctx, &args->in, &handle);
+
+ memset(args, 0, sizeof(*args));
+ args->out.handle = (__u64) handle;
+ break;
+
+ case HL_MEM_OP_FREE:
+ rc = free_device_memory(ctx, args->in.free.handle);
+ break;
+
+ case HL_MEM_OP_MAP:
+ if (args->in.flags & HL_MEM_USERPTR) {
+ device_addr = args->in.map_host.host_virt_addr;
+ rc = 0;
+ } else {
+ rc = get_paddr_from_handle(ctx, &args->in,
+ &device_addr);
+ }
+
+ memset(args, 0, sizeof(*args));
+ args->out.device_virt_addr = device_addr;
+ break;
+
+ case HL_MEM_OP_UNMAP:
+ rc = 0;
+ break;
+
+ default:
+ dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
+ rc = -ENOTTY;
+ break;
+ }
+
+out:
+ return rc;
+}
+
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
{
union hl_mem_args *args = data;
@@ -1094,104 +1159,54 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
if (hl_device_disabled_or_in_reset(hdev)) {
dev_warn_ratelimited(hdev->dev,
- "Device is disabled or in reset. Can't execute memory IOCTL\n");
+ "Device is %s. Can't execute MEMORY IOCTL\n",
+ atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
return -EBUSY;
}
- if (hdev->mmu_enable) {
- switch (args->in.op) {
- case HL_MEM_OP_ALLOC:
- if (!hdev->dram_supports_virtual_memory) {
- dev_err(hdev->dev,
- "DRAM alloc is not supported\n");
- rc = -EINVAL;
- goto out;
- }
- if (args->in.alloc.mem_size == 0) {
- dev_err(hdev->dev,
- "alloc size must be larger than 0\n");
- rc = -EINVAL;
- goto out;
- }
- rc = alloc_device_memory(ctx, &args->in, &handle);
-
- memset(args, 0, sizeof(*args));
- args->out.handle = (__u64) handle;
- break;
-
- case HL_MEM_OP_FREE:
- if (!hdev->dram_supports_virtual_memory) {
- dev_err(hdev->dev,
- "DRAM free is not supported\n");
- rc = -EINVAL;
- goto out;
- }
- rc = free_device_memory(ctx, args->in.free.handle);
- break;
-
- case HL_MEM_OP_MAP:
- rc = map_device_va(ctx, &args->in, &device_addr);
-
- memset(args, 0, sizeof(*args));
- args->out.device_virt_addr = device_addr;
- break;
-
- case HL_MEM_OP_UNMAP:
- rc = unmap_device_va(ctx,
- args->in.unmap.device_virt_addr);
- break;
+ if (!hdev->mmu_enable)
+ return mem_ioctl_no_mmu(hpriv, args);
- default:
- dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
- rc = -ENOTTY;
- break;
+ switch (args->in.op) {
+ case HL_MEM_OP_ALLOC:
+ if (!hdev->dram_supports_virtual_memory) {
+ dev_err(hdev->dev, "DRAM alloc is not supported\n");
+ rc = -EINVAL;
+ goto out;
}
- } else {
- switch (args->in.op) {
- case HL_MEM_OP_ALLOC:
- if (args->in.alloc.mem_size == 0) {
- dev_err(hdev->dev,
- "alloc size must be larger than 0\n");
- rc = -EINVAL;
- goto out;
- }
- /* Force contiguous as there are no real MMU
- * translations to overcome physical memory gaps
- */
- args->in.flags |= HL_MEM_CONTIGUOUS;
- rc = alloc_device_memory(ctx, &args->in, &handle);
+ if (args->in.alloc.mem_size == 0) {
+ dev_err(hdev->dev,
+ "alloc size must be larger than 0\n");
+ rc = -EINVAL;
+ goto out;
+ }
+ rc = alloc_device_memory(ctx, &args->in, &handle);
- memset(args, 0, sizeof(*args));
- args->out.handle = (__u64) handle;
- break;
+ memset(args, 0, sizeof(*args));
+ args->out.handle = (__u64) handle;
+ break;
- case HL_MEM_OP_FREE:
- rc = free_device_memory(ctx, args->in.free.handle);
- break;
+ case HL_MEM_OP_FREE:
+ rc = free_device_memory(ctx, args->in.free.handle);
+ break;
- case HL_MEM_OP_MAP:
- if (args->in.flags & HL_MEM_USERPTR) {
- device_addr = args->in.map_host.host_virt_addr;
- rc = 0;
- } else {
- rc = get_paddr_from_handle(ctx, &args->in,
- &device_addr);
- }
+ case HL_MEM_OP_MAP:
+ rc = map_device_va(ctx, &args->in, &device_addr);
- memset(args, 0, sizeof(*args));
- args->out.device_virt_addr = device_addr;
- break;
+ memset(args, 0, sizeof(*args));
+ args->out.device_virt_addr = device_addr;
+ break;
- case HL_MEM_OP_UNMAP:
- rc = 0;
- break;
+ case HL_MEM_OP_UNMAP:
+ rc = unmap_device_va(ctx,
+ args->in.unmap.device_virt_addr);
+ break;
- default:
- dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
- rc = -ENOTTY;
- break;
- }
+ default:
+ dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
+ rc = -ENOTTY;
+ break;
}
out:
diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c
index 3a5a2cec8305..533d9315b6fb 100644
--- a/drivers/misc/habanalabs/mmu.c
+++ b/drivers/misc/habanalabs/mmu.c
@@ -11,13 +11,15 @@
#include <linux/genalloc.h>
#include <linux/slab.h>
-static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 addr)
+static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
+
+static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
struct pgt_info *pgt_info = NULL;
- hash_for_each_possible(ctx->mmu_hash, pgt_info, node,
- (unsigned long) addr)
- if (addr == pgt_info->addr)
+ hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
+ (unsigned long) hop_addr)
+ if (hop_addr == pgt_info->shadow_addr)
break;
return pgt_info;
@@ -25,45 +27,109 @@ static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 addr)
static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
+ struct hl_device *hdev = ctx->hdev;
struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
- gen_pool_free(pgt_info->ctx->hdev->mmu_pgt_pool, pgt_info->addr,
- ctx->hdev->asic_prop.mmu_hop_table_size);
+ gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr,
+ hdev->asic_prop.mmu_hop_table_size);
hash_del(&pgt_info->node);
-
+ kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
kfree(pgt_info);
}
static u64 alloc_hop(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
struct pgt_info *pgt_info;
- u64 addr;
+ u64 phys_addr, shadow_addr;
pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
if (!pgt_info)
return ULLONG_MAX;
- addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool,
- hdev->asic_prop.mmu_hop_table_size);
- if (!addr) {
+ phys_addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool,
+ prop->mmu_hop_table_size);
+ if (!phys_addr) {
dev_err(hdev->dev, "failed to allocate page\n");
- kfree(pgt_info);
- return ULLONG_MAX;
+ goto pool_add_err;
}
- pgt_info->addr = addr;
+ shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
+ GFP_KERNEL);
+ if (!shadow_addr)
+ goto shadow_err;
+
+ pgt_info->phys_addr = phys_addr;
+ pgt_info->shadow_addr = shadow_addr;
pgt_info->ctx = ctx;
pgt_info->num_of_ptes = 0;
- hash_add(ctx->mmu_hash, &pgt_info->node, addr);
+ hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);
+
+ return shadow_addr;
+
+shadow_err:
+ gen_pool_free(hdev->mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size);
+pool_add_err:
+ kfree(pgt_info);
+
+ return ULLONG_MAX;
+}
+
+static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
+{
+ return ctx->hdev->asic_prop.mmu_pgt_addr +
+ (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
+}
+
+static inline u64 get_hop0_addr(struct hl_ctx *ctx)
+{
+ return (u64) (uintptr_t) ctx->hdev->mmu_shadow_hop0 +
+ (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
+}
+
+static inline void flush(struct hl_ctx *ctx)
+{
+ /* flush all writes from all cores to reach PCI */
+ mb();
+ ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
+}
+
+/* transform the value to physical address when writing to H/W */
+static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
+{
+ /*
+ * The value to write is actually the address of the next shadow hop +
+ * flags at the 12 LSBs.
+ * Hence in order to get the value to write to the physical PTE, we
+ * clear the 12 LSBs and translate the shadow hop to its associated
+ * physical hop, and add back the original 12 LSBs.
+ */
+ u64 phys_val = get_phys_addr(ctx, val & PTE_PHYS_ADDR_MASK) |
+ (val & OFFSET_MASK);
+
+ ctx->hdev->asic_funcs->write_pte(ctx->hdev,
+ get_phys_addr(ctx, shadow_pte_addr),
+ phys_val);
+
+ *(u64 *) (uintptr_t) shadow_pte_addr = val;
+}
- return addr;
+/* do not transform the value to physical address when writing to H/W */
+static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
+ u64 val)
+{
+ ctx->hdev->asic_funcs->write_pte(ctx->hdev,
+ get_phys_addr(ctx, shadow_pte_addr),
+ val);
+ *(u64 *) (uintptr_t) shadow_pte_addr = val;
}
-static inline void clear_pte(struct hl_device *hdev, u64 pte_addr)
+/* clear the last and present bits */
+static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
- /* clear the last and present bits */
- hdev->asic_funcs->write_pte(hdev, pte_addr, 0);
+ /* no need to transform the value to physical address */
+ write_final_pte(ctx, pte_addr, 0);
}
static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
@@ -98,12 +164,6 @@ static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
return num_of_ptes_left;
}
-static inline u64 get_hop0_addr(struct hl_ctx *ctx)
-{
- return ctx->hdev->asic_prop.mmu_pgt_addr +
- (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
-}
-
static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
u64 virt_addr, u64 mask, u64 shift)
{
@@ -136,7 +196,7 @@ static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP4_MASK, HOP4_SHIFT);
}
-static inline u64 get_next_hop_addr(u64 curr_pte)
+static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
if (curr_pte & PAGE_PRESENT_MASK)
return curr_pte & PHYS_ADDR_MASK;
@@ -147,7 +207,7 @@ static inline u64 get_next_hop_addr(u64 curr_pte)
static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
bool *is_new_hop)
{
- u64 hop_addr = get_next_hop_addr(curr_pte);
+ u64 hop_addr = get_next_hop_addr(ctx, curr_pte);
if (hop_addr == ULLONG_MAX) {
hop_addr = alloc_hop(ctx);
@@ -157,106 +217,30 @@ static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
return hop_addr;
}
-/*
- * hl_mmu_init - init the mmu module
- *
- * @hdev: pointer to the habanalabs device structure
- *
- * This function does the following:
- * - Allocate max_asid zeroed hop0 pgts so no mapping is available
- * - Enable mmu in hw
- * - Invalidate the mmu cache
- * - Create a pool of pages for pgts
- * - Returns 0 on success
- *
- * This function depends on DMA QMAN to be working!
- */
-int hl_mmu_init(struct hl_device *hdev)
+/* translates shadow address inside hop to a physical address */
+static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
- struct asic_fixed_properties *prop = &hdev->asic_prop;
- int rc;
+ u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
+ u64 shadow_hop_addr = shadow_addr & ~page_mask;
+ u64 pte_offset = shadow_addr & page_mask;
+ u64 phys_hop_addr;
- if (!hdev->mmu_enable)
- return 0;
-
- /* MMU HW init was already done in device hw_init() */
-
- mutex_init(&hdev->mmu_cache_lock);
-
- hdev->mmu_pgt_pool =
- gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
-
- if (!hdev->mmu_pgt_pool) {
- dev_err(hdev->dev, "Failed to create page gen pool\n");
- rc = -ENOMEM;
- goto err_pool_create;
- }
-
- rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr +
- prop->mmu_hop0_tables_total_size,
- prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
- -1);
- if (rc) {
- dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
- goto err_pool_add;
- }
-
- return 0;
-
-err_pool_add:
- gen_pool_destroy(hdev->mmu_pgt_pool);
-err_pool_create:
- mutex_destroy(&hdev->mmu_cache_lock);
+ if (shadow_hop_addr != get_hop0_addr(ctx))
+ phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
+ else
+ phys_hop_addr = get_phys_hop0_addr(ctx);
- return rc;
+ return phys_hop_addr + pte_offset;
}
-/*
- * hl_mmu_fini - release the mmu module.
- *
- * @hdev: pointer to the habanalabs device structure
- *
- * This function does the following:
- * - Disable mmu in hw
- * - free the pgts pool
- *
- * All ctxs should be freed before calling this func
- */
-void hl_mmu_fini(struct hl_device *hdev)
-{
- if (!hdev->mmu_enable)
- return;
-
- gen_pool_destroy(hdev->mmu_pgt_pool);
-
- mutex_destroy(&hdev->mmu_cache_lock);
-
- /* MMU HW fini will be done in device hw_fini() */
-}
-
-/**
- * hl_mmu_ctx_init() - initialize a context for using the MMU module.
- * @ctx: pointer to the context structure to initialize.
- *
- * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all
- * page tables hops related to this context and an optional DRAM default page
- * mapping.
- * Return: 0 on success, non-zero otherwise.
- */
-int hl_mmu_ctx_init(struct hl_ctx *ctx)
+static int dram_default_mapping_init(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 num_of_hop3, total_hops, hop1_addr, hop2_addr, hop2_pte_addr,
- hop3_pte_addr, pte_val;
+ u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
+ hop2_pte_addr, hop3_pte_addr, pte_val;
int rc, i, j, hop3_allocated = 0;
- if (!hdev->mmu_enable)
- return 0;
-
- mutex_init(&ctx->mmu_lock);
- hash_init(ctx->mmu_hash);
-
if (!hdev->dram_supports_virtual_memory ||
!hdev->dram_default_page_mapping)
return 0;
@@ -269,10 +253,10 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx)
total_hops = num_of_hop3 + 2;
ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
- if (!ctx->dram_default_hops) {
- rc = -ENOMEM;
- goto alloc_err;
- }
+ if (!ctx->dram_default_hops)
+ return -ENOMEM;
+
+ hop0_addr = get_hop0_addr(ctx);
hop1_addr = alloc_hop(ctx);
if (hop1_addr == ULLONG_MAX) {
@@ -304,17 +288,17 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx)
/* need only pte 0 in hops 0 and 1 */
pte_val = (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
- hdev->asic_funcs->write_pte(hdev, get_hop0_addr(ctx), pte_val);
+ write_pte(ctx, hop0_addr, pte_val);
pte_val = (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
- hdev->asic_funcs->write_pte(hdev, hop1_addr, pte_val);
+ write_pte(ctx, hop1_addr, pte_val);
get_pte(ctx, hop1_addr);
hop2_pte_addr = hop2_addr;
for (i = 0 ; i < num_of_hop3 ; i++) {
pte_val = (ctx->dram_default_hops[i] & PTE_PHYS_ADDR_MASK) |
PAGE_PRESENT_MASK;
- hdev->asic_funcs->write_pte(hdev, hop2_pte_addr, pte_val);
+ write_pte(ctx, hop2_pte_addr, pte_val);
get_pte(ctx, hop2_addr);
hop2_pte_addr += HL_PTE_SIZE;
}
@@ -325,33 +309,183 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx)
for (i = 0 ; i < num_of_hop3 ; i++) {
hop3_pte_addr = ctx->dram_default_hops[i];
for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
- hdev->asic_funcs->write_pte(hdev, hop3_pte_addr,
- pte_val);
+ write_final_pte(ctx, hop3_pte_addr, pte_val);
get_pte(ctx, ctx->dram_default_hops[i]);
hop3_pte_addr += HL_PTE_SIZE;
}
}
- /* flush all writes to reach PCI */
- mb();
- hdev->asic_funcs->read_pte(hdev, hop2_addr);
+ flush(ctx);
return 0;
hop3_err:
for (i = 0 ; i < hop3_allocated ; i++)
free_hop(ctx, ctx->dram_default_hops[i]);
+
free_hop(ctx, hop2_addr);
hop2_err:
free_hop(ctx, hop1_addr);
hop1_err:
kfree(ctx->dram_default_hops);
-alloc_err:
- mutex_destroy(&ctx->mmu_lock);
return rc;
}
+static void dram_default_mapping_fini(struct hl_ctx *ctx)
+{
+ struct hl_device *hdev = ctx->hdev;
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
+ hop2_pte_addr, hop3_pte_addr;
+ int i, j;
+
+ if (!hdev->dram_supports_virtual_memory ||
+ !hdev->dram_default_page_mapping)
+ return;
+
+ num_of_hop3 = prop->dram_size_for_default_page_mapping;
+ do_div(num_of_hop3, prop->dram_page_size);
+ do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
+
+ hop0_addr = get_hop0_addr(ctx);
+ /* add hop1 and hop2 */
+ total_hops = num_of_hop3 + 2;
+ hop1_addr = ctx->dram_default_hops[total_hops - 1];
+ hop2_addr = ctx->dram_default_hops[total_hops - 2];
+
+ for (i = 0 ; i < num_of_hop3 ; i++) {
+ hop3_pte_addr = ctx->dram_default_hops[i];
+ for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
+ clear_pte(ctx, hop3_pte_addr);
+ put_pte(ctx, ctx->dram_default_hops[i]);
+ hop3_pte_addr += HL_PTE_SIZE;
+ }
+ }
+
+ hop2_pte_addr = hop2_addr;
+ hop2_pte_addr = hop2_addr;
+ for (i = 0 ; i < num_of_hop3 ; i++) {
+ clear_pte(ctx, hop2_pte_addr);
+ put_pte(ctx, hop2_addr);
+ hop2_pte_addr += HL_PTE_SIZE;
+ }
+
+ clear_pte(ctx, hop1_addr);
+ put_pte(ctx, hop1_addr);
+ clear_pte(ctx, hop0_addr);
+
+ kfree(ctx->dram_default_hops);
+
+ flush(ctx);
+}
+
+/**
+ * hl_mmu_init() - initialize the MMU module.
+ * @hdev: habanalabs device structure.
+ *
+ * This function does the following:
+ * - Allocate max_asid zeroed hop0 pgts so no mapping is available.
+ * - Enable MMU in H/W.
+ * - Invalidate the MMU cache.
+ * - Create a pool of pages for pgt_infos.
+ *
+ * This function depends on DMA QMAN to be working!
+ *
+ * Return: 0 for success, non-zero for failure.
+ */
+int hl_mmu_init(struct hl_device *hdev)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ int rc;
+
+ if (!hdev->mmu_enable)
+ return 0;
+
+ /* MMU H/W init was already done in device hw_init() */
+
+ mutex_init(&hdev->mmu_cache_lock);
+
+ hdev->mmu_pgt_pool =
+ gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
+
+ if (!hdev->mmu_pgt_pool) {
+ dev_err(hdev->dev, "Failed to create page gen pool\n");
+ rc = -ENOMEM;
+ goto err_pool_create;
+ }
+
+ rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr +
+ prop->mmu_hop0_tables_total_size,
+ prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
+ -1);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
+ goto err_pool_add;
+ }
+
+ hdev->mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
+ prop->mmu_hop_table_size,
+ GFP_KERNEL | __GFP_ZERO);
+ if (!hdev->mmu_shadow_hop0) {
+ rc = -ENOMEM;
+ goto err_pool_add;
+ }
+
+ return 0;
+
+err_pool_add:
+ gen_pool_destroy(hdev->mmu_pgt_pool);
+err_pool_create:
+ mutex_destroy(&hdev->mmu_cache_lock);
+
+ return rc;
+}
+
+/**
+ * hl_mmu_fini() - release the MMU module.
+ * @hdev: habanalabs device structure.
+ *
+ * This function does the following:
+ * - Disable MMU in H/W.
+ * - Free the pgt_infos pool.
+ *
+ * All contexts should be freed before calling this function.
+ */
+void hl_mmu_fini(struct hl_device *hdev)
+{
+ if (!hdev->mmu_enable)
+ return;
+
+ kvfree(hdev->mmu_shadow_hop0);
+ gen_pool_destroy(hdev->mmu_pgt_pool);
+ mutex_destroy(&hdev->mmu_cache_lock);
+
+ /* MMU H/W fini will be done in device hw_fini() */
+}
+
+/**
+ * hl_mmu_ctx_init() - initialize a context for using the MMU module.
+ * @ctx: pointer to the context structure to initialize.
+ *
+ * Initialize a mutex to protect the concurrent mapping flow, a hash to hold all
+ * page tables hops related to this context.
+ * Return: 0 on success, non-zero otherwise.
+ */
+int hl_mmu_ctx_init(struct hl_ctx *ctx)
+{
+ struct hl_device *hdev = ctx->hdev;
+
+ if (!hdev->mmu_enable)
+ return 0;
+
+ mutex_init(&ctx->mmu_lock);
+ hash_init(ctx->mmu_phys_hash);
+ hash_init(ctx->mmu_shadow_hash);
+
+ return dram_default_mapping_init(ctx);
+}
+
/*
* hl_mmu_ctx_fini - disable a ctx from using the mmu module
*
@@ -365,63 +499,23 @@ alloc_err:
void hl_mmu_ctx_fini(struct hl_ctx *ctx)
{
struct hl_device *hdev = ctx->hdev;
- struct asic_fixed_properties *prop = &hdev->asic_prop;
struct pgt_info *pgt_info;
struct hlist_node *tmp;
- u64 num_of_hop3, total_hops, hop1_addr, hop2_addr, hop2_pte_addr,
- hop3_pte_addr;
- int i, j;
+ int i;
- if (!ctx->hdev->mmu_enable)
+ if (!hdev->mmu_enable)
return;
- if (hdev->dram_supports_virtual_memory &&
- hdev->dram_default_page_mapping) {
-
- num_of_hop3 = prop->dram_size_for_default_page_mapping;
- do_div(num_of_hop3, prop->dram_page_size);
- do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);
-
- /* add hop1 and hop2 */
- total_hops = num_of_hop3 + 2;
- hop1_addr = ctx->dram_default_hops[total_hops - 1];
- hop2_addr = ctx->dram_default_hops[total_hops - 2];
-
- for (i = 0 ; i < num_of_hop3 ; i++) {
- hop3_pte_addr = ctx->dram_default_hops[i];
- for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
- clear_pte(hdev, hop3_pte_addr);
- put_pte(ctx, ctx->dram_default_hops[i]);
- hop3_pte_addr += HL_PTE_SIZE;
- }
- }
+ dram_default_mapping_fini(ctx);
- hop2_pte_addr = hop2_addr;
- for (i = 0 ; i < num_of_hop3 ; i++) {
- clear_pte(hdev, hop2_pte_addr);
- put_pte(ctx, hop2_addr);
- hop2_pte_addr += HL_PTE_SIZE;
- }
-
- clear_pte(hdev, hop1_addr);
- put_pte(ctx, hop1_addr);
- clear_pte(hdev, get_hop0_addr(ctx));
-
- kfree(ctx->dram_default_hops);
-
- /* flush all writes to reach PCI */
- mb();
- hdev->asic_funcs->read_pte(hdev, hop2_addr);
- }
-
- if (!hash_empty(ctx->mmu_hash))
+ if (!hash_empty(ctx->mmu_shadow_hash))
dev_err(hdev->dev, "ctx is freed while it has pgts in use\n");
- hash_for_each_safe(ctx->mmu_hash, i, tmp, pgt_info, node) {
+ hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
dev_err(hdev->dev,
"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
- pgt_info->addr, ctx->asid, pgt_info->num_of_ptes);
- free_hop(ctx, pgt_info->addr);
+ pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
+ free_hop(ctx, pgt_info->shadow_addr);
}
mutex_destroy(&ctx->mmu_lock);
@@ -437,45 +531,43 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
hop3_addr = 0, hop3_pte_addr = 0,
hop4_addr = 0, hop4_pte_addr = 0,
curr_pte;
- int clear_hop3 = 1;
- bool is_dram_addr, is_huge, is_dram_default_page_mapping;
+ bool is_dram_addr, is_huge, clear_hop3 = true;
is_dram_addr = hl_mem_area_inside_range(virt_addr, PAGE_SIZE_2MB,
prop->va_space_dram_start_address,
prop->va_space_dram_end_address);
hop0_addr = get_hop0_addr(ctx);
-
hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
- curr_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
+ curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
- hop1_addr = get_next_hop_addr(curr_pte);
+ hop1_addr = get_next_hop_addr(ctx, curr_pte);
if (hop1_addr == ULLONG_MAX)
goto not_mapped;
hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
- curr_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
+ curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
- hop2_addr = get_next_hop_addr(curr_pte);
+ hop2_addr = get_next_hop_addr(ctx, curr_pte);
if (hop2_addr == ULLONG_MAX)
goto not_mapped;
hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
- curr_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
+ curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
- hop3_addr = get_next_hop_addr(curr_pte);
+ hop3_addr = get_next_hop_addr(ctx, curr_pte);
if (hop3_addr == ULLONG_MAX)
goto not_mapped;
hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
- curr_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
+ curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
is_huge = curr_pte & LAST_MASK;
@@ -485,27 +577,24 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
return -EFAULT;
}
- is_dram_default_page_mapping =
- hdev->dram_default_page_mapping && is_dram_addr;
-
if (!is_huge) {
- hop4_addr = get_next_hop_addr(curr_pte);
+ hop4_addr = get_next_hop_addr(ctx, curr_pte);
if (hop4_addr == ULLONG_MAX)
goto not_mapped;
hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
- curr_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
+ curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
- clear_hop3 = 0;
+ clear_hop3 = false;
}
- if (is_dram_default_page_mapping) {
- u64 zero_pte = (prop->mmu_dram_default_page_addr &
+ if (hdev->dram_default_page_mapping && is_dram_addr) {
+ u64 default_pte = (prop->mmu_dram_default_page_addr &
PTE_PHYS_ADDR_MASK) | LAST_MASK |
PAGE_PRESENT_MASK;
- if (curr_pte == zero_pte) {
+ if (curr_pte == default_pte) {
dev_err(hdev->dev,
"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
virt_addr);
@@ -519,40 +608,43 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
goto not_mapped;
}
- hdev->asic_funcs->write_pte(hdev, hop3_pte_addr, zero_pte);
+ write_final_pte(ctx, hop3_pte_addr, default_pte);
put_pte(ctx, hop3_addr);
} else {
if (!(curr_pte & PAGE_PRESENT_MASK))
goto not_mapped;
- clear_pte(hdev, hop4_addr ? hop4_pte_addr : hop3_pte_addr);
+ if (hop4_addr)
+ clear_pte(ctx, hop4_pte_addr);
+ else
+ clear_pte(ctx, hop3_pte_addr);
if (hop4_addr && !put_pte(ctx, hop4_addr))
- clear_hop3 = 1;
+ clear_hop3 = true;
if (!clear_hop3)
goto flush;
- clear_pte(hdev, hop3_pte_addr);
+
+ clear_pte(ctx, hop3_pte_addr);
if (put_pte(ctx, hop3_addr))
goto flush;
- clear_pte(hdev, hop2_pte_addr);
+
+ clear_pte(ctx, hop2_pte_addr);
if (put_pte(ctx, hop2_addr))
goto flush;
- clear_pte(hdev, hop1_pte_addr);
+
+ clear_pte(ctx, hop1_pte_addr);
if (put_pte(ctx, hop1_addr))
goto flush;
- clear_pte(hdev, hop0_pte_addr);
+
+ clear_pte(ctx, hop0_pte_addr);
}
flush:
- /* flush all writes from all cores to reach PCI */
- mb();
-
- hdev->asic_funcs->read_pte(hdev,
- hop4_addr ? hop4_pte_addr : hop3_pte_addr);
+ flush(ctx);
return 0;
@@ -632,8 +724,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
hop4_addr = 0, hop4_pte_addr = 0,
curr_pte = 0;
bool hop1_new = false, hop2_new = false, hop3_new = false,
- hop4_new = false, is_huge, is_dram_addr,
- is_dram_default_page_mapping;
+ hop4_new = false, is_huge, is_dram_addr;
int rc = -ENOMEM;
/*
@@ -654,59 +745,46 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
return -EFAULT;
}
- is_dram_default_page_mapping =
- hdev->dram_default_page_mapping && is_dram_addr;
-
hop0_addr = get_hop0_addr(ctx);
-
hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
-
- curr_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr);
+ curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;
hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
-
if (hop1_addr == ULLONG_MAX)
goto err;
hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
-
- curr_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr);
+ curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;
hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
-
if (hop2_addr == ULLONG_MAX)
goto err;
hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
-
- curr_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr);
+ curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;
hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
-
if (hop3_addr == ULLONG_MAX)
goto err;
hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
-
- curr_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr);
+ curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;
if (!is_huge) {
hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
-
if (hop4_addr == ULLONG_MAX)
goto err;
hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
-
- curr_pte = hdev->asic_funcs->read_pte(hdev, hop4_pte_addr);
+ curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
}
- if (is_dram_default_page_mapping) {
- u64 zero_pte = (prop->mmu_dram_default_page_addr &
+ if (hdev->dram_default_page_mapping && is_dram_addr) {
+ u64 default_pte = (prop->mmu_dram_default_page_addr &
PTE_PHYS_ADDR_MASK) | LAST_MASK |
PAGE_PRESENT_MASK;
- if (curr_pte != zero_pte) {
+ if (curr_pte != default_pte) {
dev_err(hdev->dev,
"DRAM: mapping already exists for virt_addr 0x%llx\n",
virt_addr);
@@ -722,27 +800,22 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
}
} else if (curr_pte & PAGE_PRESENT_MASK) {
dev_err(hdev->dev,
- "mapping already exists for virt_addr 0x%llx\n",
- virt_addr);
+ "mapping already exists for virt_addr 0x%llx\n",
+ virt_addr);
dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
- hdev->asic_funcs->read_pte(hdev, hop0_pte_addr),
- hop0_pte_addr);
+ *(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
- hdev->asic_funcs->read_pte(hdev, hop1_pte_addr),
- hop1_pte_addr);
+ *(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
- hdev->asic_funcs->read_pte(hdev, hop2_pte_addr),
- hop2_pte_addr);
+ *(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
- hdev->asic_funcs->read_pte(hdev, hop3_pte_addr),
- hop3_pte_addr);
+ *(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);
if (!is_huge)
dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
- hdev->asic_funcs->read_pte(hdev,
- hop4_pte_addr),
- hop4_pte_addr);
+ *(u64 *) (uintptr_t) hop4_pte_addr,
+ hop4_pte_addr);
rc = -EINVAL;
goto err;
@@ -751,28 +824,26 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
curr_pte = (phys_addr & PTE_PHYS_ADDR_MASK) | LAST_MASK
| PAGE_PRESENT_MASK;
- hdev->asic_funcs->write_pte(hdev,
- is_huge ? hop3_pte_addr : hop4_pte_addr,
- curr_pte);
+ if (is_huge)
+ write_final_pte(ctx, hop3_pte_addr, curr_pte);
+ else
+ write_final_pte(ctx, hop4_pte_addr, curr_pte);
if (hop1_new) {
- curr_pte = (hop1_addr & PTE_PHYS_ADDR_MASK) |
- PAGE_PRESENT_MASK;
- ctx->hdev->asic_funcs->write_pte(ctx->hdev, hop0_pte_addr,
- curr_pte);
+ curr_pte =
+ (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+ write_pte(ctx, hop0_pte_addr, curr_pte);
}
if (hop2_new) {
- curr_pte = (hop2_addr & PTE_PHYS_ADDR_MASK) |
- PAGE_PRESENT_MASK;
- ctx->hdev->asic_funcs->write_pte(ctx->hdev, hop1_pte_addr,
- curr_pte);
+ curr_pte =
+ (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+ write_pte(ctx, hop1_pte_addr, curr_pte);
get_pte(ctx, hop1_addr);
}
if (hop3_new) {
- curr_pte = (hop3_addr & PTE_PHYS_ADDR_MASK) |
- PAGE_PRESENT_MASK;
- ctx->hdev->asic_funcs->write_pte(ctx->hdev, hop2_pte_addr,
- curr_pte);
+ curr_pte =
+ (hop3_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
+ write_pte(ctx, hop2_pte_addr, curr_pte);
get_pte(ctx, hop2_addr);
}
@@ -780,8 +851,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
if (hop4_new) {
curr_pte = (hop4_addr & PTE_PHYS_ADDR_MASK) |
PAGE_PRESENT_MASK;
- ctx->hdev->asic_funcs->write_pte(ctx->hdev,
- hop3_pte_addr, curr_pte);
+ write_pte(ctx, hop3_pte_addr, curr_pte);
get_pte(ctx, hop3_addr);
}
@@ -790,11 +860,7 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
get_pte(ctx, hop3_addr);
}
- /* flush all writes from all cores to reach PCI */
- mb();
-
- hdev->asic_funcs->read_pte(hdev,
- is_huge ? hop3_pte_addr : hop4_pte_addr);
+ flush(ctx);
return 0;
diff --git a/drivers/misc/habanalabs/pci.c b/drivers/misc/habanalabs/pci.c
new file mode 100644
index 000000000000..d472d02a8e6e
--- /dev/null
+++ b/drivers/misc/habanalabs/pci.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2016-2019 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include "habanalabs.h"
+#include "include/hw_ip/pci/pci_general.h"
+
+#include <linux/pci.h>
+
+/**
+ * hl_pci_bars_map() - Map PCI BARs.
+ * @hdev: Pointer to hl_device structure.
+ * @bar_name: Array of BAR names.
+ * @is_wc: Array with flag per BAR whether a write-combined mapping is needed.
+ *
+ * Request PCI regions and map them to kernel virtual addresses.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
+ bool is_wc[3])
+{
+ struct pci_dev *pdev = hdev->pdev;
+ int rc, i, bar;
+
+ rc = pci_request_regions(pdev, HL_NAME);
+ if (rc) {
+ dev_err(hdev->dev, "Cannot obtain PCI resources\n");
+ return rc;
+ }
+
+ for (i = 0 ; i < 3 ; i++) {
+ bar = i * 2; /* 64-bit BARs */
+ hdev->pcie_bar[bar] = is_wc[i] ?
+ pci_ioremap_wc_bar(pdev, bar) :
+ pci_ioremap_bar(pdev, bar);
+ if (!hdev->pcie_bar[bar]) {
+ dev_err(hdev->dev, "pci_ioremap%s_bar failed for %s\n",
+ is_wc[i] ? "_wc" : "", name[i]);
+ rc = -ENODEV;
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ for (i = 2 ; i >= 0 ; i--) {
+ bar = i * 2; /* 64-bit BARs */
+ if (hdev->pcie_bar[bar])
+ iounmap(hdev->pcie_bar[bar]);
+ }
+
+ pci_release_regions(pdev);
+
+ return rc;
+}
+
+/*
+ * hl_pci_bars_unmap() - Unmap PCI BARS.
+ * @hdev: Pointer to hl_device structure.
+ *
+ * Release all PCI BARs and unmap their virtual addresses.
+ */
+static void hl_pci_bars_unmap(struct hl_device *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ int i, bar;
+
+ for (i = 2 ; i >= 0 ; i--) {
+ bar = i * 2; /* 64-bit BARs */
+ iounmap(hdev->pcie_bar[bar]);
+ }
+
+ pci_release_regions(pdev);
+}
+
+/*
+ * hl_pci_elbi_write() - Write through the ELBI interface.
+ * @hdev: Pointer to hl_device structure.
+ *
+ * Return: 0 on success, negative value for failure.
+ */
+static int hl_pci_elbi_write(struct hl_device *hdev, u64 addr, u32 data)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ ktime_t timeout;
+ u32 val;
+
+ /* Clear previous status */
+ pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, 0);
+
+ pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_ADDR, (u32) addr);
+ pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_DATA, data);
+ pci_write_config_dword(pdev, mmPCI_CONFIG_ELBI_CTRL,
+ PCI_CONFIG_ELBI_CTRL_WRITE);
+
+ timeout = ktime_add_ms(ktime_get(), 10);
+ for (;;) {
+ pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS, &val);
+ if (val & PCI_CONFIG_ELBI_STS_MASK)
+ break;
+ if (ktime_compare(ktime_get(), timeout) > 0) {
+ pci_read_config_dword(pdev, mmPCI_CONFIG_ELBI_STS,
+ &val);
+ break;
+ }
+
+ usleep_range(300, 500);
+ }
+
+ if ((val & PCI_CONFIG_ELBI_STS_MASK) == PCI_CONFIG_ELBI_STS_DONE)
+ return 0;
+
+ if (val & PCI_CONFIG_ELBI_STS_ERR) {
+ dev_err(hdev->dev, "Error writing to ELBI\n");
+ return -EIO;
+ }
+
+ if (!(val & PCI_CONFIG_ELBI_STS_MASK)) {
+ dev_err(hdev->dev, "ELBI write didn't finish in time\n");
+ return -EIO;
+ }
+
+ dev_err(hdev->dev, "ELBI write has undefined bits in status\n");
+ return -EIO;
+}
+
+/**
+ * hl_pci_iatu_write() - iatu write routine.
+ * @hdev: Pointer to hl_device structure.
+ *
+ * Return: 0 on success, negative value for failure.
+ */
+int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u32 dbi_offset;
+ int rc;
+
+ dbi_offset = addr & 0xFFF;
+
+ rc = hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0x00300000);
+ rc |= hl_pci_elbi_write(hdev, prop->pcie_dbi_base_address + dbi_offset,
+ data);
+
+ if (rc)
+ return -EIO;
+
+ return 0;
+}
+
+/*
+ * hl_pci_reset_link_through_bridge() - Reset PCI link.
+ * @hdev: Pointer to hl_device structure.
+ */
+static void hl_pci_reset_link_through_bridge(struct hl_device *hdev)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ struct pci_dev *parent_port;
+ u16 val;
+
+ parent_port = pdev->bus->self;
+ pci_read_config_word(parent_port, PCI_BRIDGE_CONTROL, &val);
+ val |= PCI_BRIDGE_CTL_BUS_RESET;
+ pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
+ ssleep(1);
+
+ val &= ~(PCI_BRIDGE_CTL_BUS_RESET);
+ pci_write_config_word(parent_port, PCI_BRIDGE_CONTROL, val);
+ ssleep(3);
+}
+
+/**
+ * hl_pci_set_dram_bar_base() - Set DDR BAR to map specific device address.
+ * @hdev: Pointer to hl_device structure.
+ * @inbound_region: Inbound region number.
+ * @bar: PCI BAR number.
+ * @addr: Address in DRAM. Must be aligned to DRAM bar size.
+ *
+ * Configure the iATU so that the DRAM bar will start at the specified address.
+ *
+ * Return: 0 on success, negative value for failure.
+ */
+int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
+ u64 addr)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u32 offset;
+ int rc;
+
+ switch (inbound_region) {
+ case 0:
+ offset = 0x100;
+ break;
+ case 1:
+ offset = 0x300;
+ break;
+ case 2:
+ offset = 0x500;
+ break;
+ default:
+ dev_err(hdev->dev, "Invalid inbound region %d\n",
+ inbound_region);
+ return -EINVAL;
+ }
+
+ if (bar != 0 && bar != 2 && bar != 4) {
+ dev_err(hdev->dev, "Invalid PCI BAR %d\n", bar);
+ return -EINVAL;
+ }
+
+ /* Point to the specified address */
+ rc = hl_pci_iatu_write(hdev, offset + 0x14, lower_32_bits(addr));
+ rc |= hl_pci_iatu_write(hdev, offset + 0x18, upper_32_bits(addr));
+ rc |= hl_pci_iatu_write(hdev, offset + 0x0, 0);
+ /* Enable + BAR match + match enable + BAR number */
+ rc |= hl_pci_iatu_write(hdev, offset + 0x4, 0xC0080000 | (bar << 8));
+
+ /* Return the DBI window to the default location */
+ rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
+ rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
+
+ if (rc)
+ dev_err(hdev->dev, "failed to map DRAM bar to 0x%08llx\n",
+ addr);
+
+ return rc;
+}
+
+/**
+ * hl_pci_init_iatu() - Initialize the iATU unit inside the PCI controller.
+ * @hdev: Pointer to hl_device structure.
+ * @sram_base_address: SRAM base address.
+ * @dram_base_address: DRAM base address.
+ * @host_phys_size: Size of host memory for device transactions.
+ *
+ * This is needed in case the firmware doesn't initialize the iATU.
+ *
+ * Return: 0 on success, negative value for failure.
+ */
+int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
+ u64 dram_base_address, u64 host_phys_size)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u64 host_phys_end_addr;
+ int rc = 0;
+
+ /* Inbound Region 0 - Bar 0 - Point to SRAM base address */
+ rc = hl_pci_iatu_write(hdev, 0x114, lower_32_bits(sram_base_address));
+ rc |= hl_pci_iatu_write(hdev, 0x118, upper_32_bits(sram_base_address));
+ rc |= hl_pci_iatu_write(hdev, 0x100, 0);
+ /* Enable + Bar match + match enable */
+ rc |= hl_pci_iatu_write(hdev, 0x104, 0xC0080000);
+
+ /* Point to DRAM */
+ if (!hdev->asic_funcs->set_dram_bar_base)
+ return -EINVAL;
+ rc |= hdev->asic_funcs->set_dram_bar_base(hdev, dram_base_address);
+
+ /* Outbound Region 0 - Point to Host */
+ host_phys_end_addr = prop->host_phys_base_address + host_phys_size - 1;
+ rc |= hl_pci_iatu_write(hdev, 0x008,
+ lower_32_bits(prop->host_phys_base_address));
+ rc |= hl_pci_iatu_write(hdev, 0x00C,
+ upper_32_bits(prop->host_phys_base_address));
+ rc |= hl_pci_iatu_write(hdev, 0x010, lower_32_bits(host_phys_end_addr));
+ rc |= hl_pci_iatu_write(hdev, 0x014, 0);
+ rc |= hl_pci_iatu_write(hdev, 0x018, 0);
+ rc |= hl_pci_iatu_write(hdev, 0x020, upper_32_bits(host_phys_end_addr));
+ /* Increase region size */
+ rc |= hl_pci_iatu_write(hdev, 0x000, 0x00002000);
+ /* Enable */
+ rc |= hl_pci_iatu_write(hdev, 0x004, 0x80000000);
+
+ /* Return the DBI window to the default location */
+ rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
+ rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);
+
+ if (rc)
+ return -EIO;
+
+ return 0;
+}
+
+/**
+ * hl_pci_set_dma_mask() - Set DMA masks for the device.
+ * @hdev: Pointer to hl_device structure.
+ * @dma_mask: number of bits for the requested dma mask.
+ *
+ * This function sets the DMA masks (regular and consistent) for a specified
+ * value. If it doesn't succeed, it tries to set it to a fall-back value
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+int hl_pci_set_dma_mask(struct hl_device *hdev, u8 dma_mask)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ int rc;
+
+ /* set DMA mask */
+ rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_mask));
+ if (rc) {
+ dev_warn(hdev->dev,
+ "Failed to set pci dma mask to %d bits, error %d\n",
+ dma_mask, rc);
+
+ dma_mask = hdev->dma_mask;
+
+ rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_mask));
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to set pci dma mask to %d bits, error %d\n",
+ dma_mask, rc);
+ return rc;
+ }
+ }
+
+ /*
+ * We managed to set the dma mask, so update the dma mask field. If
+ * the set to the coherent mask will fail with that mask, we will
+ * fail the entire function
+ */
+ hdev->dma_mask = dma_mask;
+
+ rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_mask));
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to set pci consistent dma mask to %d bits, error %d\n",
+ dma_mask, rc);
+ return rc;
+ }
+
+ return 0;
+}
+
+/**
+ * hl_pci_init() - PCI initialization code.
+ * @hdev: Pointer to hl_device structure.
+ * @dma_mask: number of bits for the requested dma mask.
+ *
+ * Set DMA masks, initialize the PCI controller and map the PCI BARs.
+ *
+ * Return: 0 on success, non-zero for failure.
+ */
+int hl_pci_init(struct hl_device *hdev, u8 dma_mask)
+{
+ struct pci_dev *pdev = hdev->pdev;
+ int rc;
+
+ rc = hl_pci_set_dma_mask(hdev, dma_mask);
+ if (rc)
+ return rc;
+
+ if (hdev->reset_pcilink)
+ hl_pci_reset_link_through_bridge(hdev);
+
+ rc = pci_enable_device_mem(pdev);
+ if (rc) {
+ dev_err(hdev->dev, "can't enable PCI device\n");
+ return rc;
+ }
+
+ pci_set_master(pdev);
+
+ rc = hdev->asic_funcs->init_iatu(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to initialize iATU\n");
+ goto disable_device;
+ }
+
+ rc = hdev->asic_funcs->pci_bars_map(hdev);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to initialize PCI BARs\n");
+ goto disable_device;
+ }
+
+ return 0;
+
+disable_device:
+ pci_clear_master(pdev);
+ pci_disable_device(pdev);
+
+ return rc;
+}
+
+/**
+ * hl_fw_fini() - PCI finalization code.
+ * @hdev: Pointer to hl_device structure
+ *
+ * Unmap PCI bars and disable PCI device.
+ */
+void hl_pci_fini(struct hl_device *hdev)
+{
+ hl_pci_bars_unmap(hdev);
+
+ pci_clear_master(hdev->pdev);
+ pci_disable_device(hdev->pdev);
+}
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 7fd6f633534c..613d431da783 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -20,8 +20,8 @@
/*
* Queue Numbering
*
- * The external queues (DMA channels + CPU) MUST be before the internal queues
- * and each group (DMA channels + CPU and internal) must be contiguous inside
+ * The external queues (PCI DMA channels) MUST be before the internal queues
+ * and each group (PCI DMA channels and internal) must be contiguous inside
* itself but there can be a gap between the two groups (although not
* recommended)
*/
@@ -33,7 +33,7 @@ enum goya_queue_id {
GOYA_QUEUE_ID_DMA_3,
GOYA_QUEUE_ID_DMA_4,
GOYA_QUEUE_ID_CPU_PQ,
- GOYA_QUEUE_ID_MME,
+ GOYA_QUEUE_ID_MME, /* Internal queues start here */
GOYA_QUEUE_ID_TPC0,
GOYA_QUEUE_ID_TPC1,
GOYA_QUEUE_ID_TPC2,
@@ -45,11 +45,18 @@ enum goya_queue_id {
GOYA_QUEUE_ID_SIZE
};
+enum hl_device_status {
+ HL_DEVICE_STATUS_OPERATIONAL,
+ HL_DEVICE_STATUS_IN_RESET,
+ HL_DEVICE_STATUS_MALFUNCTION
+};
+
/* Opcode for management ioctl */
#define HL_INFO_HW_IP_INFO 0
#define HL_INFO_HW_EVENTS 1
#define HL_INFO_DRAM_USAGE 2
#define HL_INFO_HW_IDLE 3
+#define HL_INFO_DEVICE_STATUS 4
#define HL_INFO_VERSION_MAX_LEN 128
@@ -82,6 +89,11 @@ struct hl_info_hw_idle {
__u32 pad;
};
+struct hl_info_device_status {
+ __u32 status;
+ __u32 pad;
+};
+
struct hl_info_args {
/* Location of relevant struct in userspace */
__u64 return_pointer;
@@ -181,7 +193,10 @@ struct hl_cs_in {
};
struct hl_cs_out {
- /* this holds the sequence number of the CS to pass to wait ioctl */
+ /*
+ * seq holds the sequence number of the CS to pass to wait ioctl. All
+ * values are valid except for 0 and ULLONG_MAX
+ */
__u64 seq;
/* HL_CS_STATUS_* */
__u32 status;
@@ -320,6 +335,107 @@ union hl_mem_args {
struct hl_mem_out out;
};
+#define HL_DEBUG_MAX_AUX_VALUES 10
+
+struct hl_debug_params_etr {
+ /* Address in memory to allocate buffer */
+ __u64 buffer_address;
+
+ /* Size of buffer to allocate */
+ __u64 buffer_size;
+
+ /* Sink operation mode: SW fifo, HW fifo, Circular buffer */
+ __u32 sink_mode;
+ __u32 pad;
+};
+
+struct hl_debug_params_etf {
+ /* Address in memory to allocate buffer */
+ __u64 buffer_address;
+
+ /* Size of buffer to allocate */
+ __u64 buffer_size;
+
+ /* Sink operation mode: SW fifo, HW fifo, Circular buffer */
+ __u32 sink_mode;
+ __u32 pad;
+};
+
+struct hl_debug_params_stm {
+ /* Two bit masks for HW event and Stimulus Port */
+ __u64 he_mask;
+ __u64 sp_mask;
+
+ /* Trace source ID */
+ __u32 id;
+
+ /* Frequency for the timestamp register */
+ __u32 frequency;
+};
+
+struct hl_debug_params_bmon {
+ /* Transaction address filter */
+ __u64 addr_range0;
+ __u64 addr_range1;
+
+ /* Capture window configuration */
+ __u32 bw_win;
+ __u32 win_capture;
+
+ /* Trace source ID */
+ __u32 id;
+ __u32 pad;
+};
+
+struct hl_debug_params_spmu {
+ /* Event types selection */
+ __u64 event_types[HL_DEBUG_MAX_AUX_VALUES];
+
+ /* Number of event types selection */
+ __u32 event_types_num;
+ __u32 pad;
+};
+
+/* Opcode for ETR component */
+#define HL_DEBUG_OP_ETR 0
+/* Opcode for ETF component */
+#define HL_DEBUG_OP_ETF 1
+/* Opcode for STM component */
+#define HL_DEBUG_OP_STM 2
+/* Opcode for FUNNEL component */
+#define HL_DEBUG_OP_FUNNEL 3
+/* Opcode for BMON component */
+#define HL_DEBUG_OP_BMON 4
+/* Opcode for SPMU component */
+#define HL_DEBUG_OP_SPMU 5
+/* Opcode for timestamp */
+#define HL_DEBUG_OP_TIMESTAMP 6
+
+struct hl_debug_args {
+ /*
+ * Pointer to user input structure.
+ * This field is relevant to specific opcodes.
+ */
+ __u64 input_ptr;
+ /* Pointer to user output structure */
+ __u64 output_ptr;
+ /* Size of user input structure */
+ __u32 input_size;
+ /* Size of user output structure */
+ __u32 output_size;
+ /* HL_DEBUG_OP_* */
+ __u32 op;
+ /*
+ * Register index in the component, taken from the debug_regs_index enum
+ * in the various ASIC header files
+ */
+ __u32 reg_idx;
+ /* Enable/disable */
+ __u32 enable;
+ /* Context ID - Currently not in use */
+ __u32 ctx_id;
+};
+
/*
* Various information operations such as:
* - H/W IP information
@@ -361,6 +477,12 @@ union hl_mem_args {
* Each JOB will be enqueued on a specific queue, according to the user's input.
* There can be more then one JOB per queue.
*
+ * The CS IOCTL will receive three sets of JOBS. One set is for "restore" phase,
+ * a second set is for "execution" phase and a third set is for "store" phase.
+ * The JOBS on the "restore" phase are enqueued only after context-switch
+ * (or if its the first CS for this context). The user can also order the
+ * driver to run the "restore" phase explicitly
+ *
* There are two types of queues - external and internal. External queues
* are DMA queues which transfer data from/to the Host. All other queues are
* internal. The driver will get completion notifications from the device only
@@ -377,19 +499,18 @@ union hl_mem_args {
* relevant queues. Therefore, the user mustn't assume the CS has been completed
* or has even started to execute.
*
- * Upon successful enqueue, the IOCTL returns an opaque handle which the user
+ * Upon successful enqueue, the IOCTL returns a sequence number which the user
* can use with the "Wait for CS" IOCTL to check whether the handle's CS
* external JOBS have been completed. Note that if the CS has internal JOBS
* which can execute AFTER the external JOBS have finished, the driver might
* report that the CS has finished executing BEFORE the internal JOBS have
* actually finish executing.
*
- * The CS IOCTL will receive three sets of JOBS. One set is for "restore" phase,
- * a second set is for "execution" phase and a third set is for "store" phase.
- * The JOBS on the "restore" phase are enqueued only after context-switch
- * (or if its the first CS for this context). The user can also order the
- * driver to run the "restore" phase explicitly
- *
+ * Even though the sequence number increments per CS, the user can NOT
+ * automatically assume that if CS with sequence number N finished, then CS
+ * with sequence number N-1 also finished. The user can make this assumption if
+ * and only if CS N and CS N-1 are exactly the same (same CBs for the same
+ * queues).
*/
#define HL_IOCTL_CS \
_IOWR('H', 0x03, union hl_cs_args)
@@ -444,7 +565,20 @@ union hl_mem_args {
#define HL_IOCTL_MEMORY \
_IOWR('H', 0x05, union hl_mem_args)
+/*
+ * Debug
+ * - Enable/disable the ETR/ETF/FUNNEL/STM/BMON/SPMU debug traces
+ *
+ * This IOCTL allows the user to get debug traces from the chip.
+ *
+ * The user needs to provide the register index and essential data such as
+ * buffer address and size.
+ *
+ */
+#define HL_IOCTL_DEBUG \
+ _IOWR('H', 0x06, struct hl_debug_args)
+
#define HL_COMMAND_START 0x01
-#define HL_COMMAND_END 0x06
+#define HL_COMMAND_END 0x07
#endif /* HABANALABS_H_ */