diff options
Diffstat (limited to 'drivers/misc/habanalabs/gaudi')
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 412 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudiP.h | 4 |
2 files changed, 107 insertions, 309 deletions
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 21c2b678ff72..fba322241096 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -95,7 +95,7 @@ #define GAUDI_NUM_OF_QM_ARB_ERR_CAUSE 3 -#define GAUDI_ARB_WDT_TIMEOUT 0x1000000 +#define GAUDI_ARB_WDT_TIMEOUT 0xEE6b27FF /* 8 seconds */ #define GAUDI_CLK_GATE_DEBUGFS_MASK (\ BIT(GAUDI_ENGINE_ID_MME_0) |\ @@ -557,6 +557,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) } prop->device_dma_offset_for_host_access = HOST_PHYS_BASE; + prop->host_base_address = HOST_PHYS_BASE; + prop->host_end_address = prop->host_base_address + HOST_PHYS_SIZE; prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES; prop->collective_first_sob = 0; prop->collective_first_mon = 0; @@ -595,18 +597,19 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->mmu_hop_table_size = HOP_TABLE_SIZE_512_PTE; prop->mmu_hop0_tables_total_size = HOP0_512_PTE_TABLES_TOTAL_SIZE; prop->dram_page_size = PAGE_SIZE_2MB; + prop->device_mem_alloc_default_page_size = prop->dram_page_size; prop->dram_supports_virtual_memory = false; - prop->pmmu.hop0_shift = MMU_V1_1_HOP0_SHIFT; - prop->pmmu.hop1_shift = MMU_V1_1_HOP1_SHIFT; - prop->pmmu.hop2_shift = MMU_V1_1_HOP2_SHIFT; - prop->pmmu.hop3_shift = MMU_V1_1_HOP3_SHIFT; - prop->pmmu.hop4_shift = MMU_V1_1_HOP4_SHIFT; - prop->pmmu.hop0_mask = MMU_V1_1_HOP0_MASK; - prop->pmmu.hop1_mask = MMU_V1_1_HOP1_MASK; - prop->pmmu.hop2_mask = MMU_V1_1_HOP2_MASK; - prop->pmmu.hop3_mask = MMU_V1_1_HOP3_MASK; - prop->pmmu.hop4_mask = MMU_V1_1_HOP4_MASK; + prop->pmmu.hop_shifts[MMU_HOP0] = MMU_V1_1_HOP0_SHIFT; + prop->pmmu.hop_shifts[MMU_HOP1] = MMU_V1_1_HOP1_SHIFT; + prop->pmmu.hop_shifts[MMU_HOP2] = MMU_V1_1_HOP2_SHIFT; + prop->pmmu.hop_shifts[MMU_HOP3] = MMU_V1_1_HOP3_SHIFT; + prop->pmmu.hop_shifts[MMU_HOP4] = MMU_V1_1_HOP4_SHIFT; + prop->pmmu.hop_masks[MMU_HOP0] = MMU_V1_1_HOP0_MASK; + prop->pmmu.hop_masks[MMU_HOP1] = MMU_V1_1_HOP1_MASK; + prop->pmmu.hop_masks[MMU_HOP2] = MMU_V1_1_HOP2_MASK; + prop->pmmu.hop_masks[MMU_HOP3] = MMU_V1_1_HOP3_MASK; + prop->pmmu.hop_masks[MMU_HOP4] = MMU_V1_1_HOP4_MASK; prop->pmmu.start_addr = VA_HOST_SPACE_START; prop->pmmu.end_addr = (VA_HOST_SPACE_START + VA_HOST_SPACE_SIZE / 2) - 1; @@ -673,6 +676,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->set_max_power_on_device_init = true; + prop->dma_mask = 48; + return 0; } @@ -754,8 +759,6 @@ static int gaudi_init_iatu(struct hl_device *hdev) if (rc) goto done; - hdev->asic_funcs->set_dma_mask_from_fw(hdev); - /* Outbound Region 0 - Point to Host */ outbound_region.addr = HOST_PHYS_BASE; outbound_region.size = HOST_PHYS_SIZE; @@ -1008,7 +1011,7 @@ free_job: release_cb: hl_cb_put(cb); - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); return rc; } @@ -1470,7 +1473,7 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev, job->patched_cb = NULL; job->job_cb_size = job->user_cb_size; - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); /* increment refcount as for external queues we get completion */ if (hw_queue_prop->type == QUEUE_TYPE_EXT) @@ -2808,9 +2811,8 @@ static void gaudi_init_pci_dma_qman(struct hl_device *hdev, int dma_id, WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, QM_ARB_ERR_MSG_EN_MASK); - /* Increase ARB WDT to support streams architecture */ - WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, - GAUDI_ARB_WDT_TIMEOUT); + /* Set timeout to maximum */ + WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, QMAN_EXTERNAL_MAKE_TRUSTED); @@ -2987,9 +2989,8 @@ static void gaudi_init_hbm_dma_qman(struct hl_device *hdev, int dma_id, WREG32(mmDMA0_QM_ARB_ERR_MSG_EN + dma_qm_offset, QM_ARB_ERR_MSG_EN_MASK); - /* Increase ARB WDT to support streams architecture */ - WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, - GAUDI_ARB_WDT_TIMEOUT); + /* Set timeout to maximum */ + WREG32(mmDMA0_QM_ARB_SLV_CHOISE_WDT + dma_qm_offset, GAUDI_ARB_WDT_TIMEOUT); WREG32(mmDMA0_QM_GLBL_CFG1 + dma_qm_offset, 0); WREG32(mmDMA0_QM_GLBL_PROT + dma_qm_offset, @@ -3124,9 +3125,8 @@ static void gaudi_init_mme_qman(struct hl_device *hdev, u32 mme_offset, WREG32(mmMME0_QM_ARB_ERR_MSG_EN + mme_offset, QM_ARB_ERR_MSG_EN_MASK); - /* Increase ARB WDT to support streams architecture */ - WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, - GAUDI_ARB_WDT_TIMEOUT); + /* Set timeout to maximum */ + WREG32(mmMME0_QM_ARB_SLV_CHOISE_WDT + mme_offset, GAUDI_ARB_WDT_TIMEOUT); WREG32(mmMME0_QM_GLBL_CFG1 + mme_offset, 0); WREG32(mmMME0_QM_GLBL_PROT + mme_offset, @@ -3258,9 +3258,8 @@ static void gaudi_init_tpc_qman(struct hl_device *hdev, u32 tpc_offset, WREG32(mmTPC0_QM_ARB_ERR_MSG_EN + tpc_offset, QM_ARB_ERR_MSG_EN_MASK); - /* Increase ARB WDT to support streams architecture */ - WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, - GAUDI_ARB_WDT_TIMEOUT); + /* Set timeout to maximum */ + WREG32(mmTPC0_QM_ARB_SLV_CHOISE_WDT + tpc_offset, GAUDI_ARB_WDT_TIMEOUT); WREG32(mmTPC0_QM_GLBL_CFG1 + tpc_offset, 0); WREG32(mmTPC0_QM_GLBL_PROT + tpc_offset, @@ -3409,9 +3408,8 @@ static void gaudi_init_nic_qman(struct hl_device *hdev, u32 nic_offset, WREG32(mmNIC0_QM0_ARB_ERR_MSG_EN + nic_offset, QM_ARB_ERR_MSG_EN_MASK); - /* Increase ARB WDT to support streams architecture */ - WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, - GAUDI_ARB_WDT_TIMEOUT); + /* Set timeout to maximum */ + WREG32(mmNIC0_QM0_ARB_SLV_CHOISE_WDT + nic_offset, GAUDI_ARB_WDT_TIMEOUT); WREG32(mmNIC0_QM0_GLBL_CFG1 + nic_offset, 0); WREG32(mmNIC0_QM0_GLBL_PROT + nic_offset, @@ -3792,9 +3790,6 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_ { u32 wait_timeout_ms; - dev_info(hdev->dev, - "Halting compute engines and disabling interrupts\n"); - if (hdev->pldm) wait_timeout_ms = GAUDI_PLDM_RESET_WAIT_MSEC; else @@ -4212,7 +4207,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset } if (fw_reset) { - dev_info(hdev->dev, + dev_dbg(hdev->dev, "Firmware performs HARD reset, going to wait %dms\n", reset_timeout_ms); @@ -4304,11 +4299,11 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset WREG32(mmPSOC_GLOBAL_CONF_SW_ALL_RST, 1 << PSOC_GLOBAL_CONF_SW_ALL_RST_IND_SHIFT); - dev_info(hdev->dev, + dev_dbg(hdev->dev, "Issued HARD reset command, going to wait %dms\n", reset_timeout_ms); } else { - dev_info(hdev->dev, + dev_dbg(hdev->dev, "Firmware performs HARD reset, going to wait %dms\n", reset_timeout_ms); } @@ -4745,12 +4740,11 @@ static void gaudi_dma_free_coherent(struct hl_device *hdev, size_t size, dma_free_coherent(&hdev->pdev->dev, size, cpu_addr, fixed_dma_handle); } -static int gaudi_hbm_scrubbing(struct hl_device *hdev) +static int gaudi_scrub_device_dram(struct hl_device *hdev, u64 val) { struct asic_fixed_properties *prop = &hdev->asic_prop; u64 cur_addr = DRAM_BASE_ADDR_USER; - u32 val; - u32 chunk_size; + u32 chunk_size, busy; int rc, dma_id; while (cur_addr < prop->dram_end_address) { @@ -4764,8 +4758,10 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev) "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n", cur_addr, cur_addr + chunk_size); - WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf); - WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf); + WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, + lower_32_bits(val)); + WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, + upper_32_bits(val)); WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset, lower_32_bits(cur_addr)); WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset, @@ -4788,8 +4784,8 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev) rc = hl_poll_timeout( hdev, mmDMA0_CORE_STS0 + dma_offset, - val, - ((val & DMA0_CORE_STS0_BUSY_MASK) == 0), + busy, + ((busy & DMA0_CORE_STS0_BUSY_MASK) == 0), 1000, HBM_SCRUBBING_TIMEOUT_US); @@ -4843,7 +4839,7 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size) } /* Scrub HBM using all DMA channels in parallel */ - rc = gaudi_hbm_scrubbing(hdev); + rc = gaudi_scrub_device_dram(hdev, 0xdeadbeaf); if (rc) dev_err(hdev->dev, "Failed to clear HBM in mem scrub all\n"); @@ -5038,37 +5034,7 @@ static void gaudi_cpu_accessible_dma_pool_free(struct hl_device *hdev, hl_fw_cpu_accessible_dma_pool_free(hdev, size, vaddr); } -static int gaudi_dma_map_sg(struct hl_device *hdev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - if (!dma_map_sg(&hdev->pdev->dev, sgl, nents, dir)) - return -ENOMEM; - - /* Shift to the device's base physical address of host memory */ - for_each_sg(sgl, sg, nents, i) - sg->dma_address += HOST_PHYS_BASE; - - return 0; -} - -static void gaudi_dma_unmap_sg(struct hl_device *hdev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - /* Cancel the device's base physical address of host memory */ - for_each_sg(sgl, sg, nents, i) - sg->dma_address -= HOST_PHYS_BASE; - - dma_unmap_sg(&hdev->pdev->dev, sgl, nents, dir); -} - -static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, - struct sg_table *sgt) +static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, struct sg_table *sgt) { struct scatterlist *sg, *sg_next_iter; u32 count, dma_desc_cnt; @@ -5077,8 +5043,7 @@ static u32 gaudi_get_dma_desc_list_size(struct hl_device *hdev, dma_desc_cnt = 0; - for_each_sg(sgt->sgl, sg, sgt->nents, count) { - + for_each_sgtable_dma_sg(sgt, sg, count) { len = sg_dma_len(sg); addr = sg_dma_address(sg); @@ -5132,8 +5097,7 @@ static int gaudi_pin_memory_before_cs(struct hl_device *hdev, list_add_tail(&userptr->job_node, parser->job_userptr_list); - rc = hdev->asic_funcs->asic_dma_map_sg(hdev, userptr->sgt->sgl, - userptr->sgt->nents, dir); + rc = hdev->asic_funcs->asic_dma_map_sgtable(hdev, userptr->sgt, dir); if (rc) { dev_err(hdev->dev, "failed to map sgt with DMA region\n"); goto unpin_memory; @@ -5408,7 +5372,7 @@ static int gaudi_patch_dma_packet(struct hl_device *hdev, sgt = userptr->sgt; dma_desc_cnt = 0; - for_each_sg(sgt->sgl, sg, sgt->nents, count) { + for_each_sgtable_dma_sg(sgt, sg, count) { len = sg_dma_len(sg); dma_addr = sg_dma_address(sg); @@ -5562,7 +5526,7 @@ static int gaudi_patch_cb(struct hl_device *hdev, static int gaudi_parse_cb_mmu(struct hl_device *hdev, struct hl_cs_parser *parser) { - u64 patched_cb_handle; + u64 handle; u32 patched_cb_size; struct hl_cb *user_cb; int rc; @@ -5578,9 +5542,9 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev, else parser->patched_cb_size = parser->user_cb_size; - rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, + rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, parser->patched_cb_size, false, false, - &patched_cb_handle); + &handle); if (rc) { dev_err(hdev->dev, @@ -5589,13 +5553,10 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev, return rc; } - patched_cb_handle >>= PAGE_SHIFT; - parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, - (u32) patched_cb_handle); + parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); /* hl_cb_get should never fail */ if (!parser->patched_cb) { - dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n", - (u32) patched_cb_handle); + dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); rc = -EFAULT; goto out; } @@ -5635,8 +5596,7 @@ out: * cb_put will release it, but here we want to remove it from the * idr */ - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, - patched_cb_handle << PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, handle); return rc; } @@ -5644,7 +5604,7 @@ out: static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, struct hl_cs_parser *parser) { - u64 patched_cb_handle; + u64 handle; int rc; rc = gaudi_validate_cb(hdev, parser, false); @@ -5652,22 +5612,19 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev, if (rc) goto free_userptr; - rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx, + rc = hl_cb_create(hdev, &hdev->kernel_mem_mgr, hdev->kernel_ctx, parser->patched_cb_size, false, false, - &patched_cb_handle); + &handle); if (rc) { dev_err(hdev->dev, "Failed to allocate patched CB for DMA CS %d\n", rc); goto free_userptr; } - patched_cb_handle >>= PAGE_SHIFT; - parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, - (u32) patched_cb_handle); + parser->patched_cb = hl_cb_get(&hdev->kernel_mem_mgr, handle); /* hl_cb_get should never fail here */ if (!parser->patched_cb) { - dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n", - (u32) patched_cb_handle); + dev_crit(hdev->dev, "DMA CB handle invalid 0x%llx\n", handle); rc = -EFAULT; goto out; } @@ -5684,8 +5641,7 @@ out: * cb_put will release it, but here we want to remove it from the * idr */ - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, - patched_cb_handle << PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, handle); free_userptr: if (rc) @@ -5798,7 +5754,6 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, struct hl_cs_job *job; u32 cb_size, ctl, err_cause; struct hl_cb *cb; - u64 id; int rc; cb = hl_cb_kernel_create(hdev, PAGE_SIZE, false); @@ -5865,9 +5820,8 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr, } release_cb: - id = cb->id; hl_cb_put(cb); - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, id << PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); return rc; } @@ -5930,7 +5884,7 @@ static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base, release_cb: hl_cb_put(cb); - hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT); + hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); return rc; } @@ -6101,184 +6055,6 @@ static void gaudi_restore_phase_topology(struct hl_device *hdev) } -static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, - bool user_address, u32 *val) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 hbm_bar_addr, host_phys_end; - int rc = 0; - - host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; - - if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - - *val = RREG32(addr - CFG_BASE); - - } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { - - *val = readl(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR)); - - } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { - - u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); - - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); - - if (hbm_bar_addr != U64_MAX) { - *val = readl(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr)); - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr); - } - - if (hbm_bar_addr == U64_MAX) - rc = -EIO; - - } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && - user_address && !iommu_present(&pci_bus_type)) { - - *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE); - - } else { - rc = -EFAULT; - } - - return rc; -} - -static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, - bool user_address, u32 val) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 hbm_bar_addr, host_phys_end; - int rc = 0; - - host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; - - if ((addr >= CFG_BASE) && (addr < CFG_BASE + CFG_SIZE)) { - - WREG32(addr - CFG_BASE, val); - - } else if ((addr >= SRAM_BASE_ADDR) && (addr < SRAM_BASE_ADDR + SRAM_BAR_SIZE)) { - - writel(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR)); - - } else if (addr < DRAM_PHYS_BASE + hdev->asic_prop.dram_size) { - - u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); - - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); - - if (hbm_bar_addr != U64_MAX) { - writel(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr)); - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr); - } - - if (hbm_bar_addr == U64_MAX) - rc = -EIO; - - } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && - user_address && !iommu_present(&pci_bus_type)) { - - *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; - - } else { - rc = -EFAULT; - } - - return rc; -} - -static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, - bool user_address, u64 *val) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 hbm_bar_addr, host_phys_end; - int rc = 0; - - host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; - - if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - - u32 val_l = RREG32(addr - CFG_BASE); - u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE); - - *val = (((u64) val_h) << 32) | val_l; - - } else if ((addr >= SRAM_BASE_ADDR) && - (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { - - *val = readq(hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR)); - - } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { - - u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); - - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); - - if (hbm_bar_addr != U64_MAX) { - *val = readq(hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr)); - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr); - } - - if (hbm_bar_addr == U64_MAX) - rc = -EIO; - - } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && - user_address && !iommu_present(&pci_bus_type)) { - - *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE); - - } else { - rc = -EFAULT; - } - - return rc; -} - -static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, - bool user_address, u64 val) -{ - struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 hbm_bar_addr, host_phys_end; - int rc = 0; - - host_phys_end = HOST_PHYS_BASE + HOST_PHYS_SIZE; - - if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { - - WREG32(addr - CFG_BASE, lower_32_bits(val)); - WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val)); - - } else if ((addr >= SRAM_BASE_ADDR) && - (addr <= SRAM_BASE_ADDR + SRAM_BAR_SIZE - sizeof(u64))) { - - writeq(val, hdev->pcie_bar[SRAM_BAR_ID] + (addr - SRAM_BASE_ADDR)); - - } else if (addr <= DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64)) { - - u64 bar_base_addr = DRAM_PHYS_BASE + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); - - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, bar_base_addr); - - if (hbm_bar_addr != U64_MAX) { - writeq(val, hdev->pcie_bar[HBM_BAR_ID] + (addr - bar_base_addr)); - hbm_bar_addr = gaudi_set_hbm_bar_base(hdev, hbm_bar_addr); - } - - if (hbm_bar_addr == U64_MAX) - rc = -EIO; - - } else if (addr >= HOST_PHYS_BASE && addr < host_phys_end && - user_address && !iommu_present(&pci_bus_type)) { - - *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; - - } else { - rc = -EFAULT; - } - - return rc; -} - static int gaudi_dma_core_transfer(struct hl_device *hdev, int dma_id, u64 addr, u32 size_to_dma, dma_addr_t dma_addr) { @@ -7628,19 +7404,18 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type, gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type); /* In case it's the first razwi, save its parameters*/ - rc = atomic_cmpxchg(&hdev->last_error.razwi_write_disable, 0, 1); + rc = atomic_cmpxchg(&hdev->last_error.razwi.write_disable, 0, 1); if (!rc) { - hdev->last_error.open_dev_timestamp = hdev->last_successful_open_ktime; - hdev->last_error.razwi_timestamp = ktime_get(); - hdev->last_error.razwi_addr = razwi_addr; - hdev->last_error.razwi_engine_id_1 = engine_id_1; - hdev->last_error.razwi_engine_id_2 = engine_id_2; + hdev->last_error.razwi.timestamp = ktime_get(); + hdev->last_error.razwi.addr = razwi_addr; + hdev->last_error.razwi.engine_id_1 = engine_id_1; + hdev->last_error.razwi.engine_id_2 = engine_id_2; /* * If first engine id holds non valid value the razwi initiator * does not have engine id */ - hdev->last_error.razwi_non_engine_initiator = (engine_id_1 == U16_MAX); - hdev->last_error.razwi_type = razwi_type; + hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX); + hdev->last_error.razwi.type = razwi_type; } } @@ -8103,7 +7878,6 @@ static void gaudi_handle_eqe(struct hl_device *hdev, case GAUDI_EVENT_MMU_PAGE_FAULT: case GAUDI_EVENT_MMU_WR_PERM: case GAUDI_EVENT_RAZWI_OR_ADC: - case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: fallthrough; @@ -8123,6 +7897,19 @@ static void gaudi_handle_eqe(struct hl_device *hdev, hl_fw_unmask_irq(hdev, event_type); break; + case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: + gaudi_print_irq_info(hdev, event_type, true); + gaudi_handle_qman_err(hdev, event_type); + hl_fw_unmask_irq(hdev, event_type); + + /* In TPC QM event, notify on TPC assertion. While there isn't + * a specific event for assertion yet, the FW generates QM event. + * The SW upper layer will inspect an internal mapped area to indicate + * if the event is a tpc assertion or tpc QM. + */ + hl_notifier_event_send_all(hdev, HL_NOTIFIER_EVENT_TPC_ASSERT); + break; + case GAUDI_EVENT_RAZWI_OR_ADC_SW: gaudi_print_irq_info(hdev, event_type, true); goto reset_device; @@ -8328,8 +8115,6 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev) set_default_power_values(hdev); - hdev->max_power = prop->max_power_default; - return 0; } @@ -8501,6 +8286,16 @@ static int gaudi_get_eeprom_data(struct hl_device *hdev, void *data, return hl_fw_get_eeprom_data(hdev, data, max_size); } +static int gaudi_get_monitor_dump(struct hl_device *hdev, void *data) +{ + struct gaudi_device *gaudi = hdev->asic_specific; + + if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) + return 0; + + return hl_fw_get_monitor_dump(hdev, data); +} + /* * this function should be used only during initialization and/or after reset, * when there are no active users. @@ -9066,11 +8861,6 @@ static void gaudi_reset_sob(struct hl_device *hdev, void *data) kref_init(&hw_sob->kref); } -static void gaudi_set_dma_mask_from_fw(struct hl_device *hdev) -{ - hdev->dma_mask = 48; -} - static u64 gaudi_get_device_time(struct hl_device *hdev) { u64 device_time = ((u64) RREG32(mmPSOC_TIMESTAMP_CNTCVU)) << 32; @@ -9132,7 +8922,7 @@ static int gaudi_add_sync_to_engine_map_entry( */ if (reg_value == 0 || reg_value == 0xffffffff) return 0; - reg_value -= (u32)CFG_BASE; + reg_value -= lower_32_bits(CFG_BASE); /* create a new hash entry */ entry = kzalloc(sizeof(*entry), GFP_KERNEL); @@ -9377,6 +9167,12 @@ static u32 *gaudi_get_stream_master_qid_arr(void) return gaudi_stream_master; } +static void gaudi_get_valid_dram_page_orders(struct hl_info_dev_memalloc_page_sizes *info) +{ + /* set 0 since multiple pages are not supported */ + info->page_order_bitmask = 0; +} + static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hl_device *hdev = dev_get_drvdata(dev); @@ -9418,24 +9214,21 @@ static const struct hl_asic_funcs gaudi_funcs = { .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent, .asic_dma_free_coherent = gaudi_dma_free_coherent, .scrub_device_mem = gaudi_scrub_device_mem, + .scrub_device_dram = gaudi_scrub_device_dram, .get_int_queue_base = gaudi_get_int_queue_base, .test_queues = gaudi_test_queues, .asic_dma_pool_zalloc = gaudi_dma_pool_zalloc, .asic_dma_pool_free = gaudi_dma_pool_free, .cpu_accessible_dma_pool_alloc = gaudi_cpu_accessible_dma_pool_alloc, .cpu_accessible_dma_pool_free = gaudi_cpu_accessible_dma_pool_free, - .hl_dma_unmap_sg = gaudi_dma_unmap_sg, + .hl_dma_unmap_sgtable = hl_dma_unmap_sgtable, .cs_parser = gaudi_cs_parser, - .asic_dma_map_sg = gaudi_dma_map_sg, + .asic_dma_map_sgtable = hl_dma_map_sgtable, .get_dma_desc_list_size = gaudi_get_dma_desc_list_size, .add_end_of_cb_packets = gaudi_add_end_of_cb_packets, .update_eq_ci = gaudi_update_eq_ci, .context_switch = gaudi_context_switch, .restore_phase_topology = gaudi_restore_phase_topology, - .debugfs_read32 = gaudi_debugfs_read32, - .debugfs_write32 = gaudi_debugfs_write32, - .debugfs_read64 = gaudi_debugfs_read64, - .debugfs_write64 = gaudi_debugfs_write64, .debugfs_read_dma = gaudi_debugfs_read_dma, .add_device_attr = gaudi_add_device_attr, .handle_eqe = gaudi_handle_eqe, @@ -9444,6 +9237,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .write_pte = gaudi_write_pte, .mmu_invalidate_cache = gaudi_mmu_invalidate_cache, .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range, + .mmu_prefetch_cache_range = NULL, .send_heartbeat = gaudi_send_heartbeat, .debug_coresight = gaudi_debug_coresight, .is_device_idle = gaudi_is_device_idle, @@ -9452,6 +9246,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .hw_queues_unlock = gaudi_hw_queues_unlock, .get_pci_id = gaudi_get_pci_id, .get_eeprom_data = gaudi_get_eeprom_data, + .get_monitor_dump = gaudi_get_monitor_dump, .send_cpu_message = gaudi_send_cpu_message, .pci_bars_map = gaudi_pci_bars_map, .init_iatu = gaudi_init_iatu, @@ -9469,7 +9264,6 @@ static const struct hl_asic_funcs gaudi_funcs = { .gen_wait_cb = gaudi_gen_wait_cb, .reset_sob = gaudi_reset_sob, .reset_sob_group = gaudi_reset_sob_group, - .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw, .get_device_time = gaudi_get_device_time, .collective_wait_init_cs = gaudi_collective_wait_init_cs, .collective_wait_create_jobs = gaudi_collective_wait_create_jobs, @@ -9486,7 +9280,11 @@ static const struct hl_asic_funcs gaudi_funcs = { .get_sob_addr = gaudi_get_sob_addr, .set_pci_memory_regions = gaudi_set_pci_memory_regions, .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr, - .is_valid_dram_page_size = NULL + .is_valid_dram_page_size = NULL, + .mmu_get_real_page_size = hl_mmu_get_real_page_size, + .get_valid_dram_page_orders = gaudi_get_valid_dram_page_orders, + .access_dev_mem = hl_access_dev_mem, + .set_dram_bar_base = gaudi_set_hbm_bar_base, }; /** diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index 54de7c599072..4fbcf3f0afe5 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -148,14 +148,14 @@ #define MME_QMAN_LENGTH 1024 #define MME_QMAN_SIZE_IN_BYTES (MME_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE) -#define HBM_DMA_QMAN_LENGTH 1024 +#define HBM_DMA_QMAN_LENGTH 4096 #define HBM_DMA_QMAN_SIZE_IN_BYTES \ (HBM_DMA_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE) #define TPC_QMAN_LENGTH 1024 #define TPC_QMAN_SIZE_IN_BYTES (TPC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE) -#define NIC_QMAN_LENGTH 1024 +#define NIC_QMAN_LENGTH 4096 #define NIC_QMAN_SIZE_IN_BYTES (NIC_QMAN_LENGTH * QMAN_PQ_ENTRY_SIZE) |