From 64a7e2955d9a8a73098f13ccac95d80ad6efd98f Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Sun, 5 Jan 2020 09:05:45 +0000 Subject: habanalabs: split the host MMU properties Host memory may be allocated with huge pages. A different virtual range may be used for mapping in this case. Add Huge PCI MMU (HPMMU) properties to support it. This patch is a prerequisite for future ASICs support and has no effect on Goya ASIC as currently a single virtual host range is used for all page sizes. Signed-off-by: Omer Shpigelman Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/debugfs.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers/misc/habanalabs/debugfs.c') diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index 20413e350343..599d17dfd542 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -393,9 +393,10 @@ static int mmu_show(struct seq_file *s, void *data) } is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->va_space_dram_start_address, - prop->va_space_dram_end_address); + prop->dmmu.start_addr, + prop->dmmu.end_addr); + /* shifts and masks are the same in PMMU and HPMMU, use one of them */ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; mutex_lock(&ctx->mmu_lock); @@ -547,12 +548,15 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr) goto out; if (hdev->dram_supports_virtual_memory && - addr >= prop->va_space_dram_start_address && - addr < prop->va_space_dram_end_address) + (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr)) return true; - if (addr >= prop->va_space_host_start_address && - addr < prop->va_space_host_end_address) + if (addr >= prop->pmmu.start_addr && + addr < prop->pmmu.end_addr) + return true; + + if (addr >= prop->pmmu_huge.start_addr && + addr < prop->pmmu_huge.end_addr) return true; out: return false; @@ -575,9 +579,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, } is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->va_space_dram_start_address, - prop->va_space_dram_end_address); + prop->dmmu.start_addr, + prop->dmmu.end_addr); + /* shifts and masks are the same in PMMU and HPMMU, use one of them */ mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; mutex_lock(&ctx->mmu_lock); -- cgit v1.2.3 From 5cce51464c61b868157e578261d45fe389e81e54 Mon Sep 17 00:00:00 2001 From: Moti Haimovski Date: Tue, 12 Nov 2019 09:40:11 +0200 Subject: habanalabs: add debugfs write64/read64 Allow debug user to write/read 64-bit data through debugfs. This will expedite the dump process of the (large) internal memories of the device done during debug. Signed-off-by: Moti Haimovski Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../ABI/testing/debugfs-driver-habanalabs | 14 ++++ drivers/misc/habanalabs/debugfs.c | 71 +++++++++++++++++ drivers/misc/habanalabs/goya/goya.c | 92 ++++++++++++++++++++++ drivers/misc/habanalabs/habanalabs.h | 2 + 4 files changed, 179 insertions(+) (limited to 'drivers/misc/habanalabs/debugfs.c') diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs index f0ac14b70ecb..a73601c5121e 100644 --- a/Documentation/ABI/testing/debugfs-driver-habanalabs +++ b/Documentation/ABI/testing/debugfs-driver-habanalabs @@ -43,6 +43,20 @@ Description: Allows the root user to read or write directly through the If the IOMMU is disabled, it also allows the root user to read or write from the host a device VA of a host mapped memory +What: /sys/kernel/debug/habanalabs/hl/data64 +Date: Jan 2020 +KernelVersion: 5.6 +Contact: oded.gabbay@gmail.com +Description: Allows the root user to read or write 64 bit data directly + through the device's PCI bar. Writing to this file generates a + write transaction while reading from the file generates a read + transaction. This custom interface is needed (instead of using + the generic Linux user-space PCI mapping) because the DDR bar + is very small compared to the DDR memory and only the driver can + move the bar before and after the transaction. + If the IOMMU is disabled, it also allows the root user to read + or write from the host a device VA of a host mapped memory + What: /sys/kernel/debug/habanalabs/hl/device Date: Jan 2019 KernelVersion: 5.1 diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index 599d17dfd542..756d36ed5d95 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -710,6 +710,65 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf, return count; } +static ssize_t hl_data_read64(struct file *f, char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + char tmp_buf[32]; + u64 addr = entry->addr; + u64 val; + ssize_t rc; + + if (*ppos) + return 0; + + if (hl_is_device_va(hdev, addr)) { + rc = device_va_to_pa(hdev, addr, &addr); + if (rc) + return rc; + } + + rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val); + if (rc) { + dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr); + return rc; + } + + sprintf(tmp_buf, "0x%016llx\n", val); + return simple_read_from_buffer(buf, count, ppos, tmp_buf, + strlen(tmp_buf)); +} + +static ssize_t hl_data_write64(struct file *f, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct hl_dbg_device_entry *entry = file_inode(f)->i_private; + struct hl_device *hdev = entry->hdev; + u64 addr = entry->addr; + u64 value; + ssize_t rc; + + rc = kstrtoull_from_user(buf, count, 16, &value); + if (rc) + return rc; + + if (hl_is_device_va(hdev, addr)) { + rc = device_va_to_pa(hdev, addr, &addr); + if (rc) + return rc; + } + + rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value); + if (rc) { + dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n", + value, addr); + return rc; + } + + return count; +} + static ssize_t hl_get_power_state(struct file *f, char __user *buf, size_t count, loff_t *ppos) { @@ -917,6 +976,12 @@ static const struct file_operations hl_data32b_fops = { .write = hl_data_write32 }; +static const struct file_operations hl_data64b_fops = { + .owner = THIS_MODULE, + .read = hl_data_read64, + .write = hl_data_write64 +}; + static const struct file_operations hl_i2c_data_fops = { .owner = THIS_MODULE, .read = hl_i2c_data_read, @@ -1030,6 +1095,12 @@ void hl_debugfs_add_device(struct hl_device *hdev) dev_entry, &hl_data32b_fops); + debugfs_create_file("data64", + 0644, + dev_entry->root, + dev_entry, + &hl_data64b_fops); + debugfs_create_file("set_power_state", 0200, dev_entry->root, diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index f634e9c5cad9..0b6567b48622 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -4180,6 +4180,96 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val) return rc; } +static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 ddr_bar_addr; + int rc = 0; + + if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { + u32 val_l = RREG32(addr - CFG_BASE); + u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE); + + *val = (((u64) val_h) << 32) | val_l; + + } else if ((addr >= SRAM_BASE_ADDR) && + (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) { + + *val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] + + (addr - SRAM_BASE_ADDR)); + + } else if ((addr >= DRAM_PHYS_BASE) && + (addr <= + DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) { + + u64 bar_base_addr = DRAM_PHYS_BASE + + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); + + ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr); + if (ddr_bar_addr != U64_MAX) { + *val = readq(hdev->pcie_bar[DDR_BAR_ID] + + (addr - bar_base_addr)); + + ddr_bar_addr = goya_set_ddr_bar_base(hdev, + ddr_bar_addr); + } + if (ddr_bar_addr == U64_MAX) + rc = -EIO; + + } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { + *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE); + + } else { + rc = -EFAULT; + } + + return rc; +} + +static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 ddr_bar_addr; + int rc = 0; + + if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) { + WREG32(addr - CFG_BASE, lower_32_bits(val)); + WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val)); + + } else if ((addr >= SRAM_BASE_ADDR) && + (addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) { + + writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] + + (addr - SRAM_BASE_ADDR)); + + } else if ((addr >= DRAM_PHYS_BASE) && + (addr <= + DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) { + + u64 bar_base_addr = DRAM_PHYS_BASE + + (addr & ~(prop->dram_pci_bar_size - 0x1ull)); + + ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr); + if (ddr_bar_addr != U64_MAX) { + writeq(val, hdev->pcie_bar[DDR_BAR_ID] + + (addr - bar_base_addr)); + + ddr_bar_addr = goya_set_ddr_bar_base(hdev, + ddr_bar_addr); + } + if (ddr_bar_addr == U64_MAX) + rc = -EIO; + + } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) { + *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val; + + } else { + rc = -EFAULT; + } + + return rc; +} + static u64 goya_read_pte(struct hl_device *hdev, u64 addr) { struct goya_device *goya = hdev->asic_specific; @@ -5186,6 +5276,8 @@ static const struct hl_asic_funcs goya_funcs = { .restore_phase_topology = goya_restore_phase_topology, .debugfs_read32 = goya_debugfs_read32, .debugfs_write32 = goya_debugfs_write32, + .debugfs_read64 = goya_debugfs_read64, + .debugfs_write64 = goya_debugfs_write64, .add_device_attr = goya_add_device_attr, .handle_eqe = goya_handle_eqe, .set_pll_profile = goya_set_pll_profile, diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 954906292c00..4ef8cf23d099 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -582,6 +582,8 @@ struct hl_asic_funcs { void (*restore_phase_topology)(struct hl_device *hdev); int (*debugfs_read32)(struct hl_device *hdev, u64 addr, u32 *val); int (*debugfs_write32)(struct hl_device *hdev, u64 addr, u32 val); + int (*debugfs_read64)(struct hl_device *hdev, u64 addr, u64 *val); + int (*debugfs_write64)(struct hl_device *hdev, u64 addr, u64 val); void (*add_device_attr)(struct hl_device *hdev, struct attribute_group *dev_attr_grp); void (*handle_eqe)(struct hl_device *hdev, -- cgit v1.2.3