diff options
author | David S. Miller <davem@davemloft.net> | 2018-02-14 15:01:52 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-02-14 15:01:52 -0500 |
commit | a92ac140fc30f5e8fc407d75de44e0bce7f59ab6 (patch) | |
tree | 262dd88933b7e899a7d22cd3e94077843ed2f989 | |
parent | 9942895b5ee4b0db53f32fbcb4a51360607aac1b (diff) | |
parent | 7494f980ca0503e3eec6f4ba508186d269b37e7f (diff) | |
download | linux-a92ac140fc30f5e8fc407d75de44e0bce7f59ab6.tar.bz2 |
Merge branch 'cxgb4-speed-up-reading-on-chip-memory'
Rahul Lakkireddy says:
====================
cxgb4: speed up reading on-chip memory
This series of patches speed up reading on-chip memory (EDC and MC)
by reading 64-bits at a time.
Patch 1 reworks logic to read EDC and MC.
Patch 2 adds logic to read EDC and MC 64-bits at a time.
v2:
- Dropped AVX CPU intrinsic instructions.
- Use readq() to read 64-bits at a time.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c | 86 | ||||
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 5 | ||||
-rw-r--r-- | drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 193 |
3 files changed, 211 insertions, 73 deletions
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c index 557fd8bfd54e..6322b8df0ed0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_lib.c @@ -878,6 +878,86 @@ static int cudbg_get_payload_range(struct adapter *padap, u8 mem_type, &payload->start, &payload->end); } +static int cudbg_memory_read(struct cudbg_init *pdbg_init, int win, + int mtype, u32 addr, u32 len, void *hbuf) +{ + u32 win_pf, memoffset, mem_aperture, mem_base; + struct adapter *adap = pdbg_init->adap; + u32 pos, offset, resid; + u32 *res_buf; + u64 *buf; + int ret; + + /* Argument sanity checks ... + */ + if (addr & 0x3 || (uintptr_t)hbuf & 0x3) + return -EINVAL; + + buf = (u64 *)hbuf; + + /* Try to do 64-bit reads. Residual will be handled later. */ + resid = len & 0x7; + len -= resid; + + ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base, + &mem_aperture); + if (ret) + return ret; + + addr = addr + memoffset; + win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf); + + pos = addr & ~(mem_aperture - 1); + offset = addr - pos; + + /* Set up initial PCI-E Memory Window to cover the start of our + * transfer. + */ + t4_memory_update_win(adap, win, pos | win_pf); + + /* Transfer data from the adapter */ + while (len > 0) { + *buf++ = le64_to_cpu((__force __le64) + t4_read_reg64(adap, mem_base + offset)); + offset += sizeof(u64); + len -= sizeof(u64); + + /* If we've reached the end of our current window aperture, + * move the PCI-E Memory Window on to the next. + */ + if (offset == mem_aperture) { + pos += mem_aperture; + offset = 0; + t4_memory_update_win(adap, win, pos | win_pf); + } + } + + res_buf = (u32 *)buf; + /* Read residual in 32-bit multiples */ + while (resid > sizeof(u32)) { + *res_buf++ = le32_to_cpu((__force __le32) + t4_read_reg(adap, mem_base + offset)); + offset += sizeof(u32); + resid -= sizeof(u32); + + /* If we've reached the end of our current window aperture, + * move the PCI-E Memory Window on to the next. + */ + if (offset == mem_aperture) { + pos += mem_aperture; + offset = 0; + t4_memory_update_win(adap, win, pos | win_pf); + } + } + + /* Transfer residual < 32-bits */ + if (resid) + t4_memory_rw_residual(adap, resid, mem_base + offset, + (u8 *)res_buf, T4_MEMORY_READ); + + return 0; +} + #define CUDBG_YIELD_ITERATION 256 static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init, @@ -937,10 +1017,8 @@ static int cudbg_read_fw_mem(struct cudbg_init *pdbg_init, goto skip_read; spin_lock(&padap->win0_lock); - rc = t4_memory_rw(padap, MEMWIN_NIC, mem_type, - bytes_read, bytes, - (__be32 *)temp_buff.data, - 1); + rc = cudbg_memory_read(pdbg_init, MEMWIN_NIC, mem_type, + bytes_read, bytes, temp_buff.data); spin_unlock(&padap->win0_lock); if (rc) { cudbg_err->sys_err = rc; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 9040e13ce4b7..d3fa53db61ee 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1488,6 +1488,11 @@ u32 t4_read_pcie_cfg4(struct adapter *adap, int reg); u32 t4_get_util_window(struct adapter *adap); void t4_setup_memwin(struct adapter *adap, u32 memwin_base, u32 window); +int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off, + u32 *mem_base, u32 *mem_aperture); +void t4_memory_update_win(struct adapter *adap, int win, u32 addr); +void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf, + int dir); #define T4_MEMORY_WRITE 0 #define T4_MEMORY_READ 1 int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len, diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 047609ef0515..ba647462be6c 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -484,6 +484,117 @@ static int t4_edc_err_read(struct adapter *adap, int idx) } /** + * t4_memory_rw_init - Get memory window relative offset, base, and size. + * @adap: the adapter + * @win: PCI-E Memory Window to use + * @mtype: memory type: MEM_EDC0, MEM_EDC1 or MEM_MC + * @mem_off: memory relative offset with respect to @mtype. + * @mem_base: configured memory base address. + * @mem_aperture: configured memory window aperture. + * + * Get the configured memory window's relative offset, base, and size. + */ +int t4_memory_rw_init(struct adapter *adap, int win, int mtype, u32 *mem_off, + u32 *mem_base, u32 *mem_aperture) +{ + u32 edc_size, mc_size, mem_reg; + + /* Offset into the region of memory which is being accessed + * MEM_EDC0 = 0 + * MEM_EDC1 = 1 + * MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller + * MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5) + * MEM_HMA = 4 + */ + edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A)); + if (mtype == MEM_HMA) { + *mem_off = 2 * (edc_size * 1024 * 1024); + } else if (mtype != MEM_MC1) { + *mem_off = (mtype * (edc_size * 1024 * 1024)); + } else { + mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap, + MA_EXT_MEMORY0_BAR_A)); + *mem_off = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024; + } + + /* Each PCI-E Memory Window is programmed with a window size -- or + * "aperture" -- which controls the granularity of its mapping onto + * adapter memory. We need to grab that aperture in order to know + * how to use the specified window. The window is also programmed + * with the base address of the Memory Window in BAR0's address + * space. For T4 this is an absolute PCI-E Bus Address. For T5 + * the address is relative to BAR0. + */ + mem_reg = t4_read_reg(adap, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, + win)); + /* a dead adapter will return 0xffffffff for PIO reads */ + if (mem_reg == 0xffffffff) + return -ENXIO; + + *mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X); + *mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X; + if (is_t4(adap->params.chip)) + *mem_base -= adap->t4_bar0; + + return 0; +} + +/** + * t4_memory_update_win - Move memory window to specified address. + * @adap: the adapter + * @win: PCI-E Memory Window to use + * @addr: location to move. + * + * Move memory window to specified address. + */ +void t4_memory_update_win(struct adapter *adap, int win, u32 addr) +{ + t4_write_reg(adap, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win), + addr); + /* Read it back to ensure that changes propagate before we + * attempt to use the new value. + */ + t4_read_reg(adap, + PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win)); +} + +/** + * t4_memory_rw_residual - Read/Write residual data. + * @adap: the adapter + * @off: relative offset within residual to start read/write. + * @addr: address within indicated memory type. + * @buf: host memory buffer + * @dir: direction of transfer T4_MEMORY_READ (1) or T4_MEMORY_WRITE (0) + * + * Read/Write residual data less than 32-bits. + */ +void t4_memory_rw_residual(struct adapter *adap, u32 off, u32 addr, u8 *buf, + int dir) +{ + union { + u32 word; + char byte[4]; + } last; + unsigned char *bp; + int i; + + if (dir == T4_MEMORY_READ) { + last.word = le32_to_cpu((__force __le32) + t4_read_reg(adap, addr)); + for (bp = (unsigned char *)buf, i = off; i < 4; i++) + bp[i] = last.byte[i]; + } else { + last.word = *buf; + for (i = off; i < 4; i++) + last.byte[i] = 0; + t4_write_reg(adap, addr, + (__force u32)cpu_to_le32(last.word)); + } +} + +/** * t4_memory_rw - read/write EDC 0, EDC 1 or MC via PCIE memory window * @adap: the adapter * @win: PCI-E Memory Window to use @@ -504,8 +615,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, u32 len, void *hbuf, int dir) { u32 pos, offset, resid, memoffset; - u32 edc_size, mc_size, win_pf, mem_reg, mem_aperture, mem_base; + u32 win_pf, mem_aperture, mem_base; u32 *buf; + int ret; /* Argument sanity checks ... */ @@ -521,59 +633,26 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, resid = len & 0x3; len -= resid; - /* Offset into the region of memory which is being accessed - * MEM_EDC0 = 0 - * MEM_EDC1 = 1 - * MEM_MC = 2 -- MEM_MC for chips with only 1 memory controller - * MEM_MC1 = 3 -- for chips with 2 memory controllers (e.g. T5) - * MEM_HMA = 4 - */ - edc_size = EDRAM0_SIZE_G(t4_read_reg(adap, MA_EDRAM0_BAR_A)); - if (mtype == MEM_HMA) { - memoffset = 2 * (edc_size * 1024 * 1024); - } else if (mtype != MEM_MC1) { - memoffset = (mtype * (edc_size * 1024 * 1024)); - } else { - mc_size = EXT_MEM0_SIZE_G(t4_read_reg(adap, - MA_EXT_MEMORY0_BAR_A)); - memoffset = (MEM_MC0 * edc_size + mc_size) * 1024 * 1024; - } + ret = t4_memory_rw_init(adap, win, mtype, &memoffset, &mem_base, + &mem_aperture); + if (ret) + return ret; /* Determine the PCIE_MEM_ACCESS_OFFSET */ addr = addr + memoffset; - /* Each PCI-E Memory Window is programmed with a window size -- or - * "aperture" -- which controls the granularity of its mapping onto - * adapter memory. We need to grab that aperture in order to know - * how to use the specified window. The window is also programmed - * with the base address of the Memory Window in BAR0's address - * space. For T4 this is an absolute PCI-E Bus Address. For T5 - * the address is relative to BAR0. - */ - mem_reg = t4_read_reg(adap, - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_BASE_WIN_A, - win)); - mem_aperture = 1 << (WINDOW_G(mem_reg) + WINDOW_SHIFT_X); - mem_base = PCIEOFST_G(mem_reg) << PCIEOFST_SHIFT_X; - if (is_t4(adap->params.chip)) - mem_base -= adap->t4_bar0; win_pf = is_t4(adap->params.chip) ? 0 : PFNUM_V(adap->pf); /* Calculate our initial PCI-E Memory Window Position and Offset into * that Window. */ - pos = addr & ~(mem_aperture-1); + pos = addr & ~(mem_aperture - 1); offset = addr - pos; /* Set up initial PCI-E Memory Window to cover the start of our - * transfer. (Read it back to ensure that changes propagate before we - * attempt to use the new value.) + * transfer. */ - t4_write_reg(adap, - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win), - pos | win_pf); - t4_read_reg(adap, - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, win)); + t4_memory_update_win(adap, win, pos | win_pf); /* Transfer data to/from the adapter as long as there's an integral * number of 32-bit transfers to complete. @@ -628,12 +707,7 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, if (offset == mem_aperture) { pos += mem_aperture; offset = 0; - t4_write_reg(adap, - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, - win), pos | win_pf); - t4_read_reg(adap, - PCIE_MEM_ACCESS_REG(PCIE_MEM_ACCESS_OFFSET_A, - win)); + t4_memory_update_win(adap, win, pos | win_pf); } } @@ -642,28 +716,9 @@ int t4_memory_rw(struct adapter *adap, int win, int mtype, u32 addr, * residual amount. The PCI-E Memory Window has already been moved * above (if necessary) to cover this final transfer. */ - if (resid) { - union { - u32 word; - char byte[4]; - } last; - unsigned char *bp; - int i; - - if (dir == T4_MEMORY_READ) { - last.word = le32_to_cpu( - (__force __le32)t4_read_reg(adap, - mem_base + offset)); - for (bp = (unsigned char *)buf, i = resid; i < 4; i++) - bp[i] = last.byte[i]; - } else { - last.word = *buf; - for (i = resid; i < 4; i++) - last.byte[i] = 0; - t4_write_reg(adap, mem_base + offset, - (__force u32)cpu_to_le32(last.word)); - } - } + if (resid) + t4_memory_rw_residual(adap, resid, mem_base + offset, + (u8 *)buf, dir); return 0; } |