From 5836b4429777bf57ca8fc02b154263aa54d97508 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 2 May 2017 18:36:50 +0530 Subject: net: thunderx: Support for page recycling Adds support for page recycling for allocating receive buffers to reduce cost of refilling RBDR ring. Also got rid of using compound pages when pagesize is 4K, only order-0 pages now. Only page is recycled, DMA mappings still needs to be done for every receive buffer allocated due to following constraints - Cannot have just one receive buffer per 64KB page. - There is just one buffer ring shared across 8 Rx queues, so buffers of same page can go to any Rx queue. - HW gives buffer address where packet has been DMA'ed and not the index into buffer ring. This makes it not possible to resue DMA mapping info. So unfortunately have to go through costly mapping route for every buffer. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 4 +- .../net/ethernet/cavium/thunder/nicvf_ethtool.c | 3 +- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 121 ++++++++++++++++++--- drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 11 ++ 4 files changed, 119 insertions(+), 20 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index 6fb44218bf55..dca6aed49094 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -252,12 +252,14 @@ struct nicvf_drv_stats { u64 tx_csum_overflow; /* driver debug stats */ - u64 rcv_buffer_alloc_failures; u64 tx_tso; u64 tx_timeout; u64 txq_stop; u64 txq_wake; + u64 rcv_buffer_alloc_failures; + u64 page_alloc; + struct u64_stats_sync syncp; }; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index 02a986cdbb39..a89db5f3e26e 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -100,11 +100,12 @@ static const struct nicvf_stat nicvf_drv_stats[] = { NICVF_DRV_STAT(tx_csum_overlap), NICVF_DRV_STAT(tx_csum_overflow), - NICVF_DRV_STAT(rcv_buffer_alloc_failures), NICVF_DRV_STAT(tx_tso), NICVF_DRV_STAT(tx_timeout), NICVF_DRV_STAT(txq_stop), NICVF_DRV_STAT(txq_wake), + NICVF_DRV_STAT(rcv_buffer_alloc_failures), + NICVF_DRV_STAT(page_alloc), }; static const struct nicvf_stat nicvf_queue_stats[] = { diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 7b0fd8d871cc..12f9709bb180 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -19,8 +19,6 @@ #include "q_struct.h" #include "nicvf_queues.h" -#define NICVF_PAGE_ORDER ((PAGE_SIZE <= 4096) ? PAGE_ALLOC_COSTLY_ORDER : 0) - static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr) { /* Translation is installed only when IOMMU is present */ @@ -90,33 +88,88 @@ static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem) dmem->base = NULL; } -/* Allocate buffer for packet reception - * HW returns memory address where packet is DMA'ed but not a pointer - * into RBDR ring, so save buffer address at the start of fragment and - * align the start address to a cache aligned address +/* Allocate a new page or recycle one if possible + * + * We cannot optimize dma mapping here, since + * 1. It's only one RBDR ring for 8 Rx queues. + * 2. CQE_RX gives address of the buffer where pkt has been DMA'ed + * and not idx into RBDR ring, so can't refer to saved info. + * 3. There are multiple receive buffers per page */ -static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, - u32 buf_len, u64 **rbuf) +static struct pgcache *nicvf_alloc_page(struct nicvf *nic, + struct rbdr *rbdr, gfp_t gfp) { - int order = NICVF_PAGE_ORDER; + struct page *page = NULL; + struct pgcache *pgcache, *next; + + /* Check if page is already allocated */ + pgcache = &rbdr->pgcache[rbdr->pgidx]; + page = pgcache->page; + /* Check if page can be recycled */ + if (page && (page_ref_count(page) != 1)) + page = NULL; + + if (!page) { + page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, 0); + if (!page) + return NULL; + + this_cpu_inc(nic->pnicvf->drv_stats->page_alloc); + + /* Check for space */ + if (rbdr->pgalloc >= rbdr->pgcnt) { + /* Page can still be used */ + nic->rb_page = page; + return NULL; + } + + /* Save the page in page cache */ + pgcache->page = page; + rbdr->pgalloc++; + } + + /* Take extra page reference for recycling */ + page_ref_add(page, 1); + + rbdr->pgidx++; + rbdr->pgidx &= (rbdr->pgcnt - 1); + + /* Prefetch refcount of next page in page cache */ + next = &rbdr->pgcache[rbdr->pgidx]; + page = next->page; + if (page) + prefetch(&page->_refcount); + + return pgcache; +} + +/* Allocate buffer for packet reception */ +static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr, + gfp_t gfp, u32 buf_len, u64 **rbuf) +{ + struct pgcache *pgcache = NULL; /* Check if request can be accomodated in previous allocated page */ if (nic->rb_page && - ((nic->rb_page_offset + buf_len) < (PAGE_SIZE << order))) { + ((nic->rb_page_offset + buf_len) <= PAGE_SIZE)) { nic->rb_pageref++; goto ret; } nicvf_get_page(nic); + nic->rb_page = NULL; - /* Allocate a new page */ - nic->rb_page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, - order); - if (!nic->rb_page) { + /* Get new page, either recycled or new one */ + pgcache = nicvf_alloc_page(nic, rbdr, gfp); + if (!pgcache && !nic->rb_page) { this_cpu_inc(nic->pnicvf->drv_stats->rcv_buffer_alloc_failures); return -ENOMEM; } + nic->rb_page_offset = 0; + /* Check if it's recycled */ + if (pgcache) + nic->rb_page = pgcache->page; ret: /* HW will ensure data coherency, CPU sync not required */ *rbuf = (u64 *)((u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page, @@ -125,7 +178,7 @@ ret: DMA_ATTR_SKIP_CPU_SYNC)); if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) { if (!nic->rb_page_offset) - __free_pages(nic->rb_page, order); + __free_pages(nic->rb_page, 0); nic->rb_page = NULL; return -ENOMEM; } @@ -177,10 +230,26 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, rbdr->head = 0; rbdr->tail = 0; + /* Initialize page recycling stuff. + * + * Can't use single buffer per page especially with 64K pages. + * On embedded platforms i.e 81xx/83xx available memory itself + * is low and minimum ring size of RBDR is 8K, that takes away + * lots of memory. + */ + rbdr->pgcnt = ring_len / (PAGE_SIZE / buf_size); + rbdr->pgcnt = roundup_pow_of_two(rbdr->pgcnt); + rbdr->pgcache = kzalloc(sizeof(*rbdr->pgcache) * + rbdr->pgcnt, GFP_KERNEL); + if (!rbdr->pgcache) + return -ENOMEM; + rbdr->pgidx = 0; + rbdr->pgalloc = 0; + nic->rb_page = NULL; for (idx = 0; idx < ring_len; idx++) { - err = nicvf_alloc_rcv_buffer(nic, GFP_KERNEL, RCV_FRAG_LEN, - &rbuf); + err = nicvf_alloc_rcv_buffer(nic, rbdr, GFP_KERNEL, + RCV_FRAG_LEN, &rbuf); if (err) { /* To free already allocated and mapped ones */ rbdr->tail = idx - 1; @@ -201,6 +270,7 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) { int head, tail; u64 buf_addr, phys_addr; + struct pgcache *pgcache; struct rbdr_entry_t *desc; if (!rbdr) @@ -234,6 +304,18 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) if (phys_addr) put_page(virt_to_page(phys_to_virt(phys_addr))); + /* Sync page cache info */ + smp_rmb(); + + /* Release additional page references held for recycling */ + head = 0; + while (head < rbdr->pgcnt) { + pgcache = &rbdr->pgcache[head]; + if (pgcache->page && page_ref_count(pgcache->page) != 0) + put_page(pgcache->page); + head++; + } + /* Free RBDR ring */ nicvf_free_q_desc_mem(nic, &rbdr->dmem); } @@ -269,13 +351,16 @@ refill: else refill_rb_cnt = qs->rbdr_len - qcount - 1; + /* Sync page cache info */ + smp_rmb(); + /* Start filling descs from tail */ tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3; while (refill_rb_cnt) { tail++; tail &= (rbdr->dmem.q_len - 1); - if (nicvf_alloc_rcv_buffer(nic, gfp, RCV_FRAG_LEN, &rbuf)) + if (nicvf_alloc_rcv_buffer(nic, rbdr, gfp, RCV_FRAG_LEN, &rbuf)) break; desc = GET_RBDR_DESC(rbdr, tail); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 10cb4b84625b..da4836601d8c 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -213,6 +213,11 @@ struct q_desc_mem { void *unalign_base; }; +struct pgcache { + struct page *page; + u64 dma_addr; +}; + struct rbdr { bool enable; u32 dma_size; @@ -222,6 +227,12 @@ struct rbdr { u32 head; u32 tail; struct q_desc_mem dmem; + + /* For page recycling */ + int pgidx; + int pgcnt; + int pgalloc; + struct pgcache *pgcache; } ____cacheline_aligned_in_smp; struct rcv_queue { -- cgit v1.2.3 From 5e848e4c5d77438e126c97702ec3bea477f550a9 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 2 May 2017 18:36:51 +0530 Subject: net: thunderx: Optimize RBDR descriptor handling Receive buffer's physical address or iova will anyway not go beyond 49bits, since it is the max supported HW address. As per perf, updating bitfields i.e buf_addr:42 in RBDR descriptor entry consumes lots of cpu cycles, hence changed it to a 64bit field with alignment requirements taken care of. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 8 ++++---- drivers/net/ethernet/cavium/thunder/q_struct.h | 10 +--------- 2 files changed, 5 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 12f9709bb180..dfc85a169127 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -257,7 +257,7 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, } desc = GET_RBDR_DESC(rbdr, idx); - desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN; + desc->buf_addr = (u64)rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1); } nicvf_get_page(nic); @@ -286,7 +286,7 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) /* Release page references */ while (head != tail) { desc = GET_RBDR_DESC(rbdr, head); - buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN; + buf_addr = desc->buf_addr; phys_addr = nicvf_iova_to_phys(nic, buf_addr); dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); @@ -297,7 +297,7 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) } /* Release buffer of tail desc */ desc = GET_RBDR_DESC(rbdr, tail); - buf_addr = ((u64)desc->buf_addr) << NICVF_RCV_BUF_ALIGN; + buf_addr = desc->buf_addr; phys_addr = nicvf_iova_to_phys(nic, buf_addr); dma_unmap_page_attrs(&nic->pdev->dev, buf_addr, RCV_FRAG_LEN, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); @@ -364,7 +364,7 @@ refill: break; desc = GET_RBDR_DESC(rbdr, tail); - desc->buf_addr = (u64)rbuf >> NICVF_RCV_BUF_ALIGN; + desc->buf_addr = (u64)rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1); refill_rb_cnt--; new_rb++; } diff --git a/drivers/net/ethernet/cavium/thunder/q_struct.h b/drivers/net/ethernet/cavium/thunder/q_struct.h index f36347237a54..e47205aa87ea 100644 --- a/drivers/net/ethernet/cavium/thunder/q_struct.h +++ b/drivers/net/ethernet/cavium/thunder/q_struct.h @@ -359,15 +359,7 @@ union cq_desc_t { }; struct rbdr_entry_t { -#if defined(__BIG_ENDIAN_BITFIELD) - u64 rsvd0:15; - u64 buf_addr:42; - u64 cache_align:7; -#elif defined(__LITTLE_ENDIAN_BITFIELD) - u64 cache_align:7; - u64 buf_addr:42; - u64 rsvd0:15; -#endif + u64 buf_addr; }; /* TCP reassembly context */ -- cgit v1.2.3 From 0dada88b8cd74569abc3dda50f1b268a5868f6f2 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 2 May 2017 18:36:52 +0530 Subject: net: thunderx: Optimize CQE_TX handling Optimized CQE handling with below changes - Feeing descriptors back to SQ in bulk i.e once per NAPI instance instead for every CQE_TX, this will reduce number of atomic updates to 'sq->free_cnt'. - Checking errors in CQE_TX and CQE_RX before calling appropriate fn()s to update error stats i.e reduce branching. Also removed debug messages in packet handling path which otherwise causes issues if DEBUG is enabled. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 44 +++++++++++----------- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 5 --- 2 files changed, 21 insertions(+), 28 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 81a2fcb3cb1b..0d79894400ab 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -498,7 +498,7 @@ static int nicvf_init_resources(struct nicvf *nic) static void nicvf_snd_pkt_handler(struct net_device *netdev, struct cqe_send_t *cqe_tx, - int cqe_type, int budget, + int budget, int *subdesc_cnt, unsigned int *tx_pkts, unsigned int *tx_bytes) { struct sk_buff *skb = NULL; @@ -513,12 +513,10 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) return; - netdev_dbg(nic->netdev, - "%s Qset #%d SQ #%d SQ ptr #%d subdesc count %d\n", - __func__, cqe_tx->sq_qs, cqe_tx->sq_idx, - cqe_tx->sqe_ptr, hdr->subdesc_cnt); + /* Check for errors */ + if (cqe_tx->send_status) + nicvf_check_cqe_tx_errs(nic->pnicvf, cqe_tx); - nicvf_check_cqe_tx_errs(nic, cqe_tx); skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr]; if (skb) { /* Check for dummy descriptor used for HW TSO offload on 88xx */ @@ -528,12 +526,12 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, hdr->rsvd2); nicvf_unmap_sndq_buffers(nic, sq, hdr->rsvd2, tso_sqe->subdesc_cnt); - nicvf_put_sq_desc(sq, tso_sqe->subdesc_cnt + 1); + *subdesc_cnt += tso_sqe->subdesc_cnt + 1; } else { nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr, hdr->subdesc_cnt); } - nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); + *subdesc_cnt += hdr->subdesc_cnt + 1; prefetch(skb); (*tx_pkts)++; *tx_bytes += skb->len; @@ -544,7 +542,7 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, * a SKB attached, so just free SQEs here. */ if (!nic->hw_tso) - nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); + *subdesc_cnt += hdr->subdesc_cnt + 1; } } @@ -595,9 +593,11 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, } /* Check for errors */ - err = nicvf_check_cqe_rx_errs(nic, cqe_rx); - if (err && !cqe_rx->rb_cnt) - return; + if (cqe_rx->err_level || cqe_rx->err_opcode) { + err = nicvf_check_cqe_rx_errs(nic, cqe_rx); + if (err && !cqe_rx->rb_cnt) + return; + } skb = nicvf_get_rcv_skb(snic, cqe_rx); if (!skb) { @@ -646,6 +646,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, { int processed_cqe, work_done = 0, tx_done = 0; int cqe_count, cqe_head; + int subdesc_cnt = 0; struct nicvf *nic = netdev_priv(netdev); struct queue_set *qs = nic->qs; struct cmp_queue *cq = &qs->cq[cq_idx]; @@ -667,8 +668,6 @@ loop: cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9; cqe_head &= 0xFFFF; - netdev_dbg(nic->netdev, "%s CQ%d cqe_count %d cqe_head %d\n", - __func__, cq_idx, cqe_count, cqe_head); while (processed_cqe < cqe_count) { /* Get the CQ descriptor */ cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head); @@ -682,17 +681,15 @@ loop: break; } - netdev_dbg(nic->netdev, "CQ%d cq_desc->cqe_type %d\n", - cq_idx, cq_desc->cqe_type); switch (cq_desc->cqe_type) { case CQE_TYPE_RX: nicvf_rcv_pkt_handler(netdev, napi, cq_desc); work_done++; break; case CQE_TYPE_SEND: - nicvf_snd_pkt_handler(netdev, - (void *)cq_desc, CQE_TYPE_SEND, - budget, &tx_pkts, &tx_bytes); + nicvf_snd_pkt_handler(netdev, (void *)cq_desc, + budget, &subdesc_cnt, + &tx_pkts, &tx_bytes); tx_done++; break; case CQE_TYPE_INVALID: @@ -704,9 +701,6 @@ loop: } processed_cqe++; } - netdev_dbg(nic->netdev, - "%s CQ%d processed_cqe %d work_done %d budget %d\n", - __func__, cq_idx, processed_cqe, work_done, budget); /* Ring doorbell to inform H/W to reuse processed CQEs */ nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR, @@ -716,8 +710,12 @@ loop: goto loop; done: - /* Wakeup TXQ if its stopped earlier due to SQ full */ sq = &nic->qs->sq[cq_idx]; + /* Update SQ's descriptor free count */ + if (subdesc_cnt) + nicvf_put_sq_desc(sq, subdesc_cnt); + + /* Wakeup TXQ if its stopped earlier due to SQ full */ if (tx_done || (atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) { netdev = nic->pnicvf->netdev; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index dfc85a169127..90c5bc7d7344 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -1640,9 +1640,6 @@ void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx) /* Check for errors in the receive cmp.queue entry */ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx) { - if (!cqe_rx->err_level && !cqe_rx->err_opcode) - return 0; - if (netif_msg_rx_err(nic)) netdev_err(nic->netdev, "%s: RX error CQE err_level 0x%x err_opcode 0x%x\n", @@ -1731,8 +1728,6 @@ int nicvf_check_cqe_rx_errs(struct nicvf *nic, struct cqe_rx_t *cqe_rx) int nicvf_check_cqe_tx_errs(struct nicvf *nic, struct cqe_send_t *cqe_tx) { switch (cqe_tx->send_status) { - case CQ_TX_ERROP_GOOD: - return 0; case CQ_TX_ERROP_DESC_FAULT: this_cpu_inc(nic->drv_stats->tx_desc_fault); break; -- cgit v1.2.3 From 927987f39f116db477fcd74ced2a2aea940e585c Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 2 May 2017 18:36:53 +0530 Subject: net: thunderx: Cleanup receive buffer allocation Get rid of unnecessary double pointer references and type casting in receive buffer allocation code. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 90c5bc7d7344..e4a02a96d4f0 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -145,7 +145,7 @@ static struct pgcache *nicvf_alloc_page(struct nicvf *nic, /* Allocate buffer for packet reception */ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr, - gfp_t gfp, u32 buf_len, u64 **rbuf) + gfp_t gfp, u32 buf_len, u64 *rbuf) { struct pgcache *pgcache = NULL; @@ -172,10 +172,10 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr, nic->rb_page = pgcache->page; ret: /* HW will ensure data coherency, CPU sync not required */ - *rbuf = (u64 *)((u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page, - nic->rb_page_offset, buf_len, - DMA_FROM_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC)); + *rbuf = (u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page, + nic->rb_page_offset, buf_len, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) { if (!nic->rb_page_offset) __free_pages(nic->rb_page, 0); @@ -212,7 +212,7 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, int ring_len, int buf_size) { int idx; - u64 *rbuf; + u64 rbuf; struct rbdr_entry_t *desc; int err; @@ -257,7 +257,7 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, } desc = GET_RBDR_DESC(rbdr, idx); - desc->buf_addr = (u64)rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1); + desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1); } nicvf_get_page(nic); @@ -330,7 +330,7 @@ static void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp) int refill_rb_cnt; struct rbdr *rbdr; struct rbdr_entry_t *desc; - u64 *rbuf; + u64 rbuf; int new_rb = 0; refill: @@ -364,7 +364,7 @@ refill: break; desc = GET_RBDR_DESC(rbdr, tail); - desc->buf_addr = (u64)rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1); + desc->buf_addr = rbuf & ~(NICVF_RCV_BUF_ALIGN_BYTES - 1); refill_rb_cnt--; new_rb++; } -- cgit v1.2.3 From 05c773f52b96ef3fbc7d9bfa21caadc6247ef7a8 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 2 May 2017 18:36:54 +0530 Subject: net: thunderx: Add basic XDP support Adds basic XDP support i.e attaching a BPF program to an interface. Also takes care of allocating separate Tx queues for XDP path and for network stack packet transmission. This patch doesn't support handling of any of the XDP actions, all are treated as XDP_PASS i.e packets will be handed over to the network stack. Changes also involve allocating one receive buffer per page in XDP mode and multiple in normal mode i.e when no BPF program is attached. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic.h | 6 +- .../net/ethernet/cavium/thunder/nicvf_ethtool.c | 26 +++- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 162 ++++++++++++++++++++- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 15 +- drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 9 ++ 5 files changed, 199 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index dca6aed49094..4a02e618e318 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -268,9 +268,9 @@ struct nicvf { struct net_device *netdev; struct pci_dev *pdev; void __iomem *reg_base; + struct bpf_prog *xdp_prog; #define MAX_QUEUES_PER_QSET 8 struct queue_set *qs; - struct nicvf_cq_poll *napi[8]; void *iommu_domain; u8 vf_id; u8 sqs_id; @@ -296,6 +296,7 @@ struct nicvf { /* Queue count */ u8 rx_queues; u8 tx_queues; + u8 xdp_tx_queues; u8 max_queues; u8 node; @@ -320,6 +321,9 @@ struct nicvf { struct nicvf_drv_stats __percpu *drv_stats; struct bgx_stats bgx_stats; + /* Napi */ + struct nicvf_cq_poll *napi[8]; + /* MSI-X */ u8 num_vec; char irq_name[NIC_VF_MSIX_VECTORS][IFNAMSIZ + 15]; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c index a89db5f3e26e..b9ece9cbf98b 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -721,7 +721,7 @@ static int nicvf_set_channels(struct net_device *dev, struct nicvf *nic = netdev_priv(dev); int err = 0; bool if_up = netif_running(dev); - int cqcount; + u8 cqcount, txq_count; if (!channel->rx_count || !channel->tx_count) return -EINVAL; @@ -730,10 +730,26 @@ static int nicvf_set_channels(struct net_device *dev, if (channel->tx_count > nic->max_queues) return -EINVAL; + if (nic->xdp_prog && + ((channel->tx_count + channel->rx_count) > nic->max_queues)) { + netdev_err(nic->netdev, + "XDP mode, RXQs + TXQs > Max %d\n", + nic->max_queues); + return -EINVAL; + } + if (if_up) nicvf_stop(dev); - cqcount = max(channel->rx_count, channel->tx_count); + nic->rx_queues = channel->rx_count; + nic->tx_queues = channel->tx_count; + if (!nic->xdp_prog) + nic->xdp_tx_queues = 0; + else + nic->xdp_tx_queues = channel->rx_count; + + txq_count = nic->xdp_tx_queues + nic->tx_queues; + cqcount = max(nic->rx_queues, txq_count); if (cqcount > MAX_CMP_QUEUES_PER_QS) { nic->sqs_count = roundup(cqcount, MAX_CMP_QUEUES_PER_QS); @@ -742,12 +758,10 @@ static int nicvf_set_channels(struct net_device *dev, nic->sqs_count = 0; } - nic->qs->rq_cnt = min_t(u32, channel->rx_count, MAX_RCV_QUEUES_PER_QS); - nic->qs->sq_cnt = min_t(u32, channel->tx_count, MAX_SND_QUEUES_PER_QS); + nic->qs->rq_cnt = min_t(u8, nic->rx_queues, MAX_RCV_QUEUES_PER_QS); + nic->qs->sq_cnt = min_t(u8, txq_count, MAX_SND_QUEUES_PER_QS); nic->qs->cq_cnt = max(nic->qs->rq_cnt, nic->qs->sq_cnt); - nic->rx_queues = channel->rx_count; - nic->tx_queues = channel->tx_count; err = nicvf_set_real_num_queues(dev, nic->tx_queues, nic->rx_queues); if (err) return err; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 0d79894400ab..9c48873350f8 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include "nic_reg.h" #include "nic.h" @@ -397,8 +399,10 @@ static void nicvf_request_sqs(struct nicvf *nic) if (nic->rx_queues > MAX_RCV_QUEUES_PER_QS) rx_queues = nic->rx_queues - MAX_RCV_QUEUES_PER_QS; - if (nic->tx_queues > MAX_SND_QUEUES_PER_QS) - tx_queues = nic->tx_queues - MAX_SND_QUEUES_PER_QS; + + tx_queues = nic->tx_queues + nic->xdp_tx_queues; + if (tx_queues > MAX_SND_QUEUES_PER_QS) + tx_queues = tx_queues - MAX_SND_QUEUES_PER_QS; /* Set no of Rx/Tx queues in each of the SQsets */ for (sqs = 0; sqs < nic->sqs_count; sqs++) { @@ -496,6 +500,43 @@ static int nicvf_init_resources(struct nicvf *nic) return 0; } +static inline bool nicvf_xdp_rx(struct nicvf *nic, + struct bpf_prog *prog, + struct cqe_rx_t *cqe_rx) +{ + struct xdp_buff xdp; + u32 action; + u16 len; + u64 dma_addr, cpu_addr; + + /* Retrieve packet buffer's DMA address and length */ + len = *((u16 *)((void *)cqe_rx + (3 * sizeof(u64)))); + dma_addr = *((u64 *)((void *)cqe_rx + (7 * sizeof(u64)))); + + cpu_addr = nicvf_iova_to_phys(nic, dma_addr); + if (!cpu_addr) + return false; + + xdp.data = phys_to_virt(cpu_addr); + xdp.data_end = xdp.data + len; + + rcu_read_lock(); + action = bpf_prog_run_xdp(prog, &xdp); + rcu_read_unlock(); + + switch (action) { + case XDP_PASS: + case XDP_TX: + case XDP_ABORTED: + case XDP_DROP: + /* Pass on all packets to network stack */ + return false; + default: + bpf_warn_invalid_xdp_action(action); + } + return false; +} + static void nicvf_snd_pkt_handler(struct net_device *netdev, struct cqe_send_t *cqe_tx, int budget, int *subdesc_cnt, @@ -599,6 +640,11 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, return; } + /* For XDP, ignore pkts spanning multiple pages */ + if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) + if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx)) + return; + skb = nicvf_get_rcv_skb(snic, cqe_rx); if (!skb) { netdev_dbg(nic->netdev, "Packet not received\n"); @@ -1529,6 +1575,117 @@ static int nicvf_set_features(struct net_device *netdev, return 0; } +static void nicvf_set_xdp_queues(struct nicvf *nic, bool bpf_attached) +{ + u8 cq_count, txq_count; + + /* Set XDP Tx queue count same as Rx queue count */ + if (!bpf_attached) + nic->xdp_tx_queues = 0; + else + nic->xdp_tx_queues = nic->rx_queues; + + /* If queue count > MAX_CMP_QUEUES_PER_QS, then additional qsets + * needs to be allocated, check how many. + */ + txq_count = nic->xdp_tx_queues + nic->tx_queues; + cq_count = max(nic->rx_queues, txq_count); + if (cq_count > MAX_CMP_QUEUES_PER_QS) { + nic->sqs_count = roundup(cq_count, MAX_CMP_QUEUES_PER_QS); + nic->sqs_count = (nic->sqs_count / MAX_CMP_QUEUES_PER_QS) - 1; + } else { + nic->sqs_count = 0; + } + + /* Set primary Qset's resources */ + nic->qs->rq_cnt = min_t(u8, nic->rx_queues, MAX_RCV_QUEUES_PER_QS); + nic->qs->sq_cnt = min_t(u8, txq_count, MAX_SND_QUEUES_PER_QS); + nic->qs->cq_cnt = max_t(u8, nic->qs->rq_cnt, nic->qs->sq_cnt); + + /* Update stack */ + nicvf_set_real_num_queues(nic->netdev, nic->tx_queues, nic->rx_queues); +} + +static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) +{ + struct net_device *dev = nic->netdev; + bool if_up = netif_running(nic->netdev); + struct bpf_prog *old_prog; + bool bpf_attached = false; + + /* For now just support only the usual MTU sized frames */ + if (prog && (dev->mtu > 1500)) { + netdev_warn(dev, "Jumbo frames not yet supported with XDP, current MTU %d.\n", + dev->mtu); + return -EOPNOTSUPP; + } + + if (prog && prog->xdp_adjust_head) + return -EOPNOTSUPP; + + /* ALL SQs attached to CQs i.e same as RQs, are treated as + * XDP Tx queues and more Tx queues are allocated for + * network stack to send pkts out. + * + * No of Tx queues are either same as Rx queues or whatever + * is left in max no of queues possible. + */ + if ((nic->rx_queues + nic->tx_queues) > nic->max_queues) { + netdev_warn(dev, + "Failed to attach BPF prog, RXQs + TXQs > Max %d\n", + nic->max_queues); + return -ENOMEM; + } + + if (if_up) + nicvf_stop(nic->netdev); + + old_prog = xchg(&nic->xdp_prog, prog); + /* Detach old prog, if any */ + if (old_prog) + bpf_prog_put(old_prog); + + if (nic->xdp_prog) { + /* Attach BPF program */ + nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1); + if (!IS_ERR(nic->xdp_prog)) + bpf_attached = true; + } + + /* Calculate Tx queues needed for XDP and network stack */ + nicvf_set_xdp_queues(nic, bpf_attached); + + if (if_up) { + /* Reinitialize interface, clean slate */ + nicvf_open(nic->netdev); + netif_trans_update(nic->netdev); + } + + return 0; +} + +static int nicvf_xdp(struct net_device *netdev, struct netdev_xdp *xdp) +{ + struct nicvf *nic = netdev_priv(netdev); + + /* To avoid checks while retrieving buffer address from CQE_RX, + * do not support XDP for T88 pass1.x silicons which are anyway + * not in use widely. + */ + if (pass1_silicon(nic->pdev)) + return -EOPNOTSUPP; + + switch (xdp->command) { + case XDP_SETUP_PROG: + return nicvf_xdp_setup(nic, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = !!nic->xdp_prog; + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops nicvf_netdev_ops = { .ndo_open = nicvf_open, .ndo_stop = nicvf_stop, @@ -1539,6 +1696,7 @@ static const struct net_device_ops nicvf_netdev_ops = { .ndo_tx_timeout = nicvf_tx_timeout, .ndo_fix_features = nicvf_fix_features, .ndo_set_features = nicvf_set_features, + .ndo_xdp = nicvf_xdp, }; static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index e4a02a96d4f0..8c3c571568aa 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -19,14 +19,6 @@ #include "q_struct.h" #include "nicvf_queues.h" -static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr) -{ - /* Translation is installed only when IOMMU is present */ - if (nic->iommu_domain) - return iommu_iova_to_phys(nic->iommu_domain, dma_addr); - return dma_addr; -} - static void nicvf_get_page(struct nicvf *nic) { if (!nic->rb_pageref || !nic->rb_page) @@ -149,8 +141,10 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr, { struct pgcache *pgcache = NULL; - /* Check if request can be accomodated in previous allocated page */ - if (nic->rb_page && + /* Check if request can be accomodated in previous allocated page. + * But in XDP mode only one buffer per page is permitted. + */ + if (!nic->pnicvf->xdp_prog && nic->rb_page && ((nic->rb_page_offset + buf_len) <= PAGE_SIZE)) { nic->rb_pageref++; goto ret; @@ -961,6 +955,7 @@ int nicvf_set_qset_resources(struct nicvf *nic) nic->rx_queues = qs->rq_cnt; nic->tx_queues = qs->sq_cnt; + nic->xdp_tx_queues = 0; return 0; } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index da4836601d8c..07136a2819ff 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -10,6 +10,7 @@ #define NICVF_QUEUES_H #include +#include #include "q_struct.h" #define MAX_QUEUE_SET 128 @@ -312,6 +313,14 @@ struct queue_set { #define CQ_ERR_MASK (CQ_WR_FULL | CQ_WR_DISABLE | CQ_WR_FAULT) +static inline u64 nicvf_iova_to_phys(struct nicvf *nic, dma_addr_t dma_addr) +{ + /* Translation is installed only when IOMMU is present */ + if (nic->iommu_domain) + return iommu_iova_to_phys(nic->iommu_domain, dma_addr); + return dma_addr; +} + void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq, int hdr_sqe, u8 subdesc_cnt); void nicvf_config_vlan_stripping(struct nicvf *nic, -- cgit v1.2.3 From c56d91ce38d54c0c0dd8d0e4c6a9e0cfa557152f Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 2 May 2017 18:36:55 +0530 Subject: net: thunderx: Add support for XDP_DROP Adds support for XDP_DROP. Also since in XDP mode there is just a single buffer per page, made changes to recycle DMA mapping info as well along with pages. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 23 ++++++- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 77 ++++++++++++++++------ drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 4 +- 3 files changed, 79 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 9c48873350f8..a58cc1e7b94a 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "nic_reg.h" @@ -505,6 +506,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct cqe_rx_t *cqe_rx) { struct xdp_buff xdp; + struct page *page; u32 action; u16 len; u64 dma_addr, cpu_addr; @@ -527,12 +529,27 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, switch (action) { case XDP_PASS: case XDP_TX: - case XDP_ABORTED: - case XDP_DROP: /* Pass on all packets to network stack */ return false; default: bpf_warn_invalid_xdp_action(action); + case XDP_ABORTED: + trace_xdp_exception(nic->netdev, prog, action); + case XDP_DROP: + page = virt_to_page(xdp.data); + /* Check if it's a recycled page, if not + * unmap the DMA mapping. + * + * Recycled page holds an extra reference. + */ + if (page_ref_count(page) == 1) { + dma_addr &= PAGE_MASK; + dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, + RCV_FRAG_LEN, DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + } + put_page(page); + return true; } return false; } @@ -645,7 +662,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx)) return; - skb = nicvf_get_rcv_skb(snic, cqe_rx); + skb = nicvf_get_rcv_skb(snic, cqe_rx, nic->xdp_prog ? true : false); if (!skb) { netdev_dbg(nic->netdev, "Packet not received\n"); return; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 8c3c571568aa..5009f497e64f 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -117,6 +117,7 @@ static struct pgcache *nicvf_alloc_page(struct nicvf *nic, /* Save the page in page cache */ pgcache->page = page; + pgcache->dma_addr = 0; rbdr->pgalloc++; } @@ -144,7 +145,7 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr, /* Check if request can be accomodated in previous allocated page. * But in XDP mode only one buffer per page is permitted. */ - if (!nic->pnicvf->xdp_prog && nic->rb_page && + if (!rbdr->is_xdp && nic->rb_page && ((nic->rb_page_offset + buf_len) <= PAGE_SIZE)) { nic->rb_pageref++; goto ret; @@ -165,18 +166,24 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr, if (pgcache) nic->rb_page = pgcache->page; ret: - /* HW will ensure data coherency, CPU sync not required */ - *rbuf = (u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page, - nic->rb_page_offset, buf_len, - DMA_FROM_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); - if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) { - if (!nic->rb_page_offset) - __free_pages(nic->rb_page, 0); - nic->rb_page = NULL; - return -ENOMEM; + if (rbdr->is_xdp && pgcache && pgcache->dma_addr) { + *rbuf = pgcache->dma_addr; + } else { + /* HW will ensure data coherency, CPU sync not required */ + *rbuf = (u64)dma_map_page_attrs(&nic->pdev->dev, nic->rb_page, + nic->rb_page_offset, buf_len, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + if (dma_mapping_error(&nic->pdev->dev, (dma_addr_t)*rbuf)) { + if (!nic->rb_page_offset) + __free_pages(nic->rb_page, 0); + nic->rb_page = NULL; + return -ENOMEM; + } + if (pgcache) + pgcache->dma_addr = *rbuf; + nic->rb_page_offset += buf_len; } - nic->rb_page_offset += buf_len; return 0; } @@ -230,8 +237,16 @@ static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, * On embedded platforms i.e 81xx/83xx available memory itself * is low and minimum ring size of RBDR is 8K, that takes away * lots of memory. + * + * But for XDP it has to be a single buffer per page. */ - rbdr->pgcnt = ring_len / (PAGE_SIZE / buf_size); + if (!nic->pnicvf->xdp_prog) { + rbdr->pgcnt = ring_len / (PAGE_SIZE / buf_size); + rbdr->is_xdp = false; + } else { + rbdr->pgcnt = ring_len; + rbdr->is_xdp = true; + } rbdr->pgcnt = roundup_pow_of_two(rbdr->pgcnt); rbdr->pgcache = kzalloc(sizeof(*rbdr->pgcache) * rbdr->pgcnt, GFP_KERNEL); @@ -1454,8 +1469,31 @@ static inline unsigned frag_num(unsigned i) #endif } +static void nicvf_unmap_rcv_buffer(struct nicvf *nic, u64 dma_addr, + u64 buf_addr, bool xdp) +{ + struct page *page = NULL; + int len = RCV_FRAG_LEN; + + if (xdp) { + page = virt_to_page(phys_to_virt(buf_addr)); + /* Check if it's a recycled page, if not + * unmap the DMA mapping. + * + * Recycled page holds an extra reference. + */ + if (page_ref_count(page) != 1) + return; + /* Receive buffers in XDP mode are mapped from page start */ + dma_addr &= PAGE_MASK; + } + dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, len, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); +} + /* Returns SKB for a received packet */ -struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) +struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, + struct cqe_rx_t *cqe_rx, bool xdp) { int frag; int payload_len = 0; @@ -1490,10 +1528,9 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) if (!frag) { /* First fragment */ - dma_unmap_page_attrs(&nic->pdev->dev, - *rb_ptrs - cqe_rx->align_pad, - RCV_FRAG_LEN, DMA_FROM_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); + nicvf_unmap_rcv_buffer(nic, + *rb_ptrs - cqe_rx->align_pad, + phys_addr, xdp); skb = nicvf_rb_ptr_to_skb(nic, phys_addr - cqe_rx->align_pad, payload_len); @@ -1503,9 +1540,7 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) skb_put(skb, payload_len); } else { /* Add fragments */ - dma_unmap_page_attrs(&nic->pdev->dev, *rb_ptrs, - RCV_FRAG_LEN, DMA_FROM_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); + nicvf_unmap_rcv_buffer(nic, *rb_ptrs, phys_addr, xdp); page = virt_to_page(phys_to_virt(phys_addr)); offset = phys_to_virt(phys_addr) - page_address(page); skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 07136a2819ff..db04c0e33c6c 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -228,6 +228,7 @@ struct rbdr { u32 head; u32 tail; struct q_desc_mem dmem; + bool is_xdp; /* For page recycling */ int pgidx; @@ -339,7 +340,8 @@ void nicvf_sq_free_used_descs(struct net_device *netdev, int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq, struct sk_buff *skb, u8 sq_num); -struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx); +struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, + struct cqe_rx_t *cqe_rx, bool xdp); void nicvf_rbdr_task(unsigned long data); void nicvf_rbdr_work(struct work_struct *work); -- cgit v1.2.3 From 16f2bccda75da48888772c4829a468be620c5d79 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 2 May 2017 18:36:56 +0530 Subject: net: thunderx: Add support for XDP_TX Adds support for XDP_TX i.e transmits packet out of the XDP TX queue mapped to the corresponding Rx queue on which packet is received. Since SQ for XDP TX will be used only on a single cpu i.e SQ description creation and freeing, using atomic free count is not necessary and will become a bottleneck. Hence added a separate 'xdp_free_cnt' used for SQs designated for XDP to track descriptor free count. Changes also include - A new entry 'xdp_page' is added to save transmitted packet's page pointer for later cleanup. - XDP Tx SQ's doorbell is ringed once per NAPI instance. - Retrieving designated SQ for packets being sent out by stack via 'nicvf_xmit'. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 63 ++++++++--- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 117 ++++++++++++++++++--- drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 7 ++ 3 files changed, 160 insertions(+), 27 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index a58cc1e7b94a..bb13dee388c3 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -501,9 +501,8 @@ static int nicvf_init_resources(struct nicvf *nic) return 0; } -static inline bool nicvf_xdp_rx(struct nicvf *nic, - struct bpf_prog *prog, - struct cqe_rx_t *cqe_rx) +static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, + struct cqe_rx_t *cqe_rx, struct snd_queue *sq) { struct xdp_buff xdp; struct page *page; @@ -528,9 +527,11 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, switch (action) { case XDP_PASS: - case XDP_TX: - /* Pass on all packets to network stack */ + /* Pass on packet to network stack */ return false; + case XDP_TX: + nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len); + return true; default: bpf_warn_invalid_xdp_action(action); case XDP_ABORTED: @@ -560,6 +561,7 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, unsigned int *tx_pkts, unsigned int *tx_bytes) { struct sk_buff *skb = NULL; + struct page *page; struct nicvf *nic = netdev_priv(netdev); struct snd_queue *sq; struct sq_hdr_subdesc *hdr; @@ -575,6 +577,22 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, if (cqe_tx->send_status) nicvf_check_cqe_tx_errs(nic->pnicvf, cqe_tx); + /* Is this a XDP designated Tx queue */ + if (sq->is_xdp) { + page = (struct page *)sq->xdp_page[cqe_tx->sqe_ptr]; + /* Check if it's recycled page or else unmap DMA mapping */ + if (page && (page_ref_count(page) == 1)) + nicvf_unmap_sndq_buffers(nic, sq, cqe_tx->sqe_ptr, + hdr->subdesc_cnt); + + /* Release page reference for recycling */ + if (page) + put_page(page); + sq->xdp_page[cqe_tx->sqe_ptr] = (u64)NULL; + *subdesc_cnt += hdr->subdesc_cnt + 1; + return; + } + skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr]; if (skb) { /* Check for dummy descriptor used for HW TSO offload on 88xx */ @@ -634,7 +652,7 @@ static inline void nicvf_set_rxhash(struct net_device *netdev, static void nicvf_rcv_pkt_handler(struct net_device *netdev, struct napi_struct *napi, - struct cqe_rx_t *cqe_rx) + struct cqe_rx_t *cqe_rx, struct snd_queue *sq) { struct sk_buff *skb; struct nicvf *nic = netdev_priv(netdev); @@ -659,7 +677,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, /* For XDP, ignore pkts spanning multiple pages */ if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) - if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx)) + if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq)) return; skb = nicvf_get_rcv_skb(snic, cqe_rx, nic->xdp_prog ? true : false); @@ -715,8 +733,8 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, struct cmp_queue *cq = &qs->cq[cq_idx]; struct cqe_rx_t *cq_desc; struct netdev_queue *txq; - struct snd_queue *sq; - unsigned int tx_pkts = 0, tx_bytes = 0; + struct snd_queue *sq = &qs->sq[cq_idx]; + unsigned int tx_pkts = 0, tx_bytes = 0, txq_idx; spin_lock_bh(&cq->lock); loop: @@ -746,7 +764,7 @@ loop: switch (cq_desc->cqe_type) { case CQE_TYPE_RX: - nicvf_rcv_pkt_handler(netdev, napi, cq_desc); + nicvf_rcv_pkt_handler(netdev, napi, cq_desc, sq); work_done++; break; case CQE_TYPE_SEND: @@ -773,17 +791,26 @@ loop: goto loop; done: - sq = &nic->qs->sq[cq_idx]; /* Update SQ's descriptor free count */ if (subdesc_cnt) nicvf_put_sq_desc(sq, subdesc_cnt); + txq_idx = nicvf_netdev_qidx(nic, cq_idx); + /* Handle XDP TX queues */ + if (nic->pnicvf->xdp_prog) { + if (txq_idx < nic->pnicvf->xdp_tx_queues) { + nicvf_xdp_sq_doorbell(nic, sq, cq_idx); + goto out; + } + nic = nic->pnicvf; + txq_idx -= nic->pnicvf->xdp_tx_queues; + } + /* Wakeup TXQ if its stopped earlier due to SQ full */ if (tx_done || (atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) { netdev = nic->pnicvf->netdev; - txq = netdev_get_tx_queue(netdev, - nicvf_netdev_qidx(nic, cq_idx)); + txq = netdev_get_tx_queue(netdev, txq_idx); if (tx_pkts) netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); @@ -796,10 +823,11 @@ done: if (netif_msg_tx_err(nic)) netdev_warn(netdev, "%s: Transmit queue wakeup SQ%d\n", - netdev->name, cq_idx); + netdev->name, txq_idx); } } +out: spin_unlock_bh(&cq->lock); return work_done; } @@ -1115,6 +1143,13 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_OK; } + /* In XDP case, initial HW tx queues are used for XDP, + * but stack's queue mapping starts at '0', so skip the + * Tx queues attached to Rx queues for XDP. + */ + if (nic->xdp_prog) + qid += nic->xdp_tx_queues; + snic = nic; /* Get secondary Qset's SQ structure */ if (qid >= MAX_SND_QUEUES_PER_QS) { diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 5009f497e64f..ec234b626fe3 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -19,6 +19,8 @@ #include "q_struct.h" #include "nicvf_queues.h" +static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry, + int size, u64 data); static void nicvf_get_page(struct nicvf *nic) { if (!nic->rb_pageref || !nic->rb_page) @@ -456,7 +458,7 @@ static void nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq) /* Initialize transmit queue */ static int nicvf_init_snd_queue(struct nicvf *nic, - struct snd_queue *sq, int q_len) + struct snd_queue *sq, int q_len, int qidx) { int err; @@ -469,17 +471,38 @@ static int nicvf_init_snd_queue(struct nicvf *nic, sq->skbuff = kcalloc(q_len, sizeof(u64), GFP_KERNEL); if (!sq->skbuff) return -ENOMEM; + sq->head = 0; sq->tail = 0; - atomic_set(&sq->free_cnt, q_len - 1); sq->thresh = SND_QUEUE_THRESH; - /* Preallocate memory for TSO segment's header */ - sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev, - q_len * TSO_HEADER_SIZE, - &sq->tso_hdrs_phys, GFP_KERNEL); - if (!sq->tso_hdrs) - return -ENOMEM; + /* Check if this SQ is a XDP TX queue */ + if (nic->sqs_mode) + qidx += ((nic->sqs_id + 1) * MAX_SND_QUEUES_PER_QS); + if (qidx < nic->pnicvf->xdp_tx_queues) { + /* Alloc memory to save page pointers for XDP_TX */ + sq->xdp_page = kcalloc(q_len, sizeof(u64), GFP_KERNEL); + if (!sq->xdp_page) + return -ENOMEM; + sq->xdp_desc_cnt = 0; + sq->xdp_free_cnt = q_len - 1; + sq->is_xdp = true; + } else { + sq->xdp_page = NULL; + sq->xdp_desc_cnt = 0; + sq->xdp_free_cnt = 0; + sq->is_xdp = false; + + atomic_set(&sq->free_cnt, q_len - 1); + + /* Preallocate memory for TSO segment's header */ + sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev, + q_len * TSO_HEADER_SIZE, + &sq->tso_hdrs_phys, + GFP_KERNEL); + if (!sq->tso_hdrs) + return -ENOMEM; + } return 0; } @@ -505,6 +528,7 @@ void nicvf_unmap_sndq_buffers(struct nicvf *nic, struct snd_queue *sq, static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) { struct sk_buff *skb; + struct page *page; struct sq_hdr_subdesc *hdr; struct sq_hdr_subdesc *tso_sqe; @@ -522,8 +546,15 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) smp_rmb(); while (sq->head != sq->tail) { skb = (struct sk_buff *)sq->skbuff[sq->head]; - if (!skb) + if (!skb || !sq->xdp_page) + goto next; + + page = (struct page *)sq->xdp_page[sq->head]; + if (!page) goto next; + else + put_page(page); + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head); /* Check for dummy descriptor used for HW TSO offload on 88xx */ if (hdr->dont_send) { @@ -536,12 +567,14 @@ static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) nicvf_unmap_sndq_buffers(nic, sq, sq->head, hdr->subdesc_cnt); } - dev_kfree_skb_any(skb); + if (skb) + dev_kfree_skb_any(skb); next: sq->head++; sq->head &= (sq->dmem.q_len - 1); } kfree(sq->skbuff); + kfree(sq->xdp_page); nicvf_free_q_desc_mem(nic, &sq->dmem); } @@ -932,7 +965,7 @@ static int nicvf_alloc_resources(struct nicvf *nic) /* Alloc send queue */ for (qidx = 0; qidx < qs->sq_cnt; qidx++) { - if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len)) + if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len, qidx)) goto alloc_fail; } @@ -1035,7 +1068,10 @@ static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt) int qentry; qentry = sq->tail; - atomic_sub(desc_cnt, &sq->free_cnt); + if (!sq->is_xdp) + atomic_sub(desc_cnt, &sq->free_cnt); + else + sq->xdp_free_cnt -= desc_cnt; sq->tail += desc_cnt; sq->tail &= (sq->dmem.q_len - 1); @@ -1053,7 +1089,10 @@ static inline void nicvf_rollback_sq_desc(struct snd_queue *sq, /* Free descriptor back to SQ for future use */ void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt) { - atomic_add(desc_cnt, &sq->free_cnt); + if (!sq->is_xdp) + atomic_add(desc_cnt, &sq->free_cnt); + else + sq->xdp_free_cnt += desc_cnt; sq->head += desc_cnt; sq->head &= (sq->dmem.q_len - 1); } @@ -1111,6 +1150,58 @@ void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq, } } +/* XDP Transmit APIs */ +void nicvf_xdp_sq_doorbell(struct nicvf *nic, + struct snd_queue *sq, int sq_num) +{ + if (!sq->xdp_desc_cnt) + return; + + /* make sure all memory stores are done before ringing doorbell */ + wmb(); + + /* Inform HW to xmit all TSO segments */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, + sq_num, sq->xdp_desc_cnt); + sq->xdp_desc_cnt = 0; +} + +static inline void +nicvf_xdp_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry, + int subdesc_cnt, u64 data, int len) +{ + struct sq_hdr_subdesc *hdr; + + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry); + memset(hdr, 0, SND_QUEUE_DESC_SIZE); + hdr->subdesc_type = SQ_DESC_TYPE_HEADER; + hdr->subdesc_cnt = subdesc_cnt; + hdr->tot_len = len; + hdr->post_cqe = 1; + sq->xdp_page[qentry] = (u64)virt_to_page((void *)data); +} + +int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq, + u64 bufaddr, u64 dma_addr, u16 len) +{ + int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT; + int qentry; + + if (subdesc_cnt > sq->xdp_free_cnt) + return 0; + + qentry = nicvf_get_sq_desc(sq, subdesc_cnt); + + nicvf_xdp_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, bufaddr, len); + + qentry = nicvf_get_nxt_sqentry(sq, qentry); + nicvf_sq_add_gather_subdesc(sq, qentry, len, dma_addr); + + sq->xdp_desc_cnt += subdesc_cnt; + + return 1; +} + /* Calculate no of SQ subdescriptors needed to transmit all * segments of this TSO packet. * Taken from 'Tilera network driver' with a minor modification. diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index db04c0e33c6c..a07d5b4d5ce2 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -271,6 +271,10 @@ struct snd_queue { u32 tail; u64 *skbuff; void *desc; + u64 *xdp_page; + u16 xdp_desc_cnt; + u16 xdp_free_cnt; + bool is_xdp; #define TSO_HEADER_SIZE 128 /* For TSO segment's header */ @@ -339,6 +343,9 @@ void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq, int qidx); int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq, struct sk_buff *skb, u8 sq_num); +int nicvf_xdp_sq_append_pkt(struct nicvf *nic, struct snd_queue *sq, + u64 bufaddr, u64 dma_addr, u16 len); +void nicvf_xdp_sq_doorbell(struct nicvf *nic, struct snd_queue *sq, int sq_num); struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx, bool xdp); -- cgit v1.2.3 From e3d06ff9ec9400b93bacf8fa92f3985c9412e282 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 2 May 2017 18:36:57 +0530 Subject: net: thunderx: Support for XDP header adjustment When in XDP mode reserve XDP_PACKET_HEADROOM bytes at the start of receive buffer for XDP program to modify headers and adjust packet start. Additional code changes done to handle such packets. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 63 ++++++++++++++++------ drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 9 +++- 2 files changed, 55 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index bb13dee388c3..d6477af88085 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -502,13 +502,15 @@ static int nicvf_init_resources(struct nicvf *nic) } static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, - struct cqe_rx_t *cqe_rx, struct snd_queue *sq) + struct cqe_rx_t *cqe_rx, struct snd_queue *sq, + struct sk_buff **skb) { struct xdp_buff xdp; struct page *page; u32 action; - u16 len; + u16 len, offset = 0; u64 dma_addr, cpu_addr; + void *orig_data; /* Retrieve packet buffer's DMA address and length */ len = *((u16 *)((void *)cqe_rx + (3 * sizeof(u64)))); @@ -517,17 +519,47 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, cpu_addr = nicvf_iova_to_phys(nic, dma_addr); if (!cpu_addr) return false; + cpu_addr = (u64)phys_to_virt(cpu_addr); + page = virt_to_page((void *)cpu_addr); - xdp.data = phys_to_virt(cpu_addr); + xdp.data_hard_start = page_address(page); + xdp.data = (void *)cpu_addr; xdp.data_end = xdp.data + len; + orig_data = xdp.data; rcu_read_lock(); action = bpf_prog_run_xdp(prog, &xdp); rcu_read_unlock(); + /* Check if XDP program has changed headers */ + if (orig_data != xdp.data) { + len = xdp.data_end - xdp.data; + offset = orig_data - xdp.data; + dma_addr -= offset; + } + switch (action) { case XDP_PASS: - /* Pass on packet to network stack */ + /* Check if it's a recycled page, if not + * unmap the DMA mapping. + * + * Recycled page holds an extra reference. + */ + if (page_ref_count(page) == 1) { + dma_addr &= PAGE_MASK; + dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, + RCV_FRAG_LEN + XDP_PACKET_HEADROOM, + DMA_FROM_DEVICE, + DMA_ATTR_SKIP_CPU_SYNC); + } + + /* Build SKB and pass on packet to network stack */ + *skb = build_skb(xdp.data, + RCV_FRAG_LEN - cqe_rx->align_pad + offset); + if (!*skb) + put_page(page); + else + skb_put(*skb, len); return false; case XDP_TX: nicvf_xdp_sq_append_pkt(nic, sq, (u64)xdp.data, dma_addr, len); @@ -537,7 +569,6 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, case XDP_ABORTED: trace_xdp_exception(nic->netdev, prog, action); case XDP_DROP: - page = virt_to_page(xdp.data); /* Check if it's a recycled page, if not * unmap the DMA mapping. * @@ -546,7 +577,8 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog, if (page_ref_count(page) == 1) { dma_addr &= PAGE_MASK; dma_unmap_page_attrs(&nic->pdev->dev, dma_addr, - RCV_FRAG_LEN, DMA_FROM_DEVICE, + RCV_FRAG_LEN + XDP_PACKET_HEADROOM, + DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); } put_page(page); @@ -654,7 +686,7 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, struct napi_struct *napi, struct cqe_rx_t *cqe_rx, struct snd_queue *sq) { - struct sk_buff *skb; + struct sk_buff *skb = NULL; struct nicvf *nic = netdev_priv(netdev); struct nicvf *snic = nic; int err = 0; @@ -676,15 +708,17 @@ static void nicvf_rcv_pkt_handler(struct net_device *netdev, } /* For XDP, ignore pkts spanning multiple pages */ - if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) - if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq)) + if (nic->xdp_prog && (cqe_rx->rb_cnt == 1)) { + /* Packet consumed by XDP */ + if (nicvf_xdp_rx(snic, nic->xdp_prog, cqe_rx, sq, &skb)) return; + } else { + skb = nicvf_get_rcv_skb(snic, cqe_rx, + nic->xdp_prog ? true : false); + } - skb = nicvf_get_rcv_skb(snic, cqe_rx, nic->xdp_prog ? true : false); - if (!skb) { - netdev_dbg(nic->netdev, "Packet not received\n"); + if (!skb) return; - } if (netif_msg_pktdata(nic)) { netdev_info(nic->netdev, "%s: skb 0x%p, len=%d\n", netdev->name, @@ -1672,9 +1706,6 @@ static int nicvf_xdp_setup(struct nicvf *nic, struct bpf_prog *prog) return -EOPNOTSUPP; } - if (prog && prog->xdp_adjust_head) - return -EOPNOTSUPP; - /* ALL SQs attached to CQs i.e same as RQs, are treated as * XDP Tx queues and more Tx queues are allocated for * network stack to send pkts out. diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index ec234b626fe3..43428ce760ca 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -164,6 +164,11 @@ static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, struct rbdr *rbdr, } nic->rb_page_offset = 0; + + /* Reserve space for header modifications by BPF program */ + if (rbdr->is_xdp) + buf_len += XDP_PACKET_HEADROOM; + /* Check if it's recycled */ if (pgcache) nic->rb_page = pgcache->page; @@ -183,7 +188,7 @@ ret: return -ENOMEM; } if (pgcache) - pgcache->dma_addr = *rbuf; + pgcache->dma_addr = *rbuf + XDP_PACKET_HEADROOM; nic->rb_page_offset += buf_len; } @@ -1575,6 +1580,8 @@ static void nicvf_unmap_rcv_buffer(struct nicvf *nic, u64 dma_addr, */ if (page_ref_count(page) != 1) return; + + len += XDP_PACKET_HEADROOM; /* Receive buffers in XDP mode are mapped from page start */ dma_addr &= PAGE_MASK; } -- cgit v1.2.3 From 773225388dae15e72790d6f573e2e70e96292b6b Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 2 May 2017 18:36:58 +0530 Subject: net: thunderx: Optimize page recycling for XDP Driver follows a method of taking one extra reference on the page for recycling which is fine in usual packet path where each 64KB page is segmented into multiple receive buffers. But in XDP mode since there is just one receive buffer per page taking extra page reference itself becomes big bottleneck consuming ~50% of CPU cycles due to atomic operations. This patch adds a internal ref count in pgcache for each page and additional page references are taken in a batch instead of just one at a time. Internal i.e 'pgcache->ref_count' and page's i.e 'page->_refcount' counters are compared to check page's recyclability. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_queues.c | 57 +++++++++++++++++++--- drivers/net/ethernet/cavium/thunder/nicvf_queues.h | 1 + 2 files changed, 51 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index 43428ce760ca..2b181762ad49 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -82,6 +82,8 @@ static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem) dmem->base = NULL; } +#define XDP_PAGE_REFCNT_REFILL 256 + /* Allocate a new page or recycle one if possible * * We cannot optimize dma mapping here, since @@ -90,9 +92,10 @@ static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem) * and not idx into RBDR ring, so can't refer to saved info. * 3. There are multiple receive buffers per page */ -static struct pgcache *nicvf_alloc_page(struct nicvf *nic, - struct rbdr *rbdr, gfp_t gfp) +static inline struct pgcache *nicvf_alloc_page(struct nicvf *nic, + struct rbdr *rbdr, gfp_t gfp) { + int ref_count; struct page *page = NULL; struct pgcache *pgcache, *next; @@ -100,8 +103,23 @@ static struct pgcache *nicvf_alloc_page(struct nicvf *nic, pgcache = &rbdr->pgcache[rbdr->pgidx]; page = pgcache->page; /* Check if page can be recycled */ - if (page && (page_ref_count(page) != 1)) - page = NULL; + if (page) { + ref_count = page_ref_count(page); + /* Check if this page has been used once i.e 'put_page' + * called after packet transmission i.e internal ref_count + * and page's ref_count are equal i.e page can be recycled. + */ + if (rbdr->is_xdp && (ref_count == pgcache->ref_count)) + pgcache->ref_count--; + else + page = NULL; + + /* In non-XDP mode, page's ref_count needs to be '1' for it + * to be recycled. + */ + if (!rbdr->is_xdp && (ref_count != 1)) + page = NULL; + } if (!page) { page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN, 0); @@ -120,11 +138,30 @@ static struct pgcache *nicvf_alloc_page(struct nicvf *nic, /* Save the page in page cache */ pgcache->page = page; pgcache->dma_addr = 0; + pgcache->ref_count = 0; rbdr->pgalloc++; } - /* Take extra page reference for recycling */ - page_ref_add(page, 1); + /* Take additional page references for recycling */ + if (rbdr->is_xdp) { + /* Since there is single RBDR (i.e single core doing + * page recycling) per 8 Rx queues, in XDP mode adjusting + * page references atomically is the biggest bottleneck, so + * take bunch of references at a time. + * + * So here, below reference counts defer by '1'. + */ + if (!pgcache->ref_count) { + pgcache->ref_count = XDP_PAGE_REFCNT_REFILL; + page_ref_add(page, XDP_PAGE_REFCNT_REFILL); + } + } else { + /* In non-XDP case, single 64K page is divided across multiple + * receive buffers, so cost of recycling is less anyway. + * So we can do with just one extra reference. + */ + page_ref_add(page, 1); + } rbdr->pgidx++; rbdr->pgidx &= (rbdr->pgcnt - 1); @@ -327,8 +364,14 @@ static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) head = 0; while (head < rbdr->pgcnt) { pgcache = &rbdr->pgcache[head]; - if (pgcache->page && page_ref_count(pgcache->page) != 0) + if (pgcache->page && page_ref_count(pgcache->page) != 0) { + if (!rbdr->is_xdp) { + put_page(pgcache->page); + continue; + } + page_ref_sub(pgcache->page, pgcache->ref_count - 1); put_page(pgcache->page); + } head++; } diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index a07d5b4d5ce2..57858522c33c 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -216,6 +216,7 @@ struct q_desc_mem { struct pgcache { struct page *page; + int ref_count; u64 dma_addr; }; -- cgit v1.2.3