author		Jakub Kicinski <jakub.kicinski@netronome.com>	2019-11-22 16:46:33 -0800
committer	Jakub Kicinski <jakub.kicinski@netronome.com>	2019-11-22 16:46:33 -0800
commit		091189ab68d0b45de2f343b0c432804cacdeaf78 (patch)
tree		d834cce65f93a315991b73df1807e67cba4f81e8
parent		a9f852e92e40992c4ff09ac3940f7725e016317a (diff)
parent		8311f0be9763386556d7b698f5e101c688f9c2eb (diff)
download	linux-091189ab68d0b45de2f343b0c432804cacdeaf78.tar.bz2
Merge branch 'cxgb4-add-udp-segmentation-offload-support'
Rahul Lakkireddy says:
====================
This series of patches adds UDP Segmentation Offload (USO) support
for Chelsio T5/T6 NICs.
Patch 1 updates the current Scatter Gather List (SGL) DMA unmap logic
for USO requests; a sketch of the resulting descriptor bookkeeping
follows the sign-off below.
Patch 2 adds USO support for the NIC and MQPRIO QoS offload Tx paths;
a driver-agnostic sketch of the feature plumbing follows the diff at
the end.
Patch 3 adds missing stats for the MQPRIO QoS offload Tx path.
====================
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
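For readers skimming the diff, the core idea of patch 1 is to record each fragment's DMA address in the per-descriptor software state at map time, so the completion path can unmap buffers directly instead of re-walking the hardware SGL (which can wrap around the end of the queue, and which the USO path no longer starts at a fixed offset). Below is a minimal, self-contained sketch of that bookkeeping, under stated assumptions: the struct mirrors the consolidated tx_sw_desc from the diff, while map_skb()/unmap_skb() here are simplified stand-ins for the driver's cxgb4_map_skb()/unmap_skb() helpers and elide partial-unmap error recovery.

```c
#include <linux/skbuff.h>
#include <linux/dma-mapping.h>

/* Mirrors the consolidated tx_sw_desc from the diff: the skb plus the
 * DMA address of its linear data and of every page fragment.
 */
struct tx_sw_desc {
	struct sk_buff *skb;
	dma_addr_t addr[MAX_SKB_FRAGS + 1];
};

/* Map an skb and record every DMA address in the software descriptor.
 * Simplified stand-in for cxgb4_map_skb(); on error the real code
 * unmaps whatever already succeeded before bailing out.
 */
static int map_skb(struct device *dev, struct sk_buff *skb,
		   struct tx_sw_desc *d)
{
	const struct skb_shared_info *si = skb_shinfo(skb);
	int i;

	d->addr[0] = dma_map_single(dev, skb->data, skb_headlen(skb),
				    DMA_TO_DEVICE);
	if (dma_mapping_error(dev, d->addr[0]))
		return -ENOMEM;

	for (i = 0; i < si->nr_frags; i++) {
		const skb_frag_t *frag = &si->frags[i];

		d->addr[i + 1] = skb_frag_dma_map(dev, frag, 0,
						  skb_frag_size(frag),
						  DMA_TO_DEVICE);
		if (dma_mapping_error(dev, d->addr[i + 1]))
			return -ENOMEM; /* simplified error path */
	}
	d->skb = skb;
	return 0;
}

/* Completion-side unmap: no SGL walk and no queue-wrap special cases,
 * just replay the addresses recorded at map time.
 */
static void unmap_skb(struct device *dev, struct tx_sw_desc *d)
{
	const struct skb_shared_info *si = skb_shinfo(d->skb);
	int i;

	dma_unmap_single(dev, d->addr[0], skb_headlen(d->skb), DMA_TO_DEVICE);
	for (i = 0; i < si->nr_frags; i++)
		dma_unmap_page(dev, d->addr[i + 1],
			       skb_frag_size(&si->frags[i]), DMA_TO_DEVICE);
	memset(d->addr, 0, sizeof(d->addr));
}
```

The trade-off is MAX_SKB_FRAGS + 1 extra dma_addr_t slots per descriptor in exchange for an O(nr_frags) unmap with no wrap-around handling, which is what lets one tx_sw_desc type serve the NIC, IPsec, and ETHOFLD queues throughout the series.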
 drivers/crypto/chelsio/chcr_ipsec.c                  |  27
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h           |  21
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c   |   2
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c   |  16
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c      |  11
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c |   2
 drivers/net/ethernet/chelsio/cxgb4/sge.c             | 290
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h        |  14
 8 files changed, 218 insertions(+), 165 deletions(-)
diff --git a/drivers/crypto/chelsio/chcr_ipsec.c b/drivers/crypto/chelsio/chcr_ipsec.c
index 24355680f30a..9da0f93a330b 100644
--- a/drivers/crypto/chelsio/chcr_ipsec.c
+++ b/drivers/crypto/chelsio/chcr_ipsec.c
@@ -673,16 +673,16 @@ static inline void txq_advance(struct sge_txq *q, unsigned int n)
 int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct xfrm_state *x = xfrm_input_state(skb);
+	unsigned int last_desc, ndesc, flits = 0;
 	struct ipsec_sa_entry *sa_entry;
 	u64 *pos, *end, *before, *sgl;
+	struct tx_sw_desc *sgl_sdesc;
 	int qidx, left, credits;
-	unsigned int flits = 0, ndesc;
-	struct adapter *adap;
+	bool immediate = false;
 	struct sge_eth_txq *q;
+	struct adapter *adap;
 	struct port_info *pi;
-	dma_addr_t addr[MAX_SKB_FRAGS + 1];
 	struct sec_path *sp;
-	bool immediate = false;
 
 	if (!x->xso.offload_handle)
 		return NETDEV_TX_BUSY;
@@ -715,8 +715,14 @@ out_free:	dev_kfree_skb_any(skb);
 		return NETDEV_TX_BUSY;
 	}
 
+	last_desc = q->q.pidx + ndesc - 1;
+	if (last_desc >= q->q.size)
+		last_desc -= q->q.size;
+	sgl_sdesc = &q->q.sdesc[last_desc];
+
 	if (!immediate &&
-	    unlikely(cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)) {
+	    unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) {
+		memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr));
 		q->mapping_err++;
 		goto out_free;
 	}
@@ -742,17 +748,10 @@ out_free:	dev_kfree_skb_any(skb);
 		cxgb4_inline_tx_skb(skb, &q->q, sgl);
 		dev_consume_skb_any(skb);
 	} else {
-		int last_desc;
-
 		cxgb4_write_sgl(skb, &q->q, (void *)sgl, end,
-				0, addr);
+				0, sgl_sdesc->addr);
 		skb_orphan(skb);
-
-		last_desc = q->q.pidx + ndesc - 1;
-		if (last_desc >= q->q.size)
-			last_desc -= q->q.size;
-		q->q.sdesc[last_desc].skb = skb;
-		q->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)sgl;
+		sgl_sdesc->skb = skb;
 	}
 	txq_advance(&q->q, ndesc);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 3121ed83d8e2..a70ac2097892 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -735,7 +735,12 @@ struct tx_desc {
 	__be64 flit[8];
 };
 
-struct tx_sw_desc;
+struct ulptx_sgl;
+
+struct tx_sw_desc {
+	struct sk_buff *skb; /* SKB to free after getting completion */
+	dma_addr_t addr[MAX_SKB_FRAGS + 1]; /* DMA mapped addresses */
+};
 
 struct sge_txq {
 	unsigned int in_use;        /* # of in-use Tx descriptors */
@@ -767,6 +772,7 @@ struct sge_eth_txq {            /* state for an SGE Ethernet Tx queue */
 	u8 dbqt;                    /* SGE Doorbell Queue Timer in use */
 	unsigned int dbqtimerix;    /* SGE Doorbell Queue Timer Index */
 	unsigned long tso;          /* # of TSO requests */
+	unsigned long uso;          /* # of USO requests */
 	unsigned long tx_cso;       /* # of Tx checksum offloads */
 	unsigned long vlan_ins;     /* # of Tx VLAN insertions */
 	unsigned long mapping_err;  /* # of I/O MMU packet mapping errors */
@@ -814,15 +820,10 @@ enum sge_eosw_state {
 	CXGB4_EO_STATE_FLOWC_CLOSE_REPLY, /* Waiting for FLOWC close reply */
 };
 
-struct sge_eosw_desc {
-	struct sk_buff *skb; /* SKB to free after getting completion */
-	dma_addr_t addr[MAX_SKB_FRAGS + 1]; /* DMA mapped addresses */
-};
-
 struct sge_eosw_txq {
 	spinlock_t lock; /* Per queue lock to synchronize completions */
 	enum sge_eosw_state state; /* Current ETHOFLD State */
-	struct sge_eosw_desc *desc; /* Descriptor ring to hold packets */
+	struct tx_sw_desc *desc; /* Descriptor ring to hold packets */
 	u32 ndesc; /* Number of descriptors */
 	u32 pidx; /* Current Producer Index */
 	u32 last_pidx; /* Last successfully transmitted Producer Index */
@@ -849,6 +850,7 @@ struct sge_eohw_txq {
 	struct sge_txq q; /* HW Txq */
 	struct adapter *adap; /* Backpointer to adapter */
 	unsigned long tso; /* # of TSO requests */
+	unsigned long uso; /* # of USO requests */
 	unsigned long tx_cso; /* # of Tx checksum offloads */
 	unsigned long vlan_ins; /* # of Tx VLAN insertions */
 	unsigned long mapping_err; /* # of I/O MMU packet mapping errors */
@@ -1151,11 +1153,6 @@ enum {
 	SCHED_CLASS_RATEMODE_ABS = 1,   /* Kb/s */
 };
 
-struct tx_sw_desc {                /* SW state per Tx descriptor */
-	struct sk_buff *skb;
-	struct ulptx_sgl *sgl;
-};
-
 /* Support for "sched_queue" command to allow one or more NIC TX Queues
  * to be bound to a TX Scheduling Class.
  */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index a13b03f771cc..93868dca186a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2748,6 +2748,7 @@ do { \
 		RL("RxDrops:", stats.rx_drops);
 		RL("RxBadPkts:", stats.bad_rx_pkts);
 		TL("TSO:", tso);
+		TL("USO:", uso);
 		TL("TxCSO:", tx_cso);
 		TL("VLANins:", vlan_ins);
 		TL("TxQFull:", q.stops);
@@ -2796,6 +2797,7 @@ do { \
 		RL("RxAN", stats.an);
 		RL("RxNoMem", stats.nomem);
 		TL("TSO:", tso);
+		TL("USO:", uso);
 		TL("TxCSO:", tx_cso);
 		TL("VLANins:", vlan_ins);
 		TL("TxQFull:", q.stops);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
index 76538f4cd595..20ab3b6285a2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c
@@ -91,6 +91,7 @@ static const char stats_strings[][ETH_GSTRING_LEN] = {
 	"rx_bg3_frames_trunc ",
 
 	"tso                 ",
+	"uso                 ",
 	"tx_csum_offload     ",
 	"rx_csum_good        ",
 	"vlan_extractions    ",
@@ -220,6 +221,7 @@ static void get_strings(struct net_device *dev, u32 stringset, u8 *data)
  */
 struct queue_port_stats {
 	u64 tso;
+	u64 uso;
 	u64 tx_csum;
 	u64 rx_csum;
 	u64 vlan_ex;
@@ -240,13 +242,15 @@ static void collect_sge_port_stats(const struct adapter *adap,
 				   const struct port_info *p,
 				   struct queue_port_stats *s)
 {
-	int i;
 	const struct sge_eth_txq *tx = &adap->sge.ethtxq[p->first_qset];
 	const struct sge_eth_rxq *rx = &adap->sge.ethrxq[p->first_qset];
+	struct sge_eohw_txq *eohw_tx;
+	unsigned int i;
 
 	memset(s, 0, sizeof(*s));
 	for (i = 0; i < p->nqsets; i++, rx++, tx++) {
 		s->tso += tx->tso;
+		s->uso += tx->uso;
 		s->tx_csum += tx->tx_cso;
 		s->rx_csum += rx->stats.rx_cso;
 		s->vlan_ex += rx->stats.vlan_ex;
@@ -254,6 +258,16 @@ static void collect_sge_port_stats(const struct adapter *adap,
 		s->gro_pkts += rx->stats.lro_pkts;
 		s->gro_merged += rx->stats.lro_merged;
 	}
+
+	if (adap->sge.eohw_txq) {
+		eohw_tx = &adap->sge.eohw_txq[p->first_qset];
+		for (i = 0; i < p->nqsets; i++, eohw_tx++) {
+			s->tso += eohw_tx->tso;
+			s->uso += eohw_tx->uso;
+			s->tx_csum += eohw_tx->tx_cso;
+			s->vlan_ins += eohw_tx->vlan_ins;
+		}
+	}
 }
 
 static void collect_adapter_stats(struct adapter *adap, struct adapter_stats *s)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index e8a1826a1e90..12ff69b3ba91 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1136,11 +1136,17 @@ static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb,
 
 	if (dev->num_tc) {
 		struct port_info *pi = netdev2pinfo(dev);
+		u8 ver, proto;
+
+		ver = ip_hdr(skb)->version;
+		proto = (ver == 6) ? ipv6_hdr(skb)->nexthdr :
+				     ip_hdr(skb)->protocol;
 
 		/* Send unsupported traffic pattern to normal NIC queues. */
 		txq = netdev_pick_tx(dev, skb, sb_dev);
 		if (xfrm_offload(skb) || is_ptp_enabled(skb, dev) ||
-		    ip_hdr(skb)->protocol != IPPROTO_TCP)
+		    skb->encapsulation ||
+		    (proto != IPPROTO_TCP && proto != IPPROTO_UDP))
 			txq = txq % pi->nqsets;
 
 		return txq;
@@ -5838,7 +5844,8 @@ static void free_some_resources(struct adapter *adapter)
 		t4_fw_bye(adapter, adapter->pf);
 }
 
-#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
+#define TSO_FLAGS (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN | \
+		   NETIF_F_GSO_UDP_L4)
 #define VLAN_FEAT (NETIF_F_SG | NETIF_F_IP_CSUM | TSO_FLAGS | \
 		   NETIF_F_GRO | NETIF_F_IPV6_CSUM | NETIF_F_HIGHDMA)
 #define SEGMENT_SIZE 128
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
index db55673b77bd..477973d2e341 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
@@ -70,7 +70,7 @@ static int cxgb4_init_eosw_txq(struct net_device *dev,
 			       u32 eotid, u32 hwqid)
 {
 	struct adapter *adap = netdev2adap(dev);
-	struct sge_eosw_desc *ring;
+	struct tx_sw_desc *ring;
 
 	memset(eosw_txq, 0, sizeof(*eosw_txq));
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index a0400b9a11e9..97cda501e7e8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -300,65 +300,6 @@ static void deferred_unmap_destructor(struct sk_buff *skb)
 }
 #endif
 
-static void unmap_sgl(struct device *dev, const struct sk_buff *skb,
-		      const struct ulptx_sgl *sgl, const struct sge_txq *q)
-{
-	const struct ulptx_sge_pair *p;
-	unsigned int nfrags = skb_shinfo(skb)->nr_frags;
-
-	if (likely(skb_headlen(skb)))
-		dma_unmap_single(dev, be64_to_cpu(sgl->addr0), ntohl(sgl->len0),
-				 DMA_TO_DEVICE);
-	else {
-		dma_unmap_page(dev, be64_to_cpu(sgl->addr0), ntohl(sgl->len0),
-			       DMA_TO_DEVICE);
-		nfrags--;
-	}
-
-	/*
-	 * the complexity below is because of the possibility of a wrap-around
-	 * in the middle of an SGL
-	 */
-	for (p = sgl->sge; nfrags >= 2; nfrags -= 2) {
-		if (likely((u8 *)(p + 1) <= (u8 *)q->stat)) {
-unmap:			dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
-				       ntohl(p->len[0]), DMA_TO_DEVICE);
-			dma_unmap_page(dev, be64_to_cpu(p->addr[1]),
-				       ntohl(p->len[1]), DMA_TO_DEVICE);
-			p++;
-		} else if ((u8 *)p == (u8 *)q->stat) {
-			p = (const struct ulptx_sge_pair *)q->desc;
-			goto unmap;
-		} else if ((u8 *)p + 8 == (u8 *)q->stat) {
-			const __be64 *addr = (const __be64 *)q->desc;
-
-			dma_unmap_page(dev, be64_to_cpu(addr[0]),
-				       ntohl(p->len[0]), DMA_TO_DEVICE);
-			dma_unmap_page(dev, be64_to_cpu(addr[1]),
-				       ntohl(p->len[1]), DMA_TO_DEVICE);
-			p = (const struct ulptx_sge_pair *)&addr[2];
-		} else {
-			const __be64 *addr = (const __be64 *)q->desc;
-
-			dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
-				       ntohl(p->len[0]), DMA_TO_DEVICE);
-			dma_unmap_page(dev, be64_to_cpu(addr[0]),
-				       ntohl(p->len[1]), DMA_TO_DEVICE);
-			p = (const struct ulptx_sge_pair *)&addr[1];
-		}
-	}
-	if (nfrags) {
-		__be64 addr;
-
-		if ((u8 *)p == (u8 *)q->stat)
-			p = (const struct ulptx_sge_pair *)q->desc;
-		addr = (u8 *)p + 16 <= (u8 *)q->stat ? p->addr[0] :
-						       *(const __be64 *)q->desc;
-		dma_unmap_page(dev, be64_to_cpu(addr), ntohl(p->len[0]),
-			       DMA_TO_DEVICE);
-	}
-}
-
 /**
  *	free_tx_desc - reclaims Tx descriptors and their buffers
  *	@adapter: the adapter
@@ -372,15 +313,16 @@ unmap:	dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
 void free_tx_desc(struct adapter *adap, struct sge_txq *q,
 		  unsigned int n, bool unmap)
 {
-	struct tx_sw_desc *d;
 	unsigned int cidx = q->cidx;
-	struct device *dev = adap->pdev_dev;
+	struct tx_sw_desc *d;
 
 	d = &q->sdesc[cidx];
 	while (n--) {
 		if (d->skb) {                       /* an SGL is present */
-			if (unmap)
-				unmap_sgl(dev, d->skb, d->sgl, q);
+			if (unmap && d->addr[0]) {
+				unmap_skb(adap->pdev_dev, d->skb, d->addr);
+				memset(d->addr, 0, sizeof(d->addr));
+			}
 			dev_consume_skb_any(d->skb);
 			d->skb = NULL;
 		}
@@ -792,6 +734,8 @@ static inline int is_eth_imm(const struct sk_buff *skb, unsigned int chip_ver)
 	    chip_ver > CHELSIO_T5) {
 		hdrlen = sizeof(struct cpl_tx_tnl_lso);
 		hdrlen += sizeof(struct cpl_tx_pkt_core);
+	} else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+		return 0;
 	} else {
 		hdrlen = skb_shinfo(skb)->gso_size ?
			 sizeof(struct cpl_tx_pkt_lso_core) : 0;
@@ -833,12 +777,20 @@ static inline unsigned int calc_tx_flits(const struct sk_buff *skb,
 	 */
 	flits = sgl_len(skb_shinfo(skb)->nr_frags + 1);
 	if (skb_shinfo(skb)->gso_size) {
-		if (skb->encapsulation && chip_ver > CHELSIO_T5)
+		if (skb->encapsulation && chip_ver > CHELSIO_T5) {
 			hdrlen = sizeof(struct fw_eth_tx_pkt_wr) +
				 sizeof(struct cpl_tx_tnl_lso);
-		else
+		} else if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) {
+			u32 pkt_hdrlen;
+
+			pkt_hdrlen = eth_get_headlen(skb->dev, skb->data,
+						     skb_headlen(skb));
+			hdrlen = sizeof(struct fw_eth_tx_eo_wr) +
+				 round_up(pkt_hdrlen, 16);
+		} else {
 			hdrlen = sizeof(struct fw_eth_tx_pkt_wr) +
				 sizeof(struct cpl_tx_pkt_lso_core);
+		}
 		hdrlen += sizeof(struct cpl_tx_pkt_core);
 		flits += (hdrlen / sizeof(__be64));
@@ -1403,6 +1355,25 @@ static inline int cxgb4_validate_skb(struct sk_buff *skb,
 	return 0;
 }
 
+static void *write_eo_udp_wr(struct sk_buff *skb, struct fw_eth_tx_eo_wr *wr,
+			     u32 hdr_len)
+{
+	wr->u.udpseg.type = FW_ETH_TX_EO_TYPE_UDPSEG;
+	wr->u.udpseg.ethlen = skb_network_offset(skb);
+	wr->u.udpseg.iplen = cpu_to_be16(skb_network_header_len(skb));
+	wr->u.udpseg.udplen = sizeof(struct udphdr);
+	wr->u.udpseg.rtplen = 0;
+	wr->u.udpseg.r4 = 0;
+	if (skb_shinfo(skb)->gso_size)
+		wr->u.udpseg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
+	else
+		wr->u.udpseg.mss = cpu_to_be16(skb->len - hdr_len);
+	wr->u.udpseg.schedpktsize = wr->u.udpseg.mss;
+	wr->u.udpseg.plen = cpu_to_be32(skb->len - hdr_len);
+
+	return (void *)(wr + 1);
+}
+
 /**
  *	cxgb4_eth_xmit - add a packet to an Ethernet Tx queue
  *	@skb: the packet
@@ -1414,15 +1385,16 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	enum cpl_tx_tnl_lso_type tnl_type = TX_TNL_TYPE_OPAQUE;
 	bool ptp_enabled = is_ptp_enabled(skb, dev);
-	dma_addr_t addr[MAX_SKB_FRAGS + 1];
+	unsigned int last_desc, flits, ndesc;
+	u32 wr_mid, ctrl0, op, sgl_off = 0;
 	const struct skb_shared_info *ssi;
+	int len, qidx, credits, ret, left;
+	struct tx_sw_desc *sgl_sdesc;
+	struct fw_eth_tx_eo_wr *eowr;
 	struct fw_eth_tx_pkt_wr *wr;
 	struct cpl_tx_pkt_core *cpl;
-	int len, qidx, credits, ret;
 	const struct port_info *pi;
-	unsigned int flits, ndesc;
 	bool immediate = false;
-	u32 wr_mid, ctrl0, op;
 	u64 cntrl, *end, *sgl;
 	struct sge_eth_txq *q;
 	unsigned int chip_ver;
@@ -1489,8 +1461,14 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (skb->encapsulation && chip_ver > CHELSIO_T5)
 		tnl_type = cxgb_encap_offload_supported(skb);
 
+	last_desc = q->q.pidx + ndesc - 1;
+	if (last_desc >= q->q.size)
+		last_desc -= q->q.size;
+	sgl_sdesc = &q->q.sdesc[last_desc];
+
 	if (!immediate &&
-	    unlikely(cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)) {
+	    unlikely(cxgb4_map_skb(adap->pdev_dev, skb, sgl_sdesc->addr) < 0)) {
+		memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr));
 		q->mapping_err++;
 		if (ptp_enabled)
 			spin_unlock(&adap->ptp_lock);
@@ -1521,13 +1499,17 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	wr = (void *)&q->q.desc[q->q.pidx];
+	eowr = (void *)&q->q.desc[q->q.pidx];
 	wr->equiq_to_len16 = htonl(wr_mid);
 	wr->r3 = cpu_to_be64(0);
-	end = (u64 *)wr + flits;
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+		end = (u64 *)eowr + flits;
+	else
+		end = (u64 *)wr + flits;
 
 	len = immediate ? skb->len : 0;
 	len += sizeof(*cpl);
-	if (ssi->gso_size) {
+	if (ssi->gso_size && !(ssi->gso_type & SKB_GSO_UDP_L4)) {
 		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
 		struct cpl_tx_tnl_lso *tnl_lso = (void *)(wr + 1);
 
@@ -1559,20 +1541,29 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 			cntrl = hwcsum(adap->params.chip, skb);
 		}
 		sgl = (u64 *)(cpl + 1); /* sgl start here */
-		if (unlikely((u8 *)sgl >= (u8 *)q->q.stat)) {
-			/* If current position is already at the end of the
-			 * txq, reset the current to point to start of the queue
-			 * and update the end ptr as well.
-			 */
-			if (sgl == (u64 *)q->q.stat) {
-				int left = (u8 *)end - (u8 *)q->q.stat;
-
-				end = (void *)q->q.desc + left;
-				sgl = (void *)q->q.desc;
-			}
-		}
 		q->tso++;
 		q->tx_cso += ssi->gso_segs;
+	} else if (ssi->gso_size) {
+		u64 *start;
+		u32 hdrlen;
+
+		hdrlen = eth_get_headlen(dev, skb->data, skb_headlen(skb));
+		len += hdrlen;
+		wr->op_immdlen = cpu_to_be32(FW_WR_OP_V(FW_ETH_TX_EO_WR) |
+					     FW_ETH_TX_EO_WR_IMMDLEN_V(len));
+		cpl = write_eo_udp_wr(skb, eowr, hdrlen);
+		cntrl = hwcsum(adap->params.chip, skb);
+
+		start = (u64 *)(cpl + 1);
+		sgl = (u64 *)inline_tx_skb_header(skb, &q->q, (void *)start,
+						  hdrlen);
+		if (unlikely(start > sgl)) {
+			left = (u8 *)end - (u8 *)q->q.stat;
+			end = (void *)q->q.desc + left;
+		}
+		sgl_off = hdrlen;
+		q->uso++;
+		q->tx_cso += ssi->gso_segs;
 	} else {
 		if (ptp_enabled)
 			op = FW_PTP_TX_PKT_WR;
@@ -1589,6 +1580,16 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 	}
 
+	if (unlikely((u8 *)sgl >= (u8 *)q->q.stat)) {
+		/* If current position is already at the end of the
+		 * txq, reset the current to point to start of the queue
+		 * and update the end ptr as well.
+		 */
+		left = (u8 *)end - (u8 *)q->q.stat;
+		end = (void *)q->q.desc + left;
+		sgl = (void *)q->q.desc;
+	}
+
 	if (skb_vlan_tag_present(skb)) {
 		q->vlan_ins++;
 		cntrl |= TXPKT_VLAN_VLD_F | TXPKT_VLAN_V(skb_vlan_tag_get(skb));
@@ -1618,16 +1619,10 @@ static netdev_tx_t cxgb4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 		cxgb4_inline_tx_skb(skb, &q->q, sgl);
 		dev_consume_skb_any(skb);
 	} else {
-		int last_desc;
-
-		cxgb4_write_sgl(skb, &q->q, (void *)sgl, end, 0, addr);
+		cxgb4_write_sgl(skb, &q->q, (void *)sgl, end, sgl_off,
+				sgl_sdesc->addr);
 		skb_orphan(skb);
-
-		last_desc = q->q.pidx + ndesc - 1;
-		if (last_desc >= q->q.size)
-			last_desc -= q->q.size;
-		q->q.sdesc[last_desc].skb = skb;
-		q->q.sdesc[last_desc].sgl = (struct ulptx_sgl *)sgl;
+		sgl_sdesc->skb = skb;
 	}
 
 	txq_advance(&q->q, ndesc);
@@ -1725,12 +1720,12 @@ static inline unsigned int t4vf_calc_tx_flits(const struct sk_buff *skb)
 static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
 				     struct net_device *dev)
 {
-	dma_addr_t addr[MAX_SKB_FRAGS + 1];
+	unsigned int last_desc, flits, ndesc;
 	const struct skb_shared_info *ssi;
 	struct fw_eth_tx_pkt_vm_wr *wr;
+	struct tx_sw_desc *sgl_sdesc;
 	struct cpl_tx_pkt_core *cpl;
 	const struct port_info *pi;
-	unsigned int flits, ndesc;
 	struct sge_eth_txq *txq;
 	struct adapter *adapter;
 	int qidx, credits, ret;
@@ -1782,12 +1777,19 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
 		return NETDEV_TX_BUSY;
 	}
 
+	last_desc = txq->q.pidx + ndesc - 1;
+	if (last_desc >= txq->q.size)
+		last_desc -= txq->q.size;
+	sgl_sdesc = &txq->q.sdesc[last_desc];
+
 	if (!t4vf_is_eth_imm(skb) &&
-	    unlikely(cxgb4_map_skb(adapter->pdev_dev, skb, addr) < 0)) {
+	    unlikely(cxgb4_map_skb(adapter->pdev_dev, skb,
+				   sgl_sdesc->addr) < 0)) {
 		/* We need to map the skb into PCI DMA space (because it can't
 		 * be in-lined directly into the Work Request) and the mapping
 		 * operation failed.  Record the error and drop the packet.
 		 */
+		memset(sgl_sdesc->addr, 0, sizeof(sgl_sdesc->addr));
 		txq->mapping_err++;
 		goto out_free;
 	}
@@ -1962,7 +1964,6 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
 		 */
 		struct ulptx_sgl *sgl = (struct ulptx_sgl *)(cpl + 1);
 		struct sge_txq *tq = &txq->q;
-		int last_desc;
 
 		/* If the Work Request header was an exact multiple of our TX
 		 * Descriptor length, then it's possible that the starting SGL
@@ -1976,14 +1977,9 @@ static netdev_tx_t cxgb4_vf_eth_xmit(struct sk_buff *skb,
 				       ((void *)end - (void *)tq->stat));
 		}
 
-		cxgb4_write_sgl(skb, tq, sgl, end, 0, addr);
+		cxgb4_write_sgl(skb, tq, sgl, end, 0, sgl_sdesc->addr);
 		skb_orphan(skb);
-
-		last_desc = tq->pidx + ndesc - 1;
-		if (last_desc >= tq->size)
-			last_desc -= tq->size;
-		tq->sdesc[last_desc].skb = skb;
-		tq->sdesc[last_desc].sgl = sgl;
+		sgl_sdesc->skb = skb;
 	}
 
 	/* Advance our internal TX Queue state, tell the hardware about
@@ -2035,7 +2031,7 @@ static inline void eosw_txq_advance_index(u32 *idx, u32 n, u32 max)
 void cxgb4_eosw_txq_free_desc(struct adapter *adap,
 			      struct sge_eosw_txq *eosw_txq, u32 ndesc)
 {
-	struct sge_eosw_desc *d;
+	struct tx_sw_desc *d;
 
 	d = &eosw_txq->desc[eosw_txq->last_cidx];
 	while (ndesc--) {
@@ -2081,7 +2077,8 @@ static inline u8 ethofld_calc_tx_flits(struct adapter *adap,
 	u32 wrlen;
 
 	wrlen = sizeof(struct fw_eth_tx_eo_wr) + sizeof(struct cpl_tx_pkt_core);
-	if (skb_shinfo(skb)->gso_size)
+	if (skb_shinfo(skb)->gso_size &&
+	    !(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4))
 		wrlen += sizeof(struct cpl_tx_pkt_lso_core);
 
 	wrlen += roundup(hdr_len, 16);
@@ -2089,10 +2086,14 @@ static inline u8 ethofld_calc_tx_flits(struct adapter *adap,
 	/* Packet headers + WR + CPLs */
 	flits = DIV_ROUND_UP(wrlen, 8);
 
-	if (skb_shinfo(skb)->nr_frags > 0)
-		nsgl = sgl_len(skb_shinfo(skb)->nr_frags);
-	else if (skb->len - hdr_len)
+	if (skb_shinfo(skb)->nr_frags > 0) {
+		if (skb_headlen(skb) - hdr_len)
+			nsgl = sgl_len(skb_shinfo(skb)->nr_frags + 1);
+		else
+			nsgl = sgl_len(skb_shinfo(skb)->nr_frags);
+	} else if (skb->len - hdr_len) {
 		nsgl = sgl_len(1);
+	}
 
 	return flits + nsgl;
 }
@@ -2106,16 +2107,16 @@ static inline void *write_eo_wr(struct adapter *adap,
 	struct cpl_tx_pkt_core *cpl;
 	u32 immd_len, wrlen16;
 	bool compl = false;
+	u8 ver, proto;
+
+	ver = ip_hdr(skb)->version;
+	proto = (ver == 6) ? ipv6_hdr(skb)->nexthdr : ip_hdr(skb)->protocol;
 
 	wrlen16 = DIV_ROUND_UP(wrlen, 16);
 	immd_len = sizeof(struct cpl_tx_pkt_core);
-	if (skb_shinfo(skb)->gso_size) {
-		if (skb->encapsulation &&
-		    CHELSIO_CHIP_VERSION(adap->params.chip) > CHELSIO_T5)
-			immd_len += sizeof(struct cpl_tx_tnl_lso);
-		else
-			immd_len += sizeof(struct cpl_tx_pkt_lso_core);
-	}
+	if (skb_shinfo(skb)->gso_size &&
+	    !(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4))
+		immd_len += sizeof(struct cpl_tx_pkt_lso_core);
 	immd_len += hdr_len;
 
 	if (!eosw_txq->ncompl ||
@@ -2131,23 +2132,27 @@ static inline void *write_eo_wr(struct adapter *adap,
 	wr->equiq_to_len16 = cpu_to_be32(FW_WR_LEN16_V(wrlen16) |
					 FW_WR_FLOWID_V(eosw_txq->hwtid));
 	wr->r3 = 0;
-	wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG;
-	wr->u.tcpseg.ethlen = skb_network_offset(skb);
-	wr->u.tcpseg.iplen = cpu_to_be16(skb_network_header_len(skb));
-	wr->u.tcpseg.tcplen = tcp_hdrlen(skb);
-	wr->u.tcpseg.tsclk_tsoff = 0;
-	wr->u.tcpseg.r4 = 0;
-	wr->u.tcpseg.r5 = 0;
-	wr->u.tcpseg.plen = cpu_to_be32(skb->len - hdr_len);
-
-	if (ssi->gso_size) {
-		struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
-
-		wr->u.tcpseg.mss = cpu_to_be16(ssi->gso_size);
-		cpl = write_tso_wr(adap, skb, lso);
+	if (proto == IPPROTO_UDP) {
+		cpl = write_eo_udp_wr(skb, wr, hdr_len);
 	} else {
-		wr->u.tcpseg.mss = cpu_to_be16(0xffff);
-		cpl = (void *)(wr + 1);
+		wr->u.tcpseg.type = FW_ETH_TX_EO_TYPE_TCPSEG;
+		wr->u.tcpseg.ethlen = skb_network_offset(skb);
+		wr->u.tcpseg.iplen = cpu_to_be16(skb_network_header_len(skb));
+		wr->u.tcpseg.tcplen = tcp_hdrlen(skb);
+		wr->u.tcpseg.tsclk_tsoff = 0;
+		wr->u.tcpseg.r4 = 0;
+		wr->u.tcpseg.r5 = 0;
+		wr->u.tcpseg.plen = cpu_to_be32(skb->len - hdr_len);
+
+		if (ssi->gso_size) {
+			struct cpl_tx_pkt_lso_core *lso = (void *)(wr + 1);
+
+			wr->u.tcpseg.mss = cpu_to_be16(ssi->gso_size);
+			cpl = write_tso_wr(adap, skb, lso);
+		} else {
+			wr->u.tcpseg.mss = cpu_to_be16(0xffff);
+			cpl = (void *)(wr + 1);
+		}
 	}
 
 	eosw_txq->cred -= wrlen16;
@@ -2167,7 +2172,7 @@ static void ethofld_hard_xmit(struct net_device *dev,
 	struct cpl_tx_pkt_core *cpl;
 	struct fw_eth_tx_eo_wr *wr;
 	bool skip_eotx_wr = false;
-	struct sge_eosw_desc *d;
+	struct tx_sw_desc *d;
 	struct sk_buff *skb;
 	u8 flits, ndesc;
 	int left;
@@ -2257,6 +2262,19 @@ write_wr_headers:
				 d->addr);
 	}
 
+	if (skb_shinfo(skb)->gso_size) {
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+			eohw_txq->uso++;
+		else
+			eohw_txq->tso++;
+		eohw_txq->tx_cso += skb_shinfo(skb)->gso_segs;
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		eohw_txq->tx_cso++;
+	}
+
+	if (skb_vlan_tag_present(skb))
+		eohw_txq->vlan_ins++;
+
 	txq_advance(&eohw_txq->q, ndesc);
 	cxgb4_ring_tx_db(adap, &eohw_txq->q, ndesc);
 	eosw_txq_advance_index(&eosw_txq->last_pidx, 1, eosw_txq->ndesc);
@@ -4369,7 +4387,10 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
 	txq->q.q_type = CXGB4_TXQ_ETH;
 	init_txq(adap, &txq->q, FW_EQ_ETH_CMD_EQID_G(ntohl(c.eqid_pkd)));
 	txq->txq = netdevq;
-	txq->tso = txq->tx_cso = txq->vlan_ins = 0;
+	txq->tso = 0;
+	txq->uso = 0;
+	txq->tx_cso = 0;
+	txq->vlan_ins = 0;
 	txq->mapping_err = 0;
 	txq->dbqt = dbqt;
 
@@ -4538,6 +4559,7 @@ int t4_sge_alloc_ethofld_txq(struct adapter *adap, struct sge_eohw_txq *txq,
 	spin_lock_init(&txq->lock);
 	txq->adap = adap;
 	txq->tso = 0;
+	txq->uso = 0;
 	txq->tx_cso = 0;
 	txq->vlan_ins = 0;
 	txq->mapping_err = 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 414e5cca293e..ac4fb43bdec6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -536,7 +536,8 @@ struct fw_eth_tx_pkt_wr {
 };
 
 enum fw_eth_tx_eo_type {
-	FW_ETH_TX_EO_TYPE_TCPSEG = 1,
+	FW_ETH_TX_EO_TYPE_UDPSEG = 0,
+	FW_ETH_TX_EO_TYPE_TCPSEG,
 };
 
 struct fw_eth_tx_eo_wr {
@@ -544,6 +545,17 @@ struct fw_eth_tx_eo_wr {
 	__be32 equiq_to_len16;
 	__be64 r3;
 	union fw_eth_tx_eo {
+		struct fw_eth_tx_eo_udpseg {
+			__u8   type;
+			__u8   ethlen;
+			__be16 iplen;
+			__u8   udplen;
+			__u8   rtplen;
+			__be16 r4;
+			__be16 mss;
+			__be16 schedpktsize;
+			__be32 plen;
+		} udpseg;
 		struct fw_eth_tx_eo_tcpseg {
 			__u8   type;
 			__u8   ethlen;
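Beyond the driver specifics, the series follows the standard kernel pattern for exposing UDP segmentation offload: advertise NETIF_F_GSO_UDP_L4 (the TSO_FLAGS change in cxgb4_main.c) and key off SKB_GSO_UDP_L4 in skb_shinfo(skb)->gso_type on the transmit path (the checks added throughout sge.c). Below is a hedged, driver-agnostic sketch of that dispatch; the my_*() functions are hypothetical placeholders, while the feature flag and gso_type bits are the real net core API.

```c
#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical helpers standing in for a driver's work-request builders. */
static netdev_tx_t my_xmit_uso(struct sk_buff *skb, struct net_device *dev);
static netdev_tx_t my_xmit_tso(struct sk_buff *skb, struct net_device *dev);
static netdev_tx_t my_xmit_plain(struct sk_buff *skb, struct net_device *dev);

static void my_set_features(struct net_device *dev)
{
	/* Advertise UDP segmentation alongside TCP TSO, as the
	 * TSO_FLAGS change in cxgb4_main.c does.
	 */
	dev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
			    NETIF_F_GSO_UDP_L4;
	dev->features |= dev->hw_features;
}

static netdev_tx_t my_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
	const struct skb_shared_info *ssi = skb_shinfo(skb);

	if (ssi->gso_size) {
		/* gso_type tells USO and TSO apart; this mirrors the
		 * SKB_GSO_UDP_L4 checks added throughout sge.c.
		 */
		if (ssi->gso_type & SKB_GSO_UDP_L4)
			return my_xmit_uso(skb, dev);
		return my_xmit_tso(skb, dev);
	}
	return my_xmit_plain(skb, dev);
}
```

Once wired up this way, the offload can be toggled at runtime with `ethtool -K <dev> tx-udp-segmentation on|off` (the netdev feature string for NETIF_F_GSO_UDP_L4), and the per-queue `uso` counters added by patch 3 surface through `ethtool -S` and the debugfs queue dump touched in cxgb4_debugfs.c.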