From d7e7747ac5c2496c98291944c6066adaa9f3b975 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 26 Aug 2021 15:54:22 +0200 Subject: netfilter: refuse insertion if chain has grown too large Also add a stat counter for this that gets exported both via old /proc interface and ctnetlink. Assuming the old default size of 16536 buckets and max hash occupancy of 64k, this results in 128k insertions (origin+reply), so ~8 entries per chain on average. The revised settings in this series will result in about two entries per bucket on average. This allows a hard-limit ceiling of 64. This is not tunable at the moment, but its possible to either increase nf_conntrack_buckets or decrease nf_conntrack_max to reduce average lengths. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_common.h | 1 + include/uapi/linux/netfilter/nfnetlink_conntrack.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 0c7d8d1e945d..700ea077ce2d 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -18,6 +18,7 @@ struct ip_conntrack_stat { unsigned int expect_create; unsigned int expect_delete; unsigned int search_restart; + unsigned int chaintoolong; }; #define NFCT_INFOMASK 7UL diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index d8484be72fdc..5ade231f497b 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -257,6 +257,7 @@ enum ctattr_stats_cpu { CTA_STATS_ERROR, CTA_STATS_SEARCH_RESTART, CTA_STATS_CLASH_RESOLVE, + CTA_STATS_CHAIN_TOOLONG, __CTA_STATS_MAX, }; #define CTA_STATS_MAX (__CTA_STATS_MAX - 1) -- cgit v1.2.3 From ef6c8da71eaffe4e251b0ff2a1d0da96f89fe6b0 Mon Sep 17 00:00:00 2001 From: Geetha sowjanya Date: Wed, 1 Sep 2021 15:25:50 +0530 Subject: octeontx2-pf: cn10K: Reserve LMTST lines per core This patch reserves the LMTST lines per cpu instead of separate LMTST lines for NPA(buffer free) and NIX(sqe flush). LMTST line of the core on which SQ or RQ is processed is used for LMTST operation. This patch also replace STEOR with STEORL release semantics and updates driver name in ethtool file. Signed-off-by: Geetha sowjanya Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c | 42 ++++++++++------------ .../ethernet/marvell/octeontx2/nic/otx2_common.c | 5 --- .../ethernet/marvell/octeontx2/nic/otx2_common.h | 28 ++++++++------- .../ethernet/marvell/octeontx2/nic/otx2_ethtool.c | 4 +-- .../net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 12 +++---- .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.h | 2 -- include/linux/soc/marvell/octeontx2/asm.h | 11 ++++-- 7 files changed, 49 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c index 3cc76f14d2fd..95f21dfdba48 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k.c @@ -27,7 +27,8 @@ int cn10k_lmtst_init(struct otx2_nic *pfvf) { struct lmtst_tbl_setup_req *req; - int qcount, err; + struct otx2_lmt_info *lmt_info; + int err, cpu; if (!test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) { pfvf->hw_ops = &otx2_hw_ops; @@ -35,15 +36,9 @@ int cn10k_lmtst_init(struct otx2_nic *pfvf) } pfvf->hw_ops = &cn10k_hw_ops; - qcount = pfvf->hw.max_queues; - /* LMTST lines allocation - * qcount = num_online_cpus(); - * NPA = TX + RX + XDP. - * NIX = TX * 32 (For Burst SQE flush). - */ - pfvf->tot_lmt_lines = (qcount * 3) + (qcount * 32); - pfvf->npa_lmt_lines = qcount * 3; - pfvf->nix_lmt_size = LMT_BURST_SIZE * LMT_LINE_SIZE; + /* Total LMTLINES = num_online_cpus() * 32 (For Burst flush).*/ + pfvf->tot_lmt_lines = (num_online_cpus() * LMT_BURST_SIZE); + pfvf->hw.lmt_info = alloc_percpu(struct otx2_lmt_info); mutex_lock(&pfvf->mbox.lock); req = otx2_mbox_alloc_msg_lmtst_tbl_setup(&pfvf->mbox); @@ -66,6 +61,13 @@ int cn10k_lmtst_init(struct otx2_nic *pfvf) err = otx2_sync_mbox_msg(&pfvf->mbox); mutex_unlock(&pfvf->mbox.lock); + for_each_possible_cpu(cpu) { + lmt_info = per_cpu_ptr(pfvf->hw.lmt_info, cpu); + lmt_info->lmt_addr = ((u64)pfvf->hw.lmt_base + + (cpu * LMT_BURST_SIZE * LMT_LINE_SIZE)); + lmt_info->lmt_id = cpu * LMT_BURST_SIZE; + } + return 0; } EXPORT_SYMBOL(cn10k_lmtst_init); @@ -74,13 +76,6 @@ int cn10k_sq_aq_init(void *dev, u16 qidx, u16 sqb_aura) { struct nix_cn10k_aq_enq_req *aq; struct otx2_nic *pfvf = dev; - struct otx2_snd_queue *sq; - - sq = &pfvf->qset.sq[qidx]; - sq->lmt_addr = (u64 *)((u64)pfvf->hw.nix_lmt_base + - (qidx * pfvf->nix_lmt_size)); - - sq->lmt_id = pfvf->npa_lmt_lines + (qidx * LMT_BURST_SIZE); /* Get memory to put this msg */ aq = otx2_mbox_alloc_msg_nix_cn10k_aq_enq(&pfvf->mbox); @@ -125,8 +120,7 @@ void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) if (otx2_alloc_buffer(pfvf, cq, &bufptr)) { if (num_ptrs--) __cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs, - num_ptrs, - cq->rbpool->lmt_addr); + num_ptrs); break; } cq->pool_ptrs--; @@ -134,8 +128,7 @@ void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) num_ptrs++; if (num_ptrs == NPA_MAX_BURST || cq->pool_ptrs == 0) { __cn10k_aura_freeptr(pfvf, cq->cq_idx, ptrs, - num_ptrs, - cq->rbpool->lmt_addr); + num_ptrs); num_ptrs = 1; } } @@ -143,20 +136,23 @@ void cn10k_refill_pool_ptrs(void *dev, struct otx2_cq_queue *cq) void cn10k_sqe_flush(void *dev, struct otx2_snd_queue *sq, int size, int qidx) { + struct otx2_lmt_info *lmt_info; + struct otx2_nic *pfvf = dev; u64 val = 0, tar_addr = 0; + lmt_info = per_cpu_ptr(pfvf->hw.lmt_info, smp_processor_id()); /* FIXME: val[0:10] LMT_ID. * [12:15] no of LMTST - 1 in the burst. * [19:63] data size of each LMTST in the burst except first. */ - val = (sq->lmt_id & 0x7FF); + val = (lmt_info->lmt_id & 0x7FF); /* Target address for LMTST flush tells HW how many 128bit * words are present. * tar_addr[6:4] size of first LMTST - 1 in units of 128b. */ tar_addr |= sq->io_addr | (((size / 16) - 1) & 0x7) << 4; dma_wmb(); - memcpy(sq->lmt_addr, sq->sqe_base, size); + memcpy((u64 *)lmt_info->lmt_addr, sq->sqe_base, size); cn10k_lmt_flush(val, tar_addr); sq->head++; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index ce25c2744435..78df173e6df2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -1230,11 +1230,6 @@ static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id, pool->rbsize = buf_size; - /* Set LMTST addr for NPA batch free */ - if (test_bit(CN10K_LMTST, &pfvf->hw.cap_flag)) - pool->lmt_addr = (__force u64 *)((u64)pfvf->hw.npa_lmt_base + - (pool_id * LMT_LINE_SIZE)); - /* Initialize this pool's context via AF */ aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox); if (!aq) { diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index 48227cec06ee..a51ecd771d07 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -53,6 +53,10 @@ enum arua_mapped_qtypes { /* Send skid of 2000 packets required for CQ size of 4K CQEs. */ #define SEND_CQ_SKID 2000 +struct otx2_lmt_info { + u64 lmt_addr; + u16 lmt_id; +}; /* RSS configuration */ struct otx2_rss_ctx { u8 ind_tbl[MAX_RSS_INDIR_TBL_SIZE]; @@ -224,8 +228,7 @@ struct otx2_hw { #define LMT_LINE_SIZE 128 #define LMT_BURST_SIZE 32 /* 32 LMTST lines for burst SQE flush */ u64 *lmt_base; - u64 *npa_lmt_base; - u64 *nix_lmt_base; + struct otx2_lmt_info __percpu *lmt_info; }; enum vfperm { @@ -407,17 +410,18 @@ static inline bool is_96xx_B0(struct pci_dev *pdev) */ #define PCI_REVISION_ID_96XX 0x00 #define PCI_REVISION_ID_95XX 0x10 -#define PCI_REVISION_ID_LOKI 0x20 +#define PCI_REVISION_ID_95XXN 0x20 #define PCI_REVISION_ID_98XX 0x30 #define PCI_REVISION_ID_95XXMM 0x40 +#define PCI_REVISION_ID_95XXO 0xE0 static inline bool is_dev_otx2(struct pci_dev *pdev) { u8 midr = pdev->revision & 0xF0; return (midr == PCI_REVISION_ID_96XX || midr == PCI_REVISION_ID_95XX || - midr == PCI_REVISION_ID_LOKI || midr == PCI_REVISION_ID_98XX || - midr == PCI_REVISION_ID_95XXMM); + midr == PCI_REVISION_ID_95XXN || midr == PCI_REVISION_ID_98XX || + midr == PCI_REVISION_ID_95XXMM || midr == PCI_REVISION_ID_95XXO); } static inline void otx2_setup_dev_hw_settings(struct otx2_nic *pfvf) @@ -562,15 +566,16 @@ static inline u64 otx2_atomic64_add(u64 incr, u64 *ptr) #endif static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura, - u64 *ptrs, u64 num_ptrs, - u64 *lmt_addr) + u64 *ptrs, u64 num_ptrs) { + struct otx2_lmt_info *lmt_info; u64 size = 0, count_eot = 0; u64 tar_addr, val = 0; + lmt_info = per_cpu_ptr(pfvf->hw.lmt_info, smp_processor_id()); tar_addr = (__force u64)otx2_get_regaddr(pfvf, NPA_LF_AURA_BATCH_FREE0); /* LMTID is same as AURA Id */ - val = (aura & 0x7FF) | BIT_ULL(63); + val = (lmt_info->lmt_id & 0x7FF) | BIT_ULL(63); /* Set if [127:64] of last 128bit word has a valid pointer */ count_eot = (num_ptrs % 2) ? 0ULL : 1ULL; /* Set AURA ID to free pointer */ @@ -586,7 +591,7 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura, size++; tar_addr |= ((size - 1) & 0x7) << 4; } - memcpy(lmt_addr, ptrs, sizeof(u64) * num_ptrs); + memcpy((u64 *)lmt_info->lmt_addr, ptrs, sizeof(u64) * num_ptrs); /* Perform LMTST flush */ cn10k_lmt_flush(val, tar_addr); } @@ -594,12 +599,11 @@ static inline void __cn10k_aura_freeptr(struct otx2_nic *pfvf, u64 aura, static inline void cn10k_aura_freeptr(void *dev, int aura, u64 buf) { struct otx2_nic *pfvf = dev; - struct otx2_pool *pool; u64 ptrs[2]; - pool = &pfvf->qset.pool[aura]; ptrs[1] = buf; - __cn10k_aura_freeptr(pfvf, aura, ptrs, 2, pool->lmt_addr); + /* Free only one buffer at time during init and teardown */ + __cn10k_aura_freeptr(pfvf, aura, ptrs, 2); } /* Alloc pointer from pool/aura */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 799486c72177..dbfa3bc39e34 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -16,8 +16,8 @@ #include "otx2_common.h" #include "otx2_ptp.h" -#define DRV_NAME "octeontx2-nicpf" -#define DRV_VF_NAME "octeontx2-nicvf" +#define DRV_NAME "rvu-nicpf" +#define DRV_VF_NAME "rvu-nicvf" struct otx2_stat { char name[ETH_GSTRING_LEN]; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 2f2e8a3d7924..53df7fff92c4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1533,14 +1533,6 @@ int otx2_open(struct net_device *netdev) if (!qset->rq) goto err_free_mem; - if (test_bit(CN10K_LMTST, &pf->hw.cap_flag)) { - /* Reserve LMT lines for NPA AURA batch free */ - pf->hw.npa_lmt_base = pf->hw.lmt_base; - /* Reserve LMT lines for NIX TX */ - pf->hw.nix_lmt_base = (u64 *)((u64)pf->hw.npa_lmt_base + - (pf->npa_lmt_lines * LMT_LINE_SIZE)); - } - err = otx2_init_hw_resources(pf); if (err) goto err_free_mem; @@ -2668,6 +2660,8 @@ err_del_mcam_entries: err_ptp_destroy: otx2_ptp_destroy(pf); err_detach_rsrc: + if (pf->hw.lmt_info) + free_percpu(pf->hw.lmt_info); if (test_bit(CN10K_LMTST, &pf->hw.cap_flag)) qmem_free(pf->dev, pf->dync_lmt); otx2_detach_resources(&pf->mbox); @@ -2811,6 +2805,8 @@ static void otx2_remove(struct pci_dev *pdev) otx2_mcam_flow_del(pf); otx2_shutdown_tc(pf); otx2_detach_resources(&pf->mbox); + if (pf->hw.lmt_info) + free_percpu(pf->hw.lmt_info); if (test_bit(CN10K_LMTST, &pf->hw.cap_flag)) qmem_free(pf->dev, pf->dync_lmt); otx2_disable_mbox_intr(pf); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h index 869de5f59e73..3ff1ad79c001 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h @@ -80,7 +80,6 @@ struct otx2_snd_queue { u16 num_sqbs; u16 sqe_thresh; u8 sqe_per_sqb; - u32 lmt_id; u64 io_addr; u64 *aura_fc_addr; u64 *lmt_addr; @@ -111,7 +110,6 @@ struct otx2_cq_poll { struct otx2_pool { struct qmem *stack; struct qmem *fc_addr; - u64 *lmt_addr; u16 rbsize; }; diff --git a/include/linux/soc/marvell/octeontx2/asm.h b/include/linux/soc/marvell/octeontx2/asm.h index 28c04d918f0f..fa1d6af0164e 100644 --- a/include/linux/soc/marvell/octeontx2/asm.h +++ b/include/linux/soc/marvell/octeontx2/asm.h @@ -22,12 +22,17 @@ : [rs]"r" (ioaddr)); \ (result); \ }) +/* + * STEORL store to memory with release semantics. + * This will avoid using DMB barrier after each LMTST + * operation. + */ #define cn10k_lmt_flush(val, addr) \ ({ \ __asm__ volatile(".cpu generic+lse\n" \ - "steor %x[rf],[%[rs]]" \ - : [rf]"+r"(val) \ - : [rs]"r"(addr)); \ + "steorl %x[rf],[%[rs]]" \ + : [rf] "+r"(val) \ + : [rs] "r"(addr)); \ }) #else #define otx2_lmt_flush(ioaddr) ({ 0; }) -- cgit v1.2.3 From b9edbfe1adecfc48fd11061dce68afb03d6adbdc Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 2 Sep 2021 14:36:17 +0900 Subject: flow: fix object-size-mismatch warning in flowi{4,6}_to_flowi_common() Commit 3df98d79215ace13 ("lsm,selinux: pass flowi_common instead of flowi to the LSM hooks") introduced flowi{4,6}_to_flowi_common() functions which cause UBSAN warning when building with LLVM 11.0.1 on Ubuntu 21.04. ================================================================================ UBSAN: object-size-mismatch in ./include/net/flow.h:197:33 member access within address ffffc9000109fbd8 with insufficient space for an object of type 'struct flowi' CPU: 2 PID: 7410 Comm: systemd-resolve Not tainted 5.14.0 #51 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 02/27/2020 Call Trace: dump_stack_lvl+0x103/0x171 ubsan_type_mismatch_common+0x1de/0x390 __ubsan_handle_type_mismatch_v1+0x41/0x50 udp_sendmsg+0xda2/0x1300 ? ip_skb_dst_mtu+0x1f0/0x1f0 ? sock_rps_record_flow+0xe/0x200 ? inet_send_prepare+0x2d/0x90 sock_sendmsg+0x49/0x80 ____sys_sendmsg+0x269/0x370 __sys_sendmsg+0x15e/0x1d0 ? syscall_enter_from_user_mode+0xf0/0x1b0 do_syscall_64+0x3d/0xb0 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f7081a50497 Code: 0c 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10 RSP: 002b:00007ffc153870f8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 000000000000000c RCX: 00007f7081a50497 RDX: 0000000000000000 RSI: 00007ffc15387140 RDI: 000000000000000c RBP: 00007ffc15387140 R08: 0000563f29a5e4fc R09: 000000000000cd28 R10: 0000563f29a68a30 R11: 0000000000000246 R12: 000000000000000c R13: 0000000000000001 R14: 0000563f29a68a30 R15: 0000563f29a5e50c ================================================================================ I don't think we need to call flowi{4,6}_to_flowi() from these functions because the first member of "struct flowi4" and "struct flowi6" is struct flowi_common __fl_common; while the first member of "struct flowi" is union { struct flowi_common __fl_common; struct flowi4 ip4; struct flowi6 ip6; struct flowidn dn; } u; which should point to the same address without access to "struct flowi". Signed-off-by: Tetsuo Handa Signed-off-by: David S. Miller --- include/net/flow.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index 6f5e70240071..58beb16a49b8 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -194,7 +194,7 @@ static inline struct flowi *flowi4_to_flowi(struct flowi4 *fl4) static inline struct flowi_common *flowi4_to_flowi_common(struct flowi4 *fl4) { - return &(flowi4_to_flowi(fl4)->u.__fl_common); + return &(fl4->__fl_common); } static inline struct flowi *flowi6_to_flowi(struct flowi6 *fl6) @@ -204,7 +204,7 @@ static inline struct flowi *flowi6_to_flowi(struct flowi6 *fl6) static inline struct flowi_common *flowi6_to_flowi_common(struct flowi6 *fl6) { - return &(flowi6_to_flowi(fl6)->u.__fl_common); + return &(fl6->__fl_common); } static inline struct flowi *flowidn_to_flowi(struct flowidn *fldn) -- cgit v1.2.3 From c7c5e6ff533fe1f9afef7d2fa46678987a1335a7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 3 Sep 2021 15:03:43 -0700 Subject: fq_codel: reject silly quantum parameters syzbot found that forcing a big quantum attribute would crash hosts fast, essentially using this: tc qd replace dev eth0 root fq_codel quantum 4294967295 This is because fq_codel_dequeue() would have to loop ~2^31 times in : if (flow->deficit <= 0) { flow->deficit += q->quantum; list_move_tail(&flow->flowchain, &q->old_flows); goto begin; } SFQ max quantum is 2^19 (half a megabyte) Lets adopt a max quantum of one megabyte for FQ_CODEL. Fixes: 4b549a2ef4be ("fq_codel: Fair Queue Codel AQM") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 2 ++ net/sched/sch_fq_codel.c | 12 ++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 79a699f106b1..ec88590b3198 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -827,6 +827,8 @@ struct tc_codel_xstats { /* FQ_CODEL */ +#define FQ_CODEL_QUANTUM_MAX (1 << 20) + enum { TCA_FQ_CODEL_UNSPEC, TCA_FQ_CODEL_TARGET, diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index c4afdd026f51..bb0cd6d3d2c2 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -369,6 +369,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, { struct fq_codel_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_FQ_CODEL_MAX + 1]; + u32 quantum = 0; int err; if (!opt) @@ -386,6 +387,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, q->flows_cnt > 65536) return -EINVAL; } + if (tb[TCA_FQ_CODEL_QUANTUM]) { + quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM])); + if (quantum > FQ_CODEL_QUANTUM_MAX) { + NL_SET_ERR_MSG(extack, "Invalid quantum"); + return -EINVAL; + } + } sch_tree_lock(sch); if (tb[TCA_FQ_CODEL_TARGET]) { @@ -412,8 +420,8 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_FQ_CODEL_ECN]) q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]); - if (tb[TCA_FQ_CODEL_QUANTUM]) - q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM])); + if (quantum) + q->quantum = quantum; if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]) q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])); -- cgit v1.2.3 From 48eab831ae8b9f7002a533fa4235eed63ea1f1a3 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 2 Sep 2021 11:10:37 -0700 Subject: net: create netdev->dev_addr assignment helpers Recent work on converting address list to a tree made it obvious we need an abstraction around writing netdev->dev_addr. Without such abstraction updating the main device address is invisible to the core. Introduce a number of helpers which for now just wrap memcpy() but in the future can make necessary changes to the address tree. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 12 ++++++++++++ include/linux/netdevice.h | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'include') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 330345b1be54..928c411bd509 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -299,6 +299,18 @@ static inline void ether_addr_copy(u8 *dst, const u8 *src) #endif } +/** + * eth_hw_addr_set - Assign Ethernet address to a net_device + * @dev: pointer to net_device structure + * @addr: address to assign + * + * Assign given address to the net_device, addr_assign_type is not changed. + */ +static inline void eth_hw_addr_set(struct net_device *dev, const u8 *addr) +{ + ether_addr_copy(dev->dev_addr, addr); +} + /** * eth_hw_addr_inherit - Copy dev_addr from another net_device * @dst: pointer to net_device to copy dev_addr to diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7c41593c1d6a..d79163208dfd 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4641,6 +4641,24 @@ void __hw_addr_unsync_dev(struct netdev_hw_addr_list *list, void __hw_addr_init(struct netdev_hw_addr_list *list); /* Functions used for device addresses handling */ +static inline void +__dev_addr_set(struct net_device *dev, const u8 *addr, size_t len) +{ + memcpy(dev->dev_addr, addr, len); +} + +static inline void dev_addr_set(struct net_device *dev, const u8 *addr) +{ + __dev_addr_set(dev, addr, dev->addr_len); +} + +static inline void +dev_addr_mod(struct net_device *dev, unsigned int offset, + const u8 *addr, size_t len) +{ + memcpy(&dev->dev_addr[offset], addr, len); +} + int dev_addr_add(struct net_device *dev, const unsigned char *addr, unsigned char addr_type); int dev_addr_del(struct net_device *dev, const unsigned char *addr, -- cgit v1.2.3 From f97493657c6372eeefe70faadd214bf31488c44e Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Tue, 7 Sep 2021 18:56:46 +0800 Subject: net: phylink: add suspend/resume support Joakim Zhang reports that Wake-on-Lan with the stmmac ethernet driver broke when moving the incorrect handling of mac link state out of mac_config(). This reason this breaks is because the stmmac's WoL is handled by the MAC rather than the PHY, and phylink doesn't cater for that scenario. This patch adds the necessary phylink code to handle suspend/resume events according to whether the MAC still needs a valid link or not. This is the barest minimum for this support. Reported-by: Joakim Zhang Tested-by: Joakim Zhang Signed-off-by: Russell King (Oracle) Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- drivers/net/phy/phylink.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/phylink.h | 3 ++ 2 files changed, 85 insertions(+) (limited to 'include') diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 2cdf9f989dec..a1464b764d4d 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -33,6 +33,7 @@ enum { PHYLINK_DISABLE_STOPPED, PHYLINK_DISABLE_LINK, + PHYLINK_DISABLE_MAC_WOL, }; /** @@ -1282,6 +1283,9 @@ EXPORT_SYMBOL_GPL(phylink_start); * network device driver's &struct net_device_ops ndo_stop() method. The * network device's carrier state should not be changed prior to calling this * function. + * + * This will synchronously bring down the link if the link is not already + * down (in other words, it will trigger a mac_link_down() method call.) */ void phylink_stop(struct phylink *pl) { @@ -1301,6 +1305,84 @@ void phylink_stop(struct phylink *pl) } EXPORT_SYMBOL_GPL(phylink_stop); +/** + * phylink_suspend() - handle a network device suspend event + * @pl: a pointer to a &struct phylink returned from phylink_create() + * @mac_wol: true if the MAC needs to receive packets for Wake-on-Lan + * + * Handle a network device suspend event. There are several cases: + * - If Wake-on-Lan is not active, we can bring down the link between + * the MAC and PHY by calling phylink_stop(). + * - If Wake-on-Lan is active, and being handled only by the PHY, we + * can also bring down the link between the MAC and PHY. + * - If Wake-on-Lan is active, but being handled by the MAC, the MAC + * still needs to receive packets, so we can not bring the link down. + */ +void phylink_suspend(struct phylink *pl, bool mac_wol) +{ + ASSERT_RTNL(); + + if (mac_wol && (!pl->netdev || pl->netdev->wol_enabled)) { + /* Wake-on-Lan enabled, MAC handling */ + mutex_lock(&pl->state_mutex); + + /* Stop the resolver bringing the link up */ + __set_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state); + + /* Disable the carrier, to prevent transmit timeouts, + * but one would hope all packets have been sent. This + * also means phylink_resolve() will do nothing. + */ + netif_carrier_off(pl->netdev); + + /* We do not call mac_link_down() here as we want the + * link to remain up to receive the WoL packets. + */ + mutex_unlock(&pl->state_mutex); + } else { + phylink_stop(pl); + } +} +EXPORT_SYMBOL_GPL(phylink_suspend); + +/** + * phylink_resume() - handle a network device resume event + * @pl: a pointer to a &struct phylink returned from phylink_create() + * + * Undo the effects of phylink_suspend(), returning the link to an + * operational state. + */ +void phylink_resume(struct phylink *pl) +{ + ASSERT_RTNL(); + + if (test_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state)) { + /* Wake-on-Lan enabled, MAC handling */ + + /* Call mac_link_down() so we keep the overall state balanced. + * Do this under the state_mutex lock for consistency. This + * will cause a "Link Down" message to be printed during + * resume, which is harmless - the true link state will be + * printed when we run a resolve. + */ + mutex_lock(&pl->state_mutex); + phylink_link_down(pl); + mutex_unlock(&pl->state_mutex); + + /* Re-apply the link parameters so that all the settings get + * restored to the MAC. + */ + phylink_mac_initial_config(pl, true); + + /* Re-enable and re-resolve the link parameters */ + clear_bit(PHYLINK_DISABLE_MAC_WOL, &pl->phylink_disable_state); + phylink_run_resolve(pl); + } else { + phylink_start(pl); + } +} +EXPORT_SYMBOL_GPL(phylink_resume); + /** * phylink_ethtool_get_wol() - get the wake on lan parameters for the PHY * @pl: a pointer to a &struct phylink returned from phylink_create() diff --git a/include/linux/phylink.h b/include/linux/phylink.h index afb3ded0b691..237291196ce2 100644 --- a/include/linux/phylink.h +++ b/include/linux/phylink.h @@ -451,6 +451,9 @@ void phylink_mac_change(struct phylink *, bool up); void phylink_start(struct phylink *); void phylink_stop(struct phylink *); +void phylink_suspend(struct phylink *pl, bool mac_wol); +void phylink_resume(struct phylink *pl); + void phylink_ethtool_get_wol(struct phylink *, struct ethtool_wolinfo *); int phylink_ethtool_set_wol(struct phylink *, struct ethtool_wolinfo *); -- cgit v1.2.3