From cb0d54cbf94866b48a73e10a73a55655f808cc7c Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Tue, 15 Mar 2022 18:20:48 +0200 Subject: net/mlx5e: Fix wrong source vport matching on tunnel rule When OVS internal port is the vtep device, the first decap rule is matching on the internal port's vport metadata value and then changes the metadata to be the uplink's value. Therefore, following rules on the tunnel, in chain > 0, should avoid matching on internal port metadata and use the uplink vport metadata instead. Select the uplink's metadata value for the source vport match in case the rule is in chain greater than zero, even if the tunnel route device is internal port. Fixes: 166f431ec6be ("net/mlx5e: Add indirect tc offload of ovs internal port") Signed-off-by: Ariel Levkovich Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 3f63df127091..3b151332e2f8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -139,7 +139,7 @@ mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw, if (mlx5_esw_indir_table_decap_vport(attr)) vport = mlx5_esw_indir_table_decap_vport(attr); - if (esw_attr->int_port) + if (attr && !attr->chain && esw_attr->int_port) metadata = mlx5e_tc_int_port_get_metadata_for_match(esw_attr->int_port); else -- cgit v1.2.3 From 7ba2d9d8de96696c1451fee1b01da11f45bdc2b9 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Thu, 3 Mar 2022 19:02:03 +0200 Subject: net/mlx5: Fix slab-out-of-bounds while reading resource dump menu Resource dump menu may span over more than a single page, support it. Otherwise, menu read may result in a memory access violation: reading outside of the allocated page. Note that page format of the first menu page contains menu headers while the proceeding menu pages contain only records. The KASAN logs are as follows: BUG: KASAN: slab-out-of-bounds in strcmp+0x9b/0xb0 Read of size 1 at addr ffff88812b2e1fd0 by task systemd-udevd/496 CPU: 5 PID: 496 Comm: systemd-udevd Tainted: G B 5.16.0_for_upstream_debug_2022_01_10_23_12 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack_lvl+0x57/0x7d print_address_description.constprop.0+0x1f/0x140 ? strcmp+0x9b/0xb0 ? strcmp+0x9b/0xb0 kasan_report.cold+0x83/0xdf ? strcmp+0x9b/0xb0 strcmp+0x9b/0xb0 mlx5_rsc_dump_init+0x4ab/0x780 [mlx5_core] ? mlx5_rsc_dump_destroy+0x80/0x80 [mlx5_core] ? lockdep_hardirqs_on_prepare+0x286/0x400 ? raw_spin_unlock_irqrestore+0x47/0x50 ? aomic_notifier_chain_register+0x32/0x40 mlx5_load+0x104/0x2e0 [mlx5_core] mlx5_init_one+0x41b/0x610 [mlx5_core] .... The buggy address belongs to the object at ffff88812b2e0000 which belongs to the cache kmalloc-4k of size 4096 The buggy address is located 4048 bytes to the right of 4096-byte region [ffff88812b2e0000, ffff88812b2e1000) The buggy address belongs to the page: page:000000009d69807a refcount:1 mapcount:0 mapping:0000000000000000 index:0xffff88812b2e6000 pfn:0x12b2e0 head:000000009d69807a order:3 compound_mapcount:0 compound_pincount:0 flags: 0x8000000000010200(slab|head|zone=2) raw: 8000000000010200 0000000000000000 dead000000000001 ffff888100043040 raw: ffff88812b2e6000 0000000080040000 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88812b2e1e80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffff88812b2e1f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffff88812b2e1f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffff88812b2e2000: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ffff88812b2e2080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ================================================================== Fixes: 12206b17235a ("net/mlx5: Add support for resource dump") Signed-off-by: Aya Levin Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/diag/rsc_dump.c | 31 +++++++++++++++++----- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c index 538adab6878b..c5b560a8b026 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/rsc_dump.c @@ -31,6 +31,7 @@ static const char *const mlx5_rsc_sgmt_name[] = { struct mlx5_rsc_dump { u32 pdn; u32 mkey; + u32 number_of_menu_items; u16 fw_segment_type[MLX5_SGMT_TYPE_NUM]; }; @@ -50,21 +51,37 @@ static int mlx5_rsc_dump_sgmt_get_by_name(char *name) return -EINVAL; } -static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page) +#define MLX5_RSC_DUMP_MENU_HEADER_SIZE (MLX5_ST_SZ_BYTES(resource_dump_info_segment) + \ + MLX5_ST_SZ_BYTES(resource_dump_command_segment) + \ + MLX5_ST_SZ_BYTES(resource_dump_menu_segment)) + +static int mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct page *page, + int read_size, int start_idx) { void *data = page_address(page); enum mlx5_sgmt_type sgmt_idx; int num_of_items; char *sgmt_name; void *member; + int size = 0; void *menu; int i; - menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu); - num_of_items = MLX5_GET(resource_dump_menu_segment, menu, num_of_records); + if (!start_idx) { + menu = MLX5_ADDR_OF(menu_resource_dump_response, data, menu); + rsc_dump->number_of_menu_items = MLX5_GET(resource_dump_menu_segment, menu, + num_of_records); + size = MLX5_RSC_DUMP_MENU_HEADER_SIZE; + data += size; + } + num_of_items = rsc_dump->number_of_menu_items; + + for (i = 0; start_idx + i < num_of_items; i++) { + size += MLX5_ST_SZ_BYTES(resource_dump_menu_record); + if (size >= read_size) + return start_idx + i; - for (i = 0; i < num_of_items; i++) { - member = MLX5_ADDR_OF(resource_dump_menu_segment, menu, record[i]); + member = data + MLX5_ST_SZ_BYTES(resource_dump_menu_record) * i; sgmt_name = MLX5_ADDR_OF(resource_dump_menu_record, member, segment_name); sgmt_idx = mlx5_rsc_dump_sgmt_get_by_name(sgmt_name); if (sgmt_idx == -EINVAL) @@ -72,6 +89,7 @@ static void mlx5_rsc_dump_read_menu_sgmt(struct mlx5_rsc_dump *rsc_dump, struct rsc_dump->fw_segment_type[sgmt_idx] = MLX5_GET(resource_dump_menu_record, member, segment_type); } + return 0; } static int mlx5_rsc_dump_trigger(struct mlx5_core_dev *dev, struct mlx5_rsc_dump_cmd *cmd, @@ -168,6 +186,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev) struct mlx5_rsc_dump_cmd *cmd = NULL; struct mlx5_rsc_key key = {}; struct page *page; + int start_idx = 0; int size; int err; @@ -189,7 +208,7 @@ static int mlx5_rsc_dump_menu(struct mlx5_core_dev *dev) if (err < 0) goto destroy_cmd; - mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page); + start_idx = mlx5_rsc_dump_read_menu_sgmt(dev->rsc_dump, page, size, start_idx); } while (err > 0); -- cgit v1.2.3 From ada09af92e621ab500dd80a16d1d0299a18a1180 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 28 Mar 2022 15:54:52 +0300 Subject: net/mlx5e: Don't match double-vlan packets if cvlan is not set Currently, match VLAN rule also matches packets that have multiple VLAN headers. This behavior is similar to buggy flower classifier behavior that has recently been fixed. Fix the issue by matching on outer_second_cvlan_tag with value 0 which will cause the HW to verify the packet doesn't contain second vlan header. Fixes: 699e96ddf47f ("net/mlx5e: Support offloading tc double vlan headers match") Signed-off-by: Vlad Buslov Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index e3fc15ae7bb1..ac0f73074f7a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2459,6 +2459,17 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, match.key->vlan_priority); *match_level = MLX5_MATCH_L2; + + if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) && + match.mask->vlan_eth_type && + MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, + ft_field_support.outer_second_vid, + fs_type)) { + MLX5_SET(fte_match_set_misc, misc_c, + outer_second_cvlan_tag, 1); + spec->match_criteria_enable |= + MLX5_MATCH_MISC_PARAMETERS; + } } } else if (*match_level != MLX5_MATCH_NONE) { /* cvlan_tag enabled in match criteria and -- cgit v1.2.3 From c4d963a588a6e7c4ef31160e80697ae8e5a47746 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Wed, 6 Apr 2022 10:30:21 +0300 Subject: net/mlx5e: Fix the calling of update_buffer_lossy() API The arguments of update_buffer_lossy() is in a wrong order. Fix it. Fixes: 88b3d5c90e96 ("net/mlx5e: Fix port buffers cell size value") Signed-off-by: Mark Zhang Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 673f1c82d381..c9d5d8d93994 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -309,8 +309,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; - err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, port_buff_cell_sz, - xoff, &port_buffer, &update_buffer); + err = update_buffer_lossy(max_mtu, curr_pfc_en, prio2buffer, xoff, + port_buff_cell_sz, &port_buffer, &update_buffer); if (err) return err; } -- cgit v1.2.3 From 27b0420fd959e38e3500e60b637d39dfab065645 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 18 Apr 2022 17:32:19 +0300 Subject: net/mlx5e: Lag, Fix use-after-free in fib event handler Recent commit that modified fib route event handler to handle events according to their priority introduced use-after-free[0] in mp->mfi pointer usage. The pointer now is not just cached in order to be compared to following fib_info instances, but is also dereferenced to obtain fib_priority. However, since mlx5 lag code doesn't hold the reference to fin_info during whole mp->mfi lifetime, it could be used after fib_info instance has already been freed be kernel infrastructure code. Don't ever dereference mp->mfi pointer. Refactor it to be 'const void*' type and cache fib_info priority in dedicated integer. Group fib_info-related data into dedicated 'fib' structure that will be further extended by following patches in the series. [0]: [ 203.588029] ================================================================== [ 203.590161] BUG: KASAN: use-after-free in mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core] [ 203.592386] Read of size 4 at addr ffff888144df2050 by task kworker/u20:4/138 [ 203.594766] CPU: 3 PID: 138 Comm: kworker/u20:4 Tainted: G B 5.17.0-rc7+ #6 [ 203.596751] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 203.598813] Workqueue: mlx5_lag_mp mlx5_lag_fib_update [mlx5_core] [ 203.600053] Call Trace: [ 203.600608] [ 203.601110] dump_stack_lvl+0x48/0x5e [ 203.601860] print_address_description.constprop.0+0x1f/0x160 [ 203.602950] ? mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core] [ 203.604073] ? mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core] [ 203.605177] kasan_report.cold+0x83/0xdf [ 203.605969] ? mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core] [ 203.607102] mlx5_lag_fib_update+0xabd/0xd60 [mlx5_core] [ 203.608199] ? mlx5_lag_init_fib_work+0x1c0/0x1c0 [mlx5_core] [ 203.609382] ? read_word_at_a_time+0xe/0x20 [ 203.610463] ? strscpy+0xa0/0x2a0 [ 203.611463] process_one_work+0x722/0x1270 [ 203.612344] worker_thread+0x540/0x11e0 [ 203.613136] ? rescuer_thread+0xd50/0xd50 [ 203.613949] kthread+0x26e/0x300 [ 203.614627] ? kthread_complete_and_exit+0x20/0x20 [ 203.615542] ret_from_fork+0x1f/0x30 [ 203.616273] [ 203.617174] Allocated by task 3746: [ 203.617874] kasan_save_stack+0x1e/0x40 [ 203.618644] __kasan_kmalloc+0x81/0xa0 [ 203.619394] fib_create_info+0xb41/0x3c50 [ 203.620213] fib_table_insert+0x190/0x1ff0 [ 203.621020] fib_magic.isra.0+0x246/0x2e0 [ 203.621803] fib_add_ifaddr+0x19f/0x670 [ 203.622563] fib_inetaddr_event+0x13f/0x270 [ 203.623377] blocking_notifier_call_chain+0xd4/0x130 [ 203.624355] __inet_insert_ifa+0x641/0xb20 [ 203.625185] inet_rtm_newaddr+0xc3d/0x16a0 [ 203.626009] rtnetlink_rcv_msg+0x309/0x880 [ 203.626826] netlink_rcv_skb+0x11d/0x340 [ 203.627626] netlink_unicast+0x4cc/0x790 [ 203.628430] netlink_sendmsg+0x762/0xc00 [ 203.629230] sock_sendmsg+0xb2/0xe0 [ 203.629955] ____sys_sendmsg+0x58a/0x770 [ 203.630756] ___sys_sendmsg+0xd8/0x160 [ 203.631523] __sys_sendmsg+0xb7/0x140 [ 203.632294] do_syscall_64+0x35/0x80 [ 203.633045] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 203.634427] Freed by task 0: [ 203.635063] kasan_save_stack+0x1e/0x40 [ 203.635844] kasan_set_track+0x21/0x30 [ 203.636618] kasan_set_free_info+0x20/0x30 [ 203.637450] __kasan_slab_free+0xfc/0x140 [ 203.638271] kfree+0x94/0x3b0 [ 203.638903] rcu_core+0x5e4/0x1990 [ 203.639640] __do_softirq+0x1ba/0x5d3 [ 203.640828] Last potentially related work creation: [ 203.641785] kasan_save_stack+0x1e/0x40 [ 203.642571] __kasan_record_aux_stack+0x9f/0xb0 [ 203.643478] call_rcu+0x88/0x9c0 [ 203.644178] fib_release_info+0x539/0x750 [ 203.644997] fib_table_delete+0x659/0xb80 [ 203.645809] fib_magic.isra.0+0x1a3/0x2e0 [ 203.646617] fib_del_ifaddr+0x93f/0x1300 [ 203.647415] fib_inetaddr_event+0x9f/0x270 [ 203.648251] blocking_notifier_call_chain+0xd4/0x130 [ 203.649225] __inet_del_ifa+0x474/0xc10 [ 203.650016] devinet_ioctl+0x781/0x17f0 [ 203.650788] inet_ioctl+0x1ad/0x290 [ 203.651533] sock_do_ioctl+0xce/0x1c0 [ 203.652315] sock_ioctl+0x27b/0x4f0 [ 203.653058] __x64_sys_ioctl+0x124/0x190 [ 203.653850] do_syscall_64+0x35/0x80 [ 203.654608] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 203.666952] The buggy address belongs to the object at ffff888144df2000 which belongs to the cache kmalloc-256 of size 256 [ 203.669250] The buggy address is located 80 bytes inside of 256-byte region [ffff888144df2000, ffff888144df2100) [ 203.671332] The buggy address belongs to the page: [ 203.672273] page:00000000bf6c9314 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x144df0 [ 203.674009] head:00000000bf6c9314 order:2 compound_mapcount:0 compound_pincount:0 [ 203.675422] flags: 0x2ffff800010200(slab|head|node=0|zone=2|lastcpupid=0x1ffff) [ 203.676819] raw: 002ffff800010200 0000000000000000 dead000000000122 ffff888100042b40 [ 203.678384] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000 [ 203.679928] page dumped because: kasan: bad access detected [ 203.681455] Memory state around the buggy address: [ 203.682421] ffff888144df1f00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 203.683863] ffff888144df1f80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 203.685310] >ffff888144df2000: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 203.686701] ^ [ 203.687820] ffff888144df2080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb [ 203.689226] ffff888144df2100: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 203.690620] ================================================================== Fixes: ad11c4f1d8fd ("net/mlx5e: Lag, Only handle events from highest priority multipath entry") Signed-off-by: Vlad Buslov Reviewed-by: Maor Dickman Reviewed-by: Leon Romanovsky Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c | 26 +++++++++++++++--------- drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h | 5 ++++- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index 4a6ec15ef046..bc77aba97ac1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -100,6 +100,12 @@ static void mlx5_lag_fib_event_flush(struct notifier_block *nb) flush_workqueue(mp->wq); } +static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi) +{ + mp->fib.mfi = fi; + mp->fib.priority = fi->fib_priority; +} + struct mlx5_fib_event_work { struct work_struct work; struct mlx5_lag *ldev; @@ -121,13 +127,13 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, /* Handle delete event */ if (event == FIB_EVENT_ENTRY_DEL) { /* stop track */ - if (mp->mfi == fi) - mp->mfi = NULL; + if (mp->fib.mfi == fi) + mp->fib.mfi = NULL; return; } /* Handle multipath entry with lower priority value */ - if (mp->mfi && mp->mfi != fi && fi->fib_priority >= mp->mfi->fib_priority) + if (mp->fib.mfi && mp->fib.mfi != fi && fi->fib_priority >= mp->fib.priority) return; /* Handle add/replace event */ @@ -145,7 +151,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, mlx5_lag_set_port_affinity(ldev, i); } - mp->mfi = fi; + mlx5_lag_fib_set(mp, fi); return; } @@ -165,7 +171,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } /* First time we see multipath route */ - if (!mp->mfi && !__mlx5_lag_is_active(ldev)) { + if (!mp->fib.mfi && !__mlx5_lag_is_active(ldev)) { struct lag_tracker tracker; tracker = ldev->tracker; @@ -173,7 +179,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY); - mp->mfi = fi; + mlx5_lag_fib_set(mp, fi); } static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, @@ -184,7 +190,7 @@ static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, struct lag_mp *mp = &ldev->lag_mp; /* Check the nh event is related to the route */ - if (!mp->mfi || mp->mfi != fi) + if (!mp->fib.mfi || mp->fib.mfi != fi) return; /* nh added/removed */ @@ -313,7 +319,7 @@ void mlx5_lag_mp_reset(struct mlx5_lag *ldev) /* Clear mfi, as it might become stale when a route delete event * has been missed, see mlx5_lag_fib_route_event(). */ - ldev->lag_mp.mfi = NULL; + ldev->lag_mp.fib.mfi = NULL; } int mlx5_lag_mp_init(struct mlx5_lag *ldev) @@ -324,7 +330,7 @@ int mlx5_lag_mp_init(struct mlx5_lag *ldev) /* always clear mfi, as it might become stale when a route delete event * has been missed */ - mp->mfi = NULL; + mp->fib.mfi = NULL; if (mp->fib_nb.notifier_call) return 0; @@ -354,5 +360,5 @@ void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) unregister_fib_notifier(&init_net, &mp->fib_nb); destroy_workqueue(mp->wq); mp->fib_nb.notifier_call = NULL; - mp->mfi = NULL; + mp->fib.mfi = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h index 57af962cad29..143226753c3a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h @@ -15,7 +15,10 @@ enum mlx5_lag_port_affinity { struct lag_mp { struct notifier_block fib_nb; - struct fib_info *mfi; /* used in tracking fib events */ + struct { + const void *mfi; /* used in tracking fib events */ + u32 priority; + } fib; struct workqueue_struct *wq; }; -- cgit v1.2.3 From a6589155ec9847918e00e7279b8aa6d4c272bea7 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 18 Apr 2022 17:32:54 +0300 Subject: net/mlx5e: Lag, Fix fib_info pointer assignment Referenced change incorrectly sets single path fib_info even when LAG is not active. Fix it by moving call to mlx5_lag_fib_set() into conditional that verifies LAG state. Fixes: ad11c4f1d8fd ("net/mlx5e: Lag, Only handle events from highest priority multipath entry") Signed-off-by: Vlad Buslov Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index bc77aba97ac1..9a5884e8a8bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -149,9 +149,9 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, i++; mlx5_lag_set_port_affinity(ldev, i); + mlx5_lag_fib_set(mp, fi); } - mlx5_lag_fib_set(mp, fi); return; } -- cgit v1.2.3 From 4a2a664ed87962c4ddb806a84b5c9634820bcf55 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 18 Apr 2022 17:40:37 +0300 Subject: net/mlx5e: Lag, Don't skip fib events on current dst Referenced change added check to skip updating fib when new fib instance has same or lower priority. However, new fib instance can be an update on same dst address as existing one even though the structure is another instance that has different address. Ignoring events on such instances causes multipath LAG state to not be correctly updated. Track 'dst' and 'dst_len' fields of fib event fib_entry_notifier_info structure and don't skip events that have the same value of that fields. Fixes: ad11c4f1d8fd ("net/mlx5e: Lag, Only handle events from highest priority multipath entry") Signed-off-by: Vlad Buslov Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c | 20 ++++++++++++-------- drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h | 2 ++ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c index 9a5884e8a8bf..d6c3e6dfd71f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c @@ -100,10 +100,12 @@ static void mlx5_lag_fib_event_flush(struct notifier_block *nb) flush_workqueue(mp->wq); } -static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi) +static void mlx5_lag_fib_set(struct lag_mp *mp, struct fib_info *fi, u32 dst, int dst_len) { mp->fib.mfi = fi; mp->fib.priority = fi->fib_priority; + mp->fib.dst = dst; + mp->fib.dst_len = dst_len; } struct mlx5_fib_event_work { @@ -116,10 +118,10 @@ struct mlx5_fib_event_work { }; }; -static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, - unsigned long event, - struct fib_info *fi) +static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, unsigned long event, + struct fib_entry_notifier_info *fen_info) { + struct fib_info *fi = fen_info->fi; struct lag_mp *mp = &ldev->lag_mp; struct fib_nh *fib_nh0, *fib_nh1; unsigned int nhs; @@ -133,7 +135,9 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } /* Handle multipath entry with lower priority value */ - if (mp->fib.mfi && mp->fib.mfi != fi && fi->fib_priority >= mp->fib.priority) + if (mp->fib.mfi && mp->fib.mfi != fi && + (mp->fib.dst != fen_info->dst || mp->fib.dst_len != fen_info->dst_len) && + fi->fib_priority >= mp->fib.priority) return; /* Handle add/replace event */ @@ -149,7 +153,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, i++; mlx5_lag_set_port_affinity(ldev, i); - mlx5_lag_fib_set(mp, fi); + mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len); } return; @@ -179,7 +183,7 @@ static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev, } mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY); - mlx5_lag_fib_set(mp, fi); + mlx5_lag_fib_set(mp, fi, fen_info->dst, fen_info->dst_len); } static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev, @@ -220,7 +224,7 @@ static void mlx5_lag_fib_update(struct work_struct *work) case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_DEL: mlx5_lag_fib_route_event(ldev, fib_work->event, - fib_work->fen_info.fi); + &fib_work->fen_info); fib_info_put(fib_work->fen_info.fi); break; case FIB_EVENT_NH_ADD: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h index 143226753c3a..056a066da604 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h @@ -18,6 +18,8 @@ struct lag_mp { struct { const void *mfi; /* used in tracking fib events */ u32 priority; + u32 dst; + int dst_len; } fib; struct workqueue_struct *wq; }; -- cgit v1.2.3 From 087032ee7021a22e4c7557c0ed16bfd792c3f6fe Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Wed, 23 Feb 2022 21:29:17 +0200 Subject: net/mlx5e: TC, Fix ct_clear overwriting ct action metadata ct_clear action is translated to clearing reg_c metadata which holds ct state and zone information using mod header actions. These actions are allocated during the actions parsing, as part of the flow attributes main mod header action list. If ct action exists in the rule, the flow's main mod header is used only in the post action table rule, after the ct tables which set the ct info in the reg_c as part of the ct actions. Therefore, if the original rule has a ct_clear action followed by a ct action, the ct action reg_c setting will be done first and will be followed by the ct_clear resetting reg_c and overwriting the ct info. Fix this by moving the ct_clear mod header actions allocation from the ct action parsing stage to the ct action post parsing stage where it is already known if ct_clear is followed by a ct action. In such case, we skip the mod header actions allocation for the ct clear since the ct action will write to reg_c anyway after clearing it. Fixes: 806401c20a0f ("net/mlx5e: CT, Fix multiple allocations and memleak of mod acts") Signed-off-by: Ariel Levkovich Reviewed-by: Paul Blakey Reviewed-by: Roi Dayan Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- .../net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c | 34 ++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 20 ++++--------- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h | 11 +++++++ 3 files changed, 49 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c index b9d38fe807df..a829c94289c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/ct.c @@ -45,12 +45,41 @@ tc_act_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, if (mlx5e_is_eswitch_flow(parse_state->flow)) attr->esw_attr->split_count = attr->esw_attr->out_count; - if (!clear_action) { + if (clear_action) { + parse_state->ct_clear = true; + } else { attr->flags |= MLX5_ATTR_FLAG_CT; flow_flag_set(parse_state->flow, CT); parse_state->ct = true; } - parse_state->ct_clear = clear_action; + + return 0; +} + +static int +tc_act_post_parse_ct(struct mlx5e_tc_act_parse_state *parse_state, + struct mlx5e_priv *priv, + struct mlx5_flow_attr *attr) +{ + struct mlx5e_tc_mod_hdr_acts *mod_acts = &attr->parse_attr->mod_hdr_acts; + int err; + + /* If ct action exist, we can ignore previous ct_clear actions */ + if (parse_state->ct) + return 0; + + if (parse_state->ct_clear) { + err = mlx5_tc_ct_set_ct_clear_regs(parse_state->ct_priv, mod_acts); + if (err) { + NL_SET_ERR_MSG_MOD(parse_state->extack, + "Failed to set registers for ct clear"); + return err; + } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + + /* Prevent handling of additional, redundant clear actions */ + parse_state->ct_clear = false; + } return 0; } @@ -70,5 +99,6 @@ struct mlx5e_tc_act mlx5e_tc_act_ct = { .can_offload = tc_act_can_offload_ct, .parse_action = tc_act_parse_ct, .is_multi_table_act = tc_act_is_multi_table_act_ct, + .post_parse = tc_act_post_parse_ct, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index e49f51124c74..73a1e0a4818d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -582,6 +582,12 @@ mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv, return 0; } +int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0); +} + static int mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act, char *modact) @@ -1410,9 +1416,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, const struct flow_action_entry *act, struct netlink_ext_ack *extack) { - bool clear_action = act->ct.action & TCA_CT_ACT_CLEAR; - int err; - if (!priv) { NL_SET_ERR_MSG_MOD(extack, "offload of ct action isn't available"); @@ -1423,17 +1426,6 @@ mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, attr->ct_attr.ct_action = act->ct.action; attr->ct_attr.nf_ft = act->ct.flow_table; - if (!clear_action) - goto out; - - err = mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Failed to set registers for ct clear"); - return err; - } - attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - -out: return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h index 36d3652bf829..00a3ba862afb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.h @@ -129,6 +129,10 @@ bool mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, struct sk_buff *skb, u8 zone_restore_id); +int +mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts); + #else /* CONFIG_MLX5_TC_CT */ static inline struct mlx5_tc_ct_priv * @@ -170,6 +174,13 @@ mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) return 0; } +static inline int +mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv, + struct mlx5e_tc_mod_hdr_acts *mod_acts) +{ + return -EOPNOTSUPP; +} + static inline int mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv, struct mlx5_flow_attr *attr, -- cgit v1.2.3 From e3fdc71bcb6ffe1d4870a89252ba296a9558e294 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Mon, 25 Apr 2022 17:12:12 +0300 Subject: net/mlx5e: TC, fix decap fallback to uplink when int port not supported When resolving the decap route device for a tunnel decap rule, the result may be an OVS internal port device. Prior to adding the support for internal port offload, such case would result in using the uplink as the default decap route device which allowed devices that can't support internal port offload to offload this decap rule. This behavior got broken by adding the internal port offload which will fail in case the device can't support internal port offload. To restore the old behavior, use the uplink device as the decap route as before when internal port offload is not supported. Fixes: b16eb3c81fe2 ("net/mlx5: Support internal port as decap route device") Signed-off-by: Ariel Levkovich Reviewed-by: Maor Dickman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 378fc8e3bd97..d87bbb0be7c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -713,6 +713,7 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, struct net_device *filter_dev) { struct mlx5_esw_flow_attr *esw_attr = flow_attr->esw_attr; + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5e_tc_int_port *int_port; TC_TUN_ROUTE_ATTR_INIT(attr); u16 vport_num; @@ -747,7 +748,7 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv, esw_attr->rx_tun_attr->vni = MLX5_GET(fte_match_param, spec->match_value, misc_parameters.vxlan_vni); esw_attr->rx_tun_attr->decap_vport = vport_num; - } else if (netif_is_ovs_master(attr.route_dev)) { + } else if (netif_is_ovs_master(attr.route_dev) && mlx5e_tc_int_port_supported(esw)) { int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv), attr.route_dev->ifindex, MLX5E_TC_INT_PORT_INGRESS); -- cgit v1.2.3 From b069e14fff46c8da9fcc79957f8acaa3e2dfdb6b Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Tue, 29 Mar 2022 17:42:46 +0300 Subject: net/mlx5e: CT: Fix queued up restore put() executing after relevant ft release __mlx5_tc_ct_entry_put() queues release of tuple related to some ct FT, if that is the last reference to that tuple, the actual deletion of the tuple can happen after the FT is already destroyed and freed. Flush the used workqueue before destroying the ct FT. Fixes: a2173131526d ("net/mlx5e: CT: manage the lifetime of the ct entry object") Reviewed-by: Oz Shlomo Signed-off-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 73a1e0a4818d..ab4b0f3ee2a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -1741,6 +1741,8 @@ mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) static void mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) { + struct mlx5e_priv *priv; + if (!refcount_dec_and_test(&ft->refcount)) return; @@ -1750,6 +1752,8 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft) rhashtable_free_and_destroy(&ft->ct_entries_ht, mlx5_tc_ct_flush_ft_entry, ct_priv); + priv = netdev_priv(ct_priv->netdev); + flush_workqueue(priv->wq); mlx5_tc_ct_free_pre_ct_tables(ft); mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id); kfree(ft); -- cgit v1.2.3 From 0e322efd64d2ad11a773f7a314512086ebbe000c Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Mon, 28 Mar 2022 18:29:13 +0300 Subject: net/mlx5e: Avoid checking offload capability in post_parse action During TC action parsing, the can_offload callback is called before calling the action's main parsing callback. Later on, the can_offload callback is called again before handling the action's post_parse callback if exists. Since the main parsing callback might have changed and set parsing params for the rule, following can_offload checks might fail because some parsing params were already set. Specifically, the ct action main parsing sets the ct param in the parsing status structure and when the second can_offload for ct action is called, before handling the ct post parsing, it will return an error since it checks this ct param to indicate multiple ct actions which are not supported. Therefore, the can_offload call is removed from the post parsing handling to prevent such cases. This is allowed since the first can_offload call will ensure that the action can be offloaded and the fact the code reached the post parsing handling already means that the action can be offloaded. Fixes: 8300f225268b ("net/mlx5e: Create new flow attr for multi table actions") Signed-off-by: Ariel Levkovich Reviewed-by: Paul Blakey Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c index af37a8d247a1..2755c25ba324 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.c @@ -145,8 +145,7 @@ mlx5e_tc_act_post_parse(struct mlx5e_tc_act_parse_state *parse_state, flow_action_for_each(i, act, flow_action) { tc_act = mlx5e_tc_act_get(act->id, ns_type); - if (!tc_act || !tc_act->post_parse || - !tc_act->can_offload(parse_state, act, i, attr)) + if (!tc_act || !tc_act->post_parse) continue; err = tc_act->post_parse(parse_state, priv, attr); -- cgit v1.2.3 From b781bff882d16175277ca129c382886cb4c74a2c Mon Sep 17 00:00:00 2001 From: Moshe Tal Date: Wed, 9 Feb 2022 19:23:56 +0200 Subject: net/mlx5e: Fix trust state reset in reload Setting dscp2prio during the driver reload can cause dcb ieee app list to be not empty after the reload finish and as a result to a conflict between the priority trust state reported by the app and the state in the device register. Reset the dcb ieee app list on initialization in case this is conflicting with the register status. Fixes: 2a5e7a1344f4 ("net/mlx5e: Add dcbnl dscp to priority support") Signed-off-by: Moshe Tal Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c index d659fe07d464..8ead2c82a52a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c @@ -1200,6 +1200,16 @@ static int mlx5e_trust_initialize(struct mlx5e_priv *priv) return err; WRITE_ONCE(priv->dcbx_dp.trust_state, trust_state); + if (priv->dcbx_dp.trust_state == MLX5_QPTS_TRUST_PCP && priv->dcbx.dscp_app_cnt) { + /* + * Align the driver state with the register state. + * Temporary state change is required to enable the app list reset. + */ + priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_DSCP; + mlx5e_dcbnl_delete_app(priv); + priv->dcbx_dp.trust_state = MLX5_QPTS_TRUST_PCP; + } + mlx5e_params_calc_trust_tx_min_inline_mode(priv->mdev, &priv->channels.params, priv->dcbx_dp.trust_state); -- cgit v1.2.3 From cb7786a76ea39f394f0a059787fe24fa8e340fb6 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 11 Apr 2022 21:31:06 +0300 Subject: net/mlx5: Fix deadlock in sync reset flow The sync reset flow can lead to the following deadlock when poll_sync_reset() is called by timer softirq and waiting on del_timer_sync() for the same timer. Fix that by moving the part of the flow that waits for the timer to reset_reload_work. It fixes the following kernel Trace: RIP: 0010:del_timer_sync+0x32/0x40 ... Call Trace: mlx5_sync_reset_clear_reset_requested+0x26/0x50 [mlx5_core] poll_sync_reset.cold+0x36/0x52 [mlx5_core] call_timer_fn+0x32/0x130 __run_timers.part.0+0x180/0x280 ? tick_sched_handle+0x33/0x60 ? tick_sched_timer+0x3d/0x80 ? ktime_get+0x3e/0xa0 run_timer_softirq+0x2a/0x50 __do_softirq+0xe1/0x2d6 ? hrtimer_interrupt+0x136/0x220 irq_exit+0xae/0xb0 smp_apic_timer_interrupt+0x7b/0x140 apic_timer_interrupt+0xf/0x20 Fixes: 3c5193a87b0f ("net/mlx5: Use del_timer_sync in fw reset flow of halting poll") Signed-off-by: Moshe Shemesh Reviewed-by: Maher Sanalla Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 34 +++++++++++----------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 4aa22dce9b77..ec18d4ccbc11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -155,22 +155,6 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) } } -static void mlx5_sync_reset_reload_work(struct work_struct *work) -{ - struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, - reset_reload_work); - struct mlx5_core_dev *dev = fw_reset->dev; - int err; - - mlx5_enter_error_state(dev, true); - mlx5_unload_one(dev); - err = mlx5_health_wait_pci_up(dev); - if (err) - mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); - fw_reset->ret = err; - mlx5_fw_reset_complete_reload(dev); -} - static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; @@ -188,6 +172,23 @@ static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, boo mlx5_start_health_poll(dev); } +static void mlx5_sync_reset_reload_work(struct work_struct *work) +{ + struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, + reset_reload_work); + struct mlx5_core_dev *dev = fw_reset->dev; + int err; + + mlx5_sync_reset_clear_reset_requested(dev, false); + mlx5_enter_error_state(dev, true); + mlx5_unload_one(dev); + err = mlx5_health_wait_pci_up(dev); + if (err) + mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); + fw_reset->ret = err; + mlx5_fw_reset_complete_reload(dev); +} + #define MLX5_RESET_POLL_INTERVAL (HZ / 10) static void poll_sync_reset(struct timer_list *t) { @@ -202,7 +203,6 @@ static void poll_sync_reset(struct timer_list *t) if (fatal_error) { mlx5_core_warn(dev, "Got Device Reset\n"); - mlx5_sync_reset_clear_reset_requested(dev, false); queue_work(fw_reset->wq, &fw_reset->reset_reload_work); return; } -- cgit v1.2.3 From fc3d3db07b35885f238e1fa06b9f04a8fa7a62d0 Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 11 Apr 2022 20:38:44 +0300 Subject: net/mlx5: Avoid double clear or set of sync reset requested Double clear of reset requested state can lead to NULL pointer as it will try to delete the timer twice. This can happen for example on a race between abort from FW and pci error or reset. Avoid such case using test_and_clear_bit() to verify only one time reset requested state clear flow. Similarly use test_and_set_bit() to verify only one time reset requested state set flow. Fixes: 7dd6df329d4c ("net/mlx5: Handle sync reset abort event") Signed-off-by: Moshe Shemesh Reviewed-by: Maher Sanalla Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c | 28 +++++++++++++++------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index ec18d4ccbc11..ca1aba845dd6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -162,14 +162,19 @@ static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev) del_timer_sync(&fw_reset->timer); } -static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) +static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) { + mlx5_core_warn(dev, "Reset request was already cleared\n"); + return -EALREADY; + } + mlx5_stop_sync_reset_poll(dev); - clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags); if (poll_health) mlx5_start_health_poll(dev); + return 0; } static void mlx5_sync_reset_reload_work(struct work_struct *work) @@ -229,13 +234,17 @@ static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev) return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false); } -static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev) +static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) { + mlx5_core_warn(dev, "Reset request was already set\n"); + return -EALREADY; + } mlx5_stop_health_poll(dev, true); - set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags); mlx5_start_sync_reset_poll(dev); + return 0; } static void mlx5_fw_live_patch_event(struct work_struct *work) @@ -264,7 +273,9 @@ static void mlx5_sync_reset_request_event(struct work_struct *work) err ? "Failed" : "Sent"); return; } - mlx5_sync_reset_set_reset_requested(dev); + if (mlx5_sync_reset_set_reset_requested(dev)) + return; + err = mlx5_fw_reset_set_reset_sync_ack(dev); if (err) mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err); @@ -362,7 +373,8 @@ static void mlx5_sync_reset_now_event(struct work_struct *work) struct mlx5_core_dev *dev = fw_reset->dev; int err; - mlx5_sync_reset_clear_reset_requested(dev, false); + if (mlx5_sync_reset_clear_reset_requested(dev, false)) + return; mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n"); @@ -391,10 +403,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work) reset_abort_work); struct mlx5_core_dev *dev = fw_reset->dev; - if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) + if (mlx5_sync_reset_clear_reset_requested(dev, true)) return; - - mlx5_sync_reset_clear_reset_requested(dev, true); mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n"); } -- cgit v1.2.3 From a042d7f5bb68c47f6e0e546ca367d14e1e4b25ba Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Sun, 10 Apr 2022 11:58:05 +0000 Subject: net/mlx5: Fix matching on inner TTC The cited commits didn't use proper matching on inner TTC as a result distribution of encapsulated packets wasn't symmetric between the physical ports. Fixes: 4c71ce50d2fe ("net/mlx5: Support partial TTC rules") Fixes: 8e25a2bc6687 ("net/mlx5: Lag, add support to create TTC tables for LAG port selection") Signed-off-by: Mark Bloch Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index a6592f9c3c05..5be322528279 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -505,7 +505,7 @@ static int mlx5_lag_create_inner_ttc_table(struct mlx5_lag *ldev) struct ttc_params ttc_params = {}; mlx5_lag_set_inner_ttc_params(ldev, &ttc_params); - port_sel->inner.ttc = mlx5_create_ttc_table(dev, &ttc_params); + port_sel->inner.ttc = mlx5_create_inner_ttc_table(dev, &ttc_params); if (IS_ERR(port_sel->inner.ttc)) return PTR_ERR(port_sel->inner.ttc); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c index b63dec24747a..b78f2ba25c19 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -408,6 +408,8 @@ static int mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev, for (tt = 0; tt < MLX5_NUM_TT; tt++) { struct mlx5_ttc_rule *rule = &rules[tt]; + if (test_bit(tt, params->ignore_dests)) + continue; rule->rule = mlx5_generate_inner_ttc_rule(dev, ft, ¶ms->dests[tt], ttc_rules[tt].etype, -- cgit v1.2.3