Blob Blame History Raw
From: Vlad Buslov <vladbu@mellanox.com>
Date: Tue, 10 Sep 2019 14:38:17 +0300
Subject: net/mlx5e: Don't store direct pointer to action's tunnel info
Patch-mainline: v5.4-rc6
Git-commit: 2a4b6526236791a1bb8092079ad87a1629e78db5
References: jsc#SLE-8464

The Geneve implementation changed mlx5 tc to use a direct pointer to the
tunnel_key action's internal struct ip_tunnel_info instance. However, this
leads to a use-after-free error when the initial filter that caused creation
of a new encap entry is deleted, or when the tunnel_key action is manually
overwritten through the action API. Moreover, with the recent TC offloads API
unlocking change, struct flow_action_entry->tunnel points to a temporary copy
of the tunnel info that is deallocated after the filter is offloaded to
hardware, which causes the bug to reproduce every time a new filter is
attached to an existing encap entry, with the following KASAN bug:

[  314.885555] ==================================================================
[  314.886641] BUG: KASAN: use-after-free in memcmp+0x2c/0x60
[  314.886864] Read of size 1 at addr ffff88886c746280 by task tc/2682

[  314.887179] CPU: 22 PID: 2682 Comm: tc Not tainted 5.3.0-rc7+ #703
[  314.887188] Hardware name: Supermicro SYS-2028TP-DECR/X10DRT-P, BIOS 2.0b 03/30/2017
[  314.887195] Call Trace:
[  314.887215]  dump_stack+0x9a/0xf0
[  314.887236]  print_address_description+0x67/0x323
[  314.887248]  ? memcmp+0x2c/0x60
[  314.887257]  ? memcmp+0x2c/0x60
[  314.887272]  __kasan_report.cold+0x1a/0x3d
[  314.887474]  ? __mlx5e_tc_del_fdb_peer_flow+0x100/0x1b0 [mlx5_core]
[  314.887484]  ? memcmp+0x2c/0x60
[  314.887509]  kasan_report+0xe/0x12
[  314.887521]  memcmp+0x2c/0x60
[  314.887662]  mlx5e_tc_add_fdb_flow+0x51b/0xbe0 [mlx5_core]
[  314.887838]  ? mlx5e_encap_take+0x110/0x110 [mlx5_core]
[  314.887902]  ? lockdep_init_map+0x87/0x2c0
[  314.887924]  ? __init_waitqueue_head+0x4f/0x60
[  314.888062]  ? mlx5e_alloc_flow.isra.0+0x18c/0x1c0 [mlx5_core]
[  314.888207]  __mlx5e_add_fdb_flow+0x2d7/0x440 [mlx5_core]
[  314.888359]  ? mlx5e_tc_update_neigh_used_value+0x6f0/0x6f0 [mlx5_core]
[  314.888374]  ? match_held_lock+0x2e/0x240
[  314.888537]  mlx5e_configure_flower+0x830/0x16a0 [mlx5_core]
[  314.888702]  ? __mlx5e_add_fdb_flow+0x440/0x440 [mlx5_core]
[  314.888713]  ? down_read+0x118/0x2c0
[  314.888728]  ? down_read_killable+0x300/0x300
[  314.888882]  ? mlx5e_rep_get_ethtool_stats+0x180/0x180 [mlx5_core]
[  314.888899]  tc_setup_cb_add+0x127/0x270
[  314.888937]  fl_hw_replace_filter+0x2ac/0x380 [cls_flower]
[  314.888976]  ? fl_hw_destroy_filter+0x1b0/0x1b0 [cls_flower]
[  314.888990]  ? fl_change+0xbcf/0x27ef [cls_flower]
[  314.889030]  ? fl_change+0xa57/0x27ef [cls_flower]
[  314.889069]  fl_change+0x16bd/0x27ef [cls_flower]
[  314.889135]  ? __rhashtable_insert_fast.constprop.0+0xa00/0xa00 [cls_flower]
[  314.889167]  ? __radix_tree_lookup+0xa4/0x130
[  314.889200]  ? fl_get+0x169/0x240 [cls_flower]
[  314.889218]  ? fl_walk+0x230/0x230 [cls_flower]
[  314.889249]  tc_new_tfilter+0x5e1/0xd40
[  314.889281]  ? __rhashtable_insert_fast.constprop.0+0xa00/0xa00 [cls_flower]
[  314.889309]  ? tc_del_tfilter+0xa30/0xa30
[  314.889335]  ? __lock_acquire+0x5b5/0x2460
[  314.889378]  ? find_held_lock+0x85/0xa0
[  314.889442]  ? tc_del_tfilter+0xa30/0xa30
[  314.889465]  rtnetlink_rcv_msg+0x4ab/0x5f0
[  314.889488]  ? rtnl_dellink+0x490/0x490
[  314.889518]  ? lockdep_hardirqs_on+0x260/0x260
[  314.889538]  ? netlink_deliver_tap+0xab/0x5a0
[  314.889550]  ? match_held_lock+0x1b/0x240
[  314.889575]  netlink_rcv_skb+0xd0/0x200
[  314.889588]  ? rtnl_dellink+0x490/0x490
[  314.889605]  ? netlink_ack+0x440/0x440
[  314.889635]  ? netlink_deliver_tap+0x161/0x5a0
[  314.889648]  ? lock_downgrade+0x360/0x360
[  314.889657]  ? lock_acquire+0xe5/0x210
[  314.889686]  netlink_unicast+0x296/0x350
[  314.889707]  ? netlink_attachskb+0x390/0x390
[  314.889726]  ? _copy_from_iter_full+0xe0/0x3a0
[  314.889738]  ? __virt_addr_valid+0xbb/0x130
[  314.889771]  netlink_sendmsg+0x394/0x600
[  314.889800]  ? netlink_unicast+0x350/0x350
[  314.889817]  ? move_addr_to_kernel.part.0+0x90/0x90
[  314.889852]  ? netlink_unicast+0x350/0x350
[  314.889872]  sock_sendmsg+0x96/0xa0
[  314.889891]  ___sys_sendmsg+0x482/0x520
[  314.889919]  ? copy_msghdr_from_user+0x250/0x250
[  314.889930]  ? __fput+0x1fa/0x390
[  314.889941]  ? task_work_run+0xb7/0xf0
[  314.889957]  ? exit_to_usermode_loop+0x117/0x120
[  314.889972]  ? entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  314.889982]  ? do_syscall_64+0x74/0xe0
[  314.889992]  ? entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  314.890012]  ? mark_lock+0xac/0x9a0
[  314.890028]  ? __lock_acquire+0x5b5/0x2460
[  314.890053]  ? mark_lock+0xac/0x9a0
[  314.890083]  ? __lock_acquire+0x5b5/0x2460
[  314.890112]  ? match_held_lock+0x1b/0x240
[  314.890144]  ? __fget_light+0xa1/0xf0
[  314.890166]  ? sockfd_lookup_light+0x91/0xb0
[  314.890187]  __sys_sendmsg+0xba/0x130
[  314.890201]  ? __sys_sendmsg_sock+0xb0/0xb0
[  314.890225]  ? __blkcg_punt_bio_submit+0xd0/0xd0
[  314.890264]  ? lockdep_hardirqs_off+0xbe/0x100
[  314.890274]  ? mark_held_locks+0x24/0x90
[  314.890286]  ? do_syscall_64+0x1e/0xe0
[  314.890308]  do_syscall_64+0x74/0xe0
[  314.890325]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[  314.890336] RIP: 0033:0x7f00ca33d7b8
[  314.890348] Code: 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e fa 48 8d 05 65 8f 0c 00 8b 00 85 c0 75 17 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 58 c3 0f 1f 80 00 00 00 00 48 83 ec 28 89 54
[  314.890356] RSP: 002b:00007ffea2983928 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[  314.890369] RAX: ffffffffffffffda RBX: 000000005d777d5b RCX: 00007f00ca33d7b8
[  314.890377] RDX: 0000000000000000 RSI: 00007ffea2983990 RDI: 0000000000000003
[  314.890384] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000006
[  314.890392] R10: 0000000000404eda R11: 0000000000000246 R12: 0000000000000001
[  314.890400] R13: 000000000047f640 R14: 00007ffea2987b58 R15: 0000000000000021

[  314.890529] Allocated by task 2687:
[  314.890684]  save_stack+0x1b/0x80
[  314.890694]  __kasan_kmalloc.constprop.0+0xc2/0xd0
[  314.890705]  __kmalloc_track_caller+0x102/0x340
[  314.890721]  kmemdup+0x1d/0x40
[  314.890730]  tc_setup_flow_action+0x731/0x2c27
[  314.890743]  fl_hw_replace_filter+0x23b/0x380 [cls_flower]
[  314.890756]  fl_change+0x16bd/0x27ef [cls_flower]
[  314.890765]  tc_new_tfilter+0x5e1/0xd40
[  314.890776]  rtnetlink_rcv_msg+0x4ab/0x5f0
[  314.890786]  netlink_rcv_skb+0xd0/0x200
[  314.890796]  netlink_unicast+0x296/0x350
[  314.890805]  netlink_sendmsg+0x394/0x600
[  314.890815]  sock_sendmsg+0x96/0xa0
[  314.890825]  ___sys_sendmsg+0x482/0x520
[  314.890834]  __sys_sendmsg+0xba/0x130
[  314.890844]  do_syscall_64+0x74/0xe0
[  314.890854]  entry_SYSCALL_64_after_hwframe+0x49/0xbe

[  314.890937] Freed by task 2687:
[  314.891076]  save_stack+0x1b/0x80
[  314.891086]  __kasan_slab_free+0x12c/0x170
[  314.891095]  kfree+0xeb/0x2f0
[  314.891106]  tc_cleanup_flow_action+0x69/0xa0
[  314.891119]  fl_hw_replace_filter+0x2c5/0x380 [cls_flower]
[  314.891132]  fl_change+0x16bd/0x27ef [cls_flower]
[  314.891140]  tc_new_tfilter+0x5e1/0xd40
[  314.891151]  rtnetlink_rcv_msg+0x4ab/0x5f0
[  314.891161]  netlink_rcv_skb+0xd0/0x200
[  314.891170]  netlink_unicast+0x296/0x350
[  314.891180]  netlink_sendmsg+0x394/0x600
[  314.891190]  sock_sendmsg+0x96/0xa0
[  314.891200]  ___sys_sendmsg+0x482/0x520
[  314.891208]  __sys_sendmsg+0xba/0x130
[  314.891218]  do_syscall_64+0x74/0xe0
[  314.891228]  entry_SYSCALL_64_after_hwframe+0x49/0xbe

[  314.891315] The buggy address belongs to the object at ffff88886c746280
                which belongs to the cache kmalloc-96 of size 96
[  314.891762] The buggy address is located 0 bytes inside of
                96-byte region [ffff88886c746280, ffff88886c7462e0)
[  314.892196] The buggy address belongs to the page:
[  314.892387] page:ffffea0021b1d180 refcount:1 mapcount:0 mapping:ffff88835d00ef80 index:0x0
[  314.892398] flags: 0x57ffffc0000200(slab)
[  314.892413] raw: 0057ffffc0000200 ffffea00219e0340 0000000800000008 ffff88835d00ef80
[  314.892423] raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000
[  314.892430] page dumped because: kasan: bad access detected

[  314.892515] Memory state around the buggy address:
[  314.892707]  ffff88886c746180: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
[  314.892976]  ffff88886c746200: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
[  314.893251] >ffff88886c746280: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
[  314.893522]                    ^
[  314.893657]  ffff88886c746300: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
[  314.893924]  ffff88886c746380: 00 00 00 00 00 00 00 00 00 fc fc fc fc fc fc fc
[  314.894189] ==================================================================

Fix the issue by duplicating the tunnel info into a per-encap copy that is
deallocated together with the encap structure. Also, duplicate the tunnel
info in the flow parse attribute to support cases when a flow might be
attached asynchronously.

Fixes: 1f6da30697d0 ("net/mlx5e: Geneve, Keep tunnel info as pointer to the original struct")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Yevgeny Kliteynik <kliteyn@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c |   34 +++++++++++++++++++-----
 1 file changed, 27 insertions(+), 7 deletions(-)

--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1278,8 +1278,10 @@ static void mlx5e_tc_del_fdb_flow(struct
 	mlx5_eswitch_del_vlan_action(esw, attr);
 
 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
-		if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
+		if (attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP) {
 			mlx5e_detach_encap(priv, flow, out_index);
+			kfree(attr->parse_attr->tun_info[out_index]);
+		}
 	kvfree(attr->parse_attr);
 
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
@@ -1559,6 +1561,7 @@ static void mlx5e_encap_dealloc(struct m
 			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
 	}
 
+	kfree(e->tun_info);
 	kfree(e->encap_header);
 	kfree_rcu(e, rcu);
 }
@@ -2972,6 +2975,13 @@ mlx5e_encap_get(struct mlx5e_priv *priv,
 	return NULL;
 }
 
+static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
+{
+	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
+
+	return kmemdup(tun_info, tun_size, GFP_KERNEL);
+}
+
 static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 			      struct mlx5e_tc_flow *flow,
 			      struct net_device *mirred_dev,
@@ -3028,13 +3038,15 @@ static int mlx5e_attach_encap(struct mlx
 	refcount_set(&e->refcnt, 1);
 	init_completion(&e->res_ready);
 
+	tun_info = dup_tun_info(tun_info);
+	if (!tun_info) {
+		err = -ENOMEM;
+		goto out_err_init;
+	}
 	e->tun_info = tun_info;
 	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
-	if (err) {
-		kfree(e);
-		e = NULL;
-		goto out_err;
-	}
+	if (err)
+		goto out_err_init;
 
 	INIT_LIST_HEAD(&e->flows);
 	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
@@ -3075,6 +3087,12 @@ out_err:
 	if (e)
 		mlx5e_encap_put(priv, e);
 	return err;
+
+out_err_init:
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+	kfree(tun_info);
+	kfree(e);
+	return err;
 }
 
 static int parse_tc_vlan_action(struct mlx5e_priv *priv,
@@ -3295,7 +3313,9 @@ static int parse_tc_fdb_actions(struct m
 			} else if (encap) {
 				parse_attr->mirred_ifindex[attr->out_count] =
 					out_dev->ifindex;
-				parse_attr->tun_info[attr->out_count] = info;
+				parse_attr->tun_info[attr->out_count] = dup_tun_info(info);
+				if (!parse_attr->tun_info[attr->out_count])
+					return -ENOMEM;
 				encap = false;
 				attr->dests[attr->out_count].flags |=
 					MLX5_ESW_DEST_ENCAP;