Blob Blame History Raw
From: Craig Gallek <kraig@google.com>
Date: Mon, 18 Sep 2017 15:30:55 -0400
Subject: bpf: Implement map_delete_elem for BPF_MAP_TYPE_LPM_TRIE
Patch-mainline: v4.15-rc1
Git-commit: e454cf5958538666635488030046b6a84a22d447
References: bsc#1083647

This is a simple non-recursive delete operation.  It prunes paths
of empty nodes in the tree, but it does not try to further compress
the tree as nodes are removed.

Signed-off-by: Craig Gallek <kraig@google.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Gary Lin <glin@suse.com>
---
 kernel/bpf/lpm_trie.c |   80 ++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 77 insertions(+), 3 deletions(-)

--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -388,10 +388,84 @@ out:
 	return ret;
 }
 
-static int trie_delete_elem(struct bpf_map *map, void *key)
+/* Called from syscall or from eBPF program */
+static int trie_delete_elem(struct bpf_map *map, void *_key)
 {
-	/* TODO */
-	return -ENOSYS;
+	struct lpm_trie *trie = container_of(map, struct lpm_trie, map);
+	struct bpf_lpm_trie_key *key = _key;
+	struct lpm_trie_node __rcu **trim;
+	struct lpm_trie_node *node;
+	unsigned long irq_flags;
+	unsigned int next_bit;
+	size_t matchlen = 0;
+	int ret = 0;
+
+	if (key->prefixlen > trie->max_prefixlen)
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&trie->lock, irq_flags);
+
+	/* Walk the tree looking for an exact key/length match and keeping
+	 * track of where we could begin trimming the tree.  The trim-point
+	 * is the sub-tree along the walk consisting of only single-child
+	 * intermediate nodes and ending at a leaf node that we want to
+	 * remove.
+	 */
+	trim = &trie->root;
+	node = rcu_dereference_protected(
+		trie->root, lockdep_is_held(&trie->lock));
+	while (node) {
+		matchlen = longest_prefix_match(trie, node, key);
+
+		if (node->prefixlen != matchlen ||
+		    node->prefixlen == key->prefixlen)
+			break;
+
+		next_bit = extract_bit(key->data, node->prefixlen);
+		/* If we hit a node that has more than one child or is a valid
+		 * prefix itself, do not remove it. Reset the root of the trim
+		 * path to its descendant on our path.
+		 */
+		if (!(node->flags & LPM_TREE_NODE_FLAG_IM) ||
+		    (node->child[0] && node->child[1]))
+			trim = &node->child[next_bit];
+		node = rcu_dereference_protected(
+			node->child[next_bit], lockdep_is_held(&trie->lock));
+	}
+
+	if (!node || node->prefixlen != key->prefixlen ||
+	    (node->flags & LPM_TREE_NODE_FLAG_IM)) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	trie->n_entries--;
+
+	/* If the node we are removing is not a leaf node, simply mark it
+	 * as intermediate and we are done.
+	 */
+	if (rcu_access_pointer(node->child[0]) ||
+	    rcu_access_pointer(node->child[1])) {
+		node->flags |= LPM_TREE_NODE_FLAG_IM;
+		goto out;
+	}
+
+	/* trim should now point to the slot holding the start of a path from
+	 * zero or more intermediate nodes to our leaf node for deletion.
+	 */
+	while ((node = rcu_dereference_protected(
+			*trim, lockdep_is_held(&trie->lock)))) {
+		RCU_INIT_POINTER(*trim, NULL);
+		trim = rcu_access_pointer(node->child[0]) ?
+			&node->child[0] :
+			&node->child[1];
+		kfree_rcu(node, rcu);
+	}
+
+out:
+	raw_spin_unlock_irqrestore(&trie->lock, irq_flags);
+
+	return ret;
 }
 
 #define LPM_DATA_SIZE_MAX	256