From 2fa53e0c9fd9d8a7bf28d98bc0ca0592da4429e1 Mon Sep 17 00:00:00 2001
From: Nicolai Stange <nstange@suse.de>
Date: Thu, 28 Oct 2021 10:36:58 +0200
Subject: [PATCH 2/2] char/random: reinstantiate DRBGs once optimized sha512
 becomes available
References: jsc#SLE-21132,bsc#1191259,bsc#1195160
Patch-mainline: Never, downstream band-aid to get SP800-90B for userspace

The previous patch wired up the in-kernel DRBG implementations to
/dev/random & Co in FIPS mode as a downstream solution to achieve
SP800-90 compliance.

The "drbg_nopr_sha512" DRBGs get instantiated lazily per NUMA node upon
first usage, i.e. upon first access of the /dev/{u,}random or getrandom(2)
userspace interfaces. At that time however, only the built-in generic
sha512 implementation is available, which is subpar in terms of performance
when compared to the alternatives tuned for arch specific instruction sets.
It follows that these DRBGs instantiated early will use the generic sha512
implementation under the hood throughout the whole system uptime. As sha512
is being used extensively for "drbg_nopr_sha512" generation, its
performance is critical to /dev/random & Co throughput.

The crypto subsystem provides a notification mechanism for when a new
algorithm implementation becomes available: the CRYPTO_MSG_ALG_LOADED
notifier message, which gets sent e.g. whenever some crypto *.ko module
has been loaded and the crypto algorithms provided by it have passed their
selftests.
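
In outline, the hookup added by this patch boils down to the following
(condensed from fips_crypto_notify() and fips_crypto_notifier in the diff
below):

  static int fips_crypto_notify(struct notifier_block *this,
                                unsigned long msg, void *data)
  {
          struct crypto_alg *alg = data;

          if (msg != CRYPTO_MSG_ALG_LOADED || strcmp(alg->cra_name, "sha512"))
                  return NOTIFY_DONE;

          /* Record that the DRBGs shall be reinstantiated. */
          fips_trigger_drbg_algs_reset();
          return NOTIFY_OK;
  }

  /* Registered from rand_initialize() via
     crypto_register_notifier(&fips_crypto_notifier) when fips_enabled. */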

Make our downstream char/random.c customization listen for this message
and reinstantiate the DRBGs if the newly available algorithm happens to be
a sha512 implementation.

More specifically, introduce a new flag, fips_do_drbg_algs_reset, and
set it from the registered notifier_block upon receiving a
CRYPTO_MSG_ALG_LOADED for sha512.

Make fips_drbg_tfm_get(), the primitive supposed to retrieve a DRBG
instance for its caller, check this flag and, if it is found set, invoke
the new fips_reset_drbg_algs() to reset all previously allocated DRBG
instances.
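
In the diff below, this amounts to the following check at the top of the
fast path in fips_drbg_tfm_get():

  if (READ_ONCE(fips_do_drbg_algs_reset) && fips_reset_drbg_algs()) {
          /* Reset failed, resort to backup DRBG at node 0. */
          node = 0;
  }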

Note that some of those previously allocated DRBG instances could still
be in active use by concurrent reads from /dev/random & Co, and thus
fips_reset_drbg_algs() cannot simply deallocate them via
crypto_free_rng().

Make the DRBG instances refcounted and protect the refcount operations with
RCU. To this end, wrap the global, per NUMA node fips_drbg_tfms[] pointers
to the DRBG crypto_rng TFMs in a new, reference-counted struct
fips_drbg_tfm. fips_drbg_tfms[] itself will own a reference on each of its
elements. The aforementioned fips_reset_drbg_algs() will release that
reference owned by fips_drbg_tfms[] only after an RCU grace period has
elapsed.

Make fips_drbg_tfm_get() grab a reference on the DRBG instance retrieved
for its caller under RCU protection. Introduce the new fips_drbg_tfm_put()
for dropping that reference again and deallocating the associated DRBG
instance once the refcount has reached zero.
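
The resulting get/put pattern, condensed from the diff below:

  rcu_read_lock();
  drbg_tfm = rcu_dereference(drbg_tfms[node]);
  if (likely(drbg_tfm)) {
          /* Cannot race with a zero refcount: the reference owned by
           * fips_drbg_tfms[] is only dropped after a grace period. */
          refcount_inc(&drbg_tfm->ref);
  }
  rcu_read_unlock();

  /* ... and later, from fips_drbg_tfm_put(): */
  if (refcount_dec_and_test(&drbg_tfm->ref))
          __fips_drbg_tfm_free(drbg_tfm);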

Finally, note that for robustness reasons, the DRBG for NUMA node 0 is
supposed to always be available as a last resort in case the lazy
allocation for any of the other NUMA nodes fails, c.f. the previous patch
wiring up the DRBGs to /dev/random & Co. In order to maintain this
guarantee, make fips_reset_drbg_algs() first instantiate a new DRBG using
the alternative sha512 implementation for NUMA node 0 and abort the whole
reset operation if that fails.
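
Condensed from the diff below, the reset path thus looks like:

  new_node0_drbg_tfm = __fips_drbg_tfm_alloc();
  if (IS_ERR(new_node0_drbg_tfm))
          return PTR_ERR(new_node0_drbg_tfm); /* keep the old DRBGs */

  for (i = 0; i < nr_node_ids; ++i) {
          /* Node 0 gets the new instance, nodes > 0 are reset to NULL
           * and will be lazily reallocated on next use. */
          drbg_tfm = rcu_dereference_protected(fips_drbg_tfms[i], 1);
          rcu_assign_pointer(fips_drbg_tfms[i], new_node0_drbg_tfm);
          new_node0_drbg_tfm = NULL;
          /* queue drbg_tfm for release */
  }

  synchronize_rcu();
  /* Now drop the fips_drbg_tfms[] reference on each old instance. */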

For comparison, the following throughputs have been measured in an x86_64
VM with fips=1 before this patch:
  read size       throughput
  --------------------------
  8B              2.72MB/s
  16B             5.44MB/s
  32B             10.88MB/s
  64B             21.61MB/s
  128B            35.19MB/s
  256B            51.71MB/s
  512B            55.78MB/s
  1kB             58.15MB/s
  32MB            60.57MB/s

And with this patch applied:
  read size       throughput    improvement
  -----------------------------------------
  8B              3.28MB/s      21%
  16B             6.55MB/s      20%
  32B             13.09MB/s     20%
  64B             26.14MB/s     21%
  128B            43.15MB/s     23%
  256B            63.76MB/s     23%
  512B            69.58MB/s     25%
  1kB             73.49MB/s     26%
  32MB            77.23MB/s     28%

As can be seen from these numbers, throughput improved quite substantially,
in particular for read sizes > 64B, i.e. the sha512 digest size.

Signed-off-by: Nicolai Stange <nstange@suse.de>
---
 drivers/char/random.c |  168 +++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 147 insertions(+), 21 deletions(-)

--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -752,35 +752,56 @@ static int credit_entropy_bits_safe(stru
  *********************************************************************/
 static DEFINE_MUTEX(fips_init_mtx);
 
-static struct crypto_rng **fips_drbg_tfms;
+static unsigned int fips_do_drbg_algs_reset;
 
-static void __fips_drbg_tfm_free(struct crypto_rng *tfm)
+struct fips_drbg_tfm
 {
-	crypto_free_rng(tfm);
+	struct crypto_rng *tfm;
+	refcount_t ref;
+	struct list_head reset_list;
+};
+
+static struct fips_drbg_tfm __rcu **fips_drbg_tfms;
+
+static void __fips_drbg_tfm_free(struct fips_drbg_tfm *drbg_tfm)
+{
+	crypto_free_rng(drbg_tfm->tfm);
+	kfree(drbg_tfm);
 }
 
-static struct crypto_rng* __fips_drbg_tfm_alloc(void)
+static struct fips_drbg_tfm* __fips_drbg_tfm_alloc(void)
 {
+	struct fips_drbg_tfm *drbg_tfm;
 	struct crypto_rng *tfm;
 	int r;
 
+	drbg_tfm = kmalloc(sizeof(*drbg_tfm), GFP_KERNEL);
+	if (!drbg_tfm)
+		return ERR_PTR(-ENOMEM);
+
+	/* One for the reference from fips_drbg_tfms[]. */
+	refcount_set(&drbg_tfm->ref, 1);
+
 	tfm = crypto_alloc_rng("drbg_nopr_sha512", 0, 0);
-	if (IS_ERR(tfm))
-		return tfm;
+	if (IS_ERR(tfm)) {
+		kfree(drbg_tfm);
+		return (void *)tfm;
+	}
+	drbg_tfm->tfm = tfm;
 
 	r = crypto_rng_reset(tfm, NULL, crypto_rng_seedsize(tfm));
 	if (r) {
-		__fips_drbg_tfm_free(tfm);
+		__fips_drbg_tfm_free(drbg_tfm);
 		return ERR_PTR(r);
 	}
 
-	return tfm;
+	return drbg_tfm;
 }
 
 static int fips_drbgs_init(void)
 {
-	struct crypto_rng **drbg_tfms;
-	struct crypto_rng *drbg_tfm;
+	struct fips_drbg_tfm __rcu **drbg_tfms;
+	struct fips_drbg_tfm *drbg_tfm;
 
 	mutex_lock(&fips_init_mtx);
 	if (fips_drbg_tfms) {
@@ -804,21 +825,83 @@ static int fips_drbgs_init(void)
 				   PTR_ERR(drbg_tfm));
 		return PTR_ERR(drbg_tfm);
 	}
-
-	WRITE_ONCE(drbg_tfms[0], drbg_tfm);
+	rcu_assign_pointer(drbg_tfms[0], drbg_tfm);
 
 	pr_debug("random: FIPS drbg: init complete");
 	smp_store_release(&fips_drbg_tfms, drbg_tfms);
 
+	WRITE_ONCE(fips_do_drbg_algs_reset, 0);
 	mutex_unlock(&fips_init_mtx);
 
 	return 0;
 }
 
-static struct crypto_rng* fips_drbg_tfm_get(int node)
+static void fips_drbg_tfm_put(struct fips_drbg_tfm *drbg_tfm);
+
+static int fips_reset_drbg_algs(void)
+{
+	struct fips_drbg_tfm *new_node0_drbg_tfm, *drbg_tfm, *n;
+	LIST_HEAD(old_drbg_tfms);
+	int i;
+
+	mutex_lock(&fips_init_mtx);
+	if (!fips_do_drbg_algs_reset) {
+		mutex_unlock(&fips_init_mtx);
+		return 0;
+	}
+
+	new_node0_drbg_tfm = __fips_drbg_tfm_alloc();
+	if (IS_ERR(new_node0_drbg_tfm)) {
+		mutex_unlock(&fips_init_mtx);
+		pr_warn_ratelimited("random: FIPS drbg: drbg init failed at reset (%ld), performance degraded.",
+				    PTR_ERR(new_node0_drbg_tfm));
+		return PTR_ERR(new_node0_drbg_tfm);
+	}
+
+	for (i = 0; i < nr_node_ids; ++i) {
+		drbg_tfm = rcu_dereference_protected(fips_drbg_tfms[i], 1);
+		rcu_assign_pointer(fips_drbg_tfms[i], new_node0_drbg_tfm);
+		/* The remaining drbg's for i > 0 all get reset to NULL. */
+		new_node0_drbg_tfm = NULL;
+		if (drbg_tfm)
+			list_add(&drbg_tfm->reset_list, &old_drbg_tfms);
+	}
+
+	pr_debug("random: FIPS drbg: reset complete");
+	WRITE_ONCE(fips_do_drbg_algs_reset, 0);
+	mutex_unlock(&fips_init_mtx);
+
+	synchronize_rcu();
+	list_for_each_entry_safe(drbg_tfm, n, &old_drbg_tfms, reset_list) {
+		list_del(&drbg_tfm->reset_list);
+		/* Drop the reference owned by fips_drbg_tfms[]. */
+		fips_drbg_tfm_put(drbg_tfm);
+	}
+
+	return 0;
+}
+
+static void fips_trigger_drbg_algs_reset(void)
 {
-	struct crypto_rng **drbg_tfms;
-	struct crypto_rng *drbg_tfm;
+	pr_debug("random: FIPS drbg: reset requested");
+	mutex_lock(&fips_init_mtx);
+	WRITE_ONCE(fips_do_drbg_algs_reset, 1);
+	mutex_unlock(&fips_init_mtx);
+}
+
+static void fips_drbg_tfm_put(struct fips_drbg_tfm *drbg_tfm)
+{
+	if (IS_ERR_OR_NULL(drbg_tfm))
+		return;
+
+	if (refcount_dec_and_test(&drbg_tfm->ref))
+		__fips_drbg_tfm_free(drbg_tfm);
+}
+
+static struct fips_drbg_tfm* fips_drbg_tfm_get(int node)
+{
+	struct fips_drbg_tfm __rcu **drbg_tfms;
+	struct fips_drbg_tfm *drbg_tfm;
 
 	drbg_tfms = smp_load_acquire(&fips_drbg_tfms);
 	if (unlikely(!drbg_tfms)) {
@@ -831,15 +914,25 @@ static struct crypto_rng* fips_drbg_tfm_
 	}
 
 again:
-	drbg_tfm = smp_load_acquire(&drbg_tfms[node]);
-	if (likely(drbg_tfm))
+	if (READ_ONCE(fips_do_drbg_algs_reset) && fips_reset_drbg_algs()) {
+		/* Reset failed, resort to backup DRBG at node 0. */
+		node = 0;
+	}
+
+	rcu_read_lock();
+	drbg_tfm = rcu_dereference(drbg_tfms[node]);
+	if (likely(drbg_tfm)) {
+		refcount_inc(&drbg_tfm->ref);
+		rcu_read_unlock();
 		return drbg_tfm;
+	}
+	rcu_read_unlock();
 
 	if (WARN_ON_ONCE(!node))
 		return NULL;
 
 	mutex_lock(&fips_init_mtx);
-	if (drbg_tfms[node]) {
+	if (fips_do_drbg_algs_reset || drbg_tfms[node]) {
 		mutex_unlock(&fips_init_mtx);
 		goto again;
 	}
@@ -853,16 +946,43 @@ again:
 		goto again;
 	}
 
-	smp_store_release(&drbg_tfms[node], drbg_tfm);
+	refcount_inc(&drbg_tfm->ref);
+	rcu_assign_pointer(drbg_tfms[node], drbg_tfm);
 	mutex_unlock(&fips_init_mtx);
 
 	return drbg_tfm;
 }
 
+static int fips_crypto_notify(struct notifier_block *this,
+			      unsigned long msg, void *data)
+{
+	struct crypto_alg *alg = data;
+
+	if (msg != CRYPTO_MSG_ALG_LOADED)
+		return NOTIFY_DONE;
+
+	if (strcmp(alg->cra_name, "sha512"))
+		return NOTIFY_DONE;
+
+	fips_trigger_drbg_algs_reset();
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block fips_crypto_notifier = {
+	.notifier_call = fips_crypto_notify,
+	/*
+	 * Make sure this gets to run before cryptomgr_notify() so
+	 * that the final notifier chain result will be taken from
+	 * there.
+	 */
+	.priority = 100,
+};
+
 static ssize_t extract_fips_drbg_user(void __user *buf, size_t nbytes,
 				      __u8 __tmp[SHA512_DIGEST_SIZE])
 {
-	struct crypto_rng *drbg_tfm;
+	struct fips_drbg_tfm *drbg_tfm;
 	unsigned int block_size;
 	__u8 *tmp;
 	ssize_t ret = 0, i;
@@ -902,7 +1022,7 @@ static ssize_t extract_fips_drbg_user(vo
 		}
 
 		i = min_t(size_t, nbytes, block_size);
-		r = crypto_rng_get_bytes(drbg_tfm, tmp, (unsigned int)i);
+		r = crypto_rng_get_bytes(drbg_tfm->tfm, tmp, (unsigned int)i);
 		if (r < 0) {
 			ret = r;
 			break;
@@ -923,6 +1043,8 @@ static ssize_t extract_fips_drbg_user(vo
 	if (likely(tmp != __tmp))
 		kfree(tmp);
 
+	fips_drbg_tfm_put(drbg_tfm);
+
 	return ret;
 }
 
@@ -1990,6 +2112,10 @@ int __init rand_initialize(void)
 		urandom_warning.interval = 0;
 		unseeded_warning.interval = 0;
 	}
+
+	if (fips_enabled)
+		crypto_register_notifier(&fips_crypto_notifier);
+
 	return 0;
 }