
random(4): Generalize algorithm-independent APIs

At a basic level, remove assumptions about the underlying algorithm (such as
output block size and reseeding requirements) from the algorithm-independent
logic in randomdev.c.  Chacha20 does not have many of the restrictions that
AES-ICM does as a PRF (Pseudo-Random Function), because it has a cipher
block size of 512 bits.  The motivation is that by generalizing the API,
Chacha is not penalized by the limitations of AES.

In READ_RANDOM_UIO, first attempt to NOWAIT allocate a large enough buffer
for the entire user request, or the maximal input we'll accept between
signal checking, whichever is smaller.  The idea is that the implementation
of any randomdev algorithm is then free to divide up large requests in
whatever fashion it sees fit.
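
In outline, the allocation path becomes the following (condensed from the
randomdev.c hunk further below; the M_NOWAIT attempt may fail, in which case
the code falls back to the old single-page blocking allocation):

  uint8_t *random_buf;
  size_t bufsize;

  /* Try to cover the whole request, capped at the signal-check period. */
  bufsize = MIN(sigchk_period, (size_t)uio->uio_resid);
  random_buf = malloc(bufsize, M_ENTROPY, M_NOWAIT);
  if (random_buf == NULL) {
          /* Fall back to the previous behavior: one page, may sleep. */
          bufsize = PAGE_SIZE;
          random_buf = malloc(bufsize, M_ENTROPY, M_WAITOK);
  }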

As part of this, two responsibilities from the "algorithm-generic" randomdev
code are pushed down into the Fortuna ra_read implementation (and any other
future or out-of-tree ra_read implementations; a sketch follows the list):

  1. If an algorithm needs to rekey every N bytes, it is responsible for
  handling that in ra_read(). (I.e., Fortuna's 1MB rekey interval for AES
  block generation.)

  2. If an algorithm uses a block cipher that doesn't tolerate partial-block
  requests (again, e.g., AES), it is also responsible for handling that in
  ra_read().
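
A rough sketch of how both responsibilities land inside an ra_read()
implementation for a hypothetical block-oriented generator follows; the names
MY_BLOCKSIZE, MY_REKEY_INTERVAL and my_generate_and_rekey() are illustrative
only and not part of this commit (the real version is random_fortuna_read()
in the diff below):

  static void
  example_alg_read(uint8_t *buf, size_t bytecount)
  {
          uint8_t tail[MY_BLOCKSIZE];
          size_t full_len, chunk;

          /* (2) Peel off any partial trailing block up front. */
          full_len = rounddown(bytecount, MY_BLOCKSIZE);
          while (full_len > 0) {
                  /* (1) Never generate past the rekey interval per call. */
                  chunk = MIN(full_len, MY_REKEY_INTERVAL);
                  my_generate_and_rekey(buf, chunk);
                  buf += chunk;
                  full_len -= chunk;
                  bytecount -= chunk;
          }
          if (bytecount > 0) {
                  /* Generate a whole block; copy out only what was asked. */
                  my_generate_and_rekey(tail, sizeof(tail));
                  memcpy(buf, tail, bytecount);
                  explicit_bzero(tail, sizeof(tail));
          }
  }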

Several APIs are changed from u_int buffer length to the more canonical
size_t.  Several APIs are changed from taking a blockcount to a bytecount,
to permit PRFs like Chacha20 to directly generate quantities of output that
are not multiples of RANDOM_BLOCKSIZE (AES block size).
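
Concretely, in Chacha20 mode a caller can now ask the keystream generator for
an arbitrary, odd-sized quantity directly (a hypothetical call, not taken
from the diff):

  union randomdev_key key;
  uint128_t ctr;
  uint8_t buf[21];

  /* ... key and ctr set up elsewhere ... */
  /* 21 bytes is fine for Chacha20; AES mode still requires a multiple of
   * RANDOM_BLOCKSIZE (16 bytes). */
  randomdev_keystream(&key, &ctr, buf, sizeof(buf));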

The Fortuna algorithm is changed to NOT rekey every 1MiB when in Chacha20
mode (kern.random.use_chacha20_cipher="1").  This is explicitly supported by
the math in FS&K §9.4 (Ferguson, Schneier, and Kohno; "Cryptography
Engineering"), as well as by their conclusion: "If we had a block cipher
with a 256-bit [or greater] block size, then the collisions would not
have been an issue at all."
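
As a back-of-the-envelope check (not part of the commit): a block cipher is a
permutation, so it never repeats an output block under a single key, and the
advantage of distinguishing q output blocks of a b-bit block cipher from true
random data is roughly

  q * (q - 1) / 2^(b + 1)

For AES (b = 128) capped at q = 2^16 blocks (1 MiB), that is on the order of
2^-97, which is why FS&K accept the 1 MiB limit; with Chacha20's 512-bit
block, the bound stays negligible for any amount of output the kernel could
plausibly generate, so no rekey interval is needed.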

For now, continue to break up reads into PAGE_SIZE chunks, as they were
before.  So, no functional change, mostly.

Reviewed by:	markm
Approved by:	secteam(delphij)
Differential Revision:	https://reviews.freebsd.org/D20312
Conrad Meyer 2019-06-17 15:09:12 +00:00
parent 403c041316
commit d0d71d818c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=349138
7 changed files with 191 additions and 115 deletions

View File

@@ -128,7 +128,7 @@ static uint8_t zero_region[RANDOM_ZERO_BLOCKSIZE];
#endif
static void random_fortuna_pre_read(void);
static void random_fortuna_read(uint8_t *, u_int);
static void random_fortuna_read(uint8_t *, size_t);
static bool random_fortuna_seeded(void);
static bool random_fortuna_seeded_internal(void);
static void random_fortuna_process_event(struct harvest_event *);
@@ -305,50 +305,46 @@ random_fortuna_reseed_internal(uint32_t *entropy_data, u_int blockcount)
uint128_increment(&fortuna_state.fs_counter);
}
/*-
* FS&K - GenerateBlocks()
* Generate a number of complete blocks of random output.
*/
static __inline void
random_fortuna_genblocks(uint8_t *buf, u_int blockcount)
{
RANDOM_RESEED_ASSERT_LOCK_OWNED();
KASSERT(!uint128_is_zero(fortuna_state.fs_counter), ("FS&K: C != 0"));
/*
* Fills buf with RANDOM_BLOCKSIZE * blockcount bytes of keystream.
* Increments fs_counter as it goes.
*/
randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter,
buf, blockcount);
}
/*-
* FS&K - PseudoRandomData()
* This generates no more than 2^20 bytes of data, and cleans up its
* internal state when finished. It is assumed that a whole number of
* blocks are available for writing; any excess generated will be
* ignored.
*
* If Chacha20 is used, output size is unrestricted. If AES-CTR is used,
* output size MUST be <= 1MB and a multiple of RANDOM_BLOCKSIZE. The
* reasoning for this is discussed in FS&K 9.4; the significant distinction
* between the two ciphers is that AES has a *block* size of 128 bits while
* Chacha has a *block* size of 512 bits.
*/
static __inline void
random_fortuna_genrandom(uint8_t *buf, u_int bytecount)
random_fortuna_genrandom(uint8_t *buf, size_t bytecount)
{
uint8_t temp[RANDOM_BLOCKSIZE * RANDOM_KEYS_PER_BLOCK];
u_int blockcount;
uint8_t newkey[RANDOM_KEYSIZE];
RANDOM_RESEED_ASSERT_LOCK_OWNED();
/*-
* FS&K - assert(n < 2^20 (== 1 MB)
* FS&K - assert(n < 2^20 (== 1 MB)) when 128-bit block cipher is used
* - r = first-n-bytes(GenerateBlocks(ceil(n/16)))
* - K = GenerateBlocks(2)
*/
KASSERT((bytecount <= RANDOM_FORTUNA_MAX_READ), ("invalid single read request to Fortuna of %d bytes", bytecount));
blockcount = howmany(bytecount, RANDOM_BLOCKSIZE);
random_fortuna_genblocks(buf, blockcount);
random_fortuna_genblocks(temp, RANDOM_KEYS_PER_BLOCK);
randomdev_encrypt_init(&fortuna_state.fs_key, temp);
explicit_bzero(temp, sizeof(temp));
KASSERT(random_chachamode || bytecount <= RANDOM_FORTUNA_MAX_READ,
("%s: invalid large read request: %zu bytes", __func__,
bytecount));
/*
* This is where FS&K would invoke GenerateBlocks(). GenerateBlocks()
* doesn't make a lot of sense or have much value if we use bytecount
* for the API (which is useful for ciphers that do not require
* block-sized output, like Chacha20).
*
* Just invoke our PRF abstraction directly, which is responsible for
* updating fs_counter ('C').
*/
randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter,
buf, bytecount);
randomdev_keystream(&fortuna_state.fs_key, &fortuna_state.fs_counter,
newkey, sizeof(newkey));
randomdev_encrypt_init(&fortuna_state.fs_key, newkey);
explicit_bzero(newkey, sizeof(newkey));
}
/*-
@@ -441,18 +437,71 @@ random_fortuna_pre_read(void)
* FS&K - RandomData() (Part 2)
* Main read from Fortuna, continued. May be called multiple times after
* the random_fortuna_pre_read() above.
* The supplied buf MUST be a multiple of RANDOM_BLOCKSIZE in size.
* Lots of code presumes this for efficiency, both here and in other
* routines. You are NOT allowed to break this!
*
* The supplied buf MAY not be a multiple of RANDOM_BLOCKSIZE in size; it is
* the responsibility of the algorithm to accommodate partial block reads, if a
* block output mode is used.
*/
void
random_fortuna_read(uint8_t *buf, u_int bytecount)
random_fortuna_read(uint8_t *buf, size_t bytecount)
{
uint8_t remainder_buf[RANDOM_BLOCKSIZE];
size_t read_directly_len, read_chunk;
/*
* The underlying AES generator expects multiples of RANDOM_BLOCKSIZE.
*/
if (random_chachamode)
read_directly_len = bytecount;
else
read_directly_len = rounddown(bytecount, RANDOM_BLOCKSIZE);
KASSERT((bytecount % RANDOM_BLOCKSIZE) == 0, ("%s(): bytecount (= %d) must be a multiple of %d", __func__, bytecount, RANDOM_BLOCKSIZE ));
RANDOM_RESEED_LOCK();
random_fortuna_genrandom(buf, bytecount);
KASSERT(!uint128_is_zero(fortuna_state.fs_counter), ("FS&K: C != 0"));
while (read_directly_len > 0) {
/*
* 128-bit block ciphers like AES must be re-keyed at 1MB
* intervals to avoid unacceptable statistical differentiation
* from true random data.
*
* 512-bit block ciphers like Chacha20 do not have this
* problem. (FS&K 9.4)
*/
if (random_chachamode)
read_chunk = read_directly_len;
else
read_chunk = MIN(read_directly_len,
RANDOM_FORTUNA_MAX_READ);
/*
* For now, we hold the global Fortuna mutex, so yield
* periodically to provide vague availability to other lock
* users. PAGE_SIZE is chosen to match existing behavior.
*/
read_chunk = MIN(read_chunk, PAGE_SIZE);
random_fortuna_genrandom(buf, read_chunk);
buf += read_chunk;
read_directly_len -= read_chunk;
bytecount -= read_chunk;
/* Perform the actual yield. */
if (read_directly_len != 0) {
RANDOM_RESEED_UNLOCK();
RANDOM_RESEED_LOCK();
}
}
if (bytecount > 0)
random_fortuna_genrandom(remainder_buf, sizeof(remainder_buf));
RANDOM_RESEED_UNLOCK();
if (bytecount > 0) {
memcpy(buf, remainder_buf, bytecount);
explicit_bzero(remainder_buf, sizeof(remainder_buf));
}
}
#ifdef _KERNEL

View File

@@ -125,16 +125,18 @@ randomdev_encrypt_init(union randomdev_key *context, const void *data)
}
/*
* Create a pseudorandom output stream of 'blockcount' blocks using a CTR-mode
* Create a pseudorandom output stream of 'bytecount' bytes using a CTR-mode
* cipher or similar. The 128-bit counter is supplied in the in-out parameter
* 'ctr.' The output stream goes to 'd_out.' 'blockcount' RANDOM_BLOCKSIZE
* bytes are generated.
* 'ctr.' The output stream goes to 'd_out.'
*
* If AES is used, 'bytecount' is guaranteed to be a multiple of
* RANDOM_BLOCKSIZE.
*/
void
randomdev_keystream(union randomdev_key *context, uint128_t *ctr,
void *d_out, u_int blockcount)
void *d_out, size_t bytecount)
{
u_int i;
size_t i, blockcount, read_chunk;
if (random_chachamode) {
uint128_t lectr;
@@ -147,8 +149,20 @@ randomdev_keystream(union randomdev_key *context, uint128_t *ctr,
le128enc(&lectr, *ctr);
chacha_ivsetup(&context->chacha, NULL, (const void *)&lectr);
chacha_encrypt_bytes(&context->chacha, NULL, d_out,
RANDOM_BLOCKSIZE * blockcount);
while (bytecount > 0) {
/*
* We are limited by the chacha_encrypt_bytes API to
* u32 bytes per chunk.
*/
read_chunk = MIN(bytecount,
rounddown((size_t)UINT32_MAX, CHACHA_BLOCKLEN));
chacha_encrypt_bytes(&context->chacha, NULL, d_out,
read_chunk);
d_out = (char *)d_out + read_chunk;
bytecount -= read_chunk;
}
/*
* Decode Chacha-updated LE counter to native endian and store
@@ -156,7 +170,14 @@ randomdev_keystream(union randomdev_key *context, uint128_t *ctr,
*/
chacha_ctrsave(&context->chacha, (void *)&lectr);
*ctr = le128dec(&lectr);
explicit_bzero(&lectr, sizeof(lectr));
} else {
KASSERT(bytecount % RANDOM_BLOCKSIZE == 0,
("%s: AES mode invalid bytecount, not a multiple of native "
"block size", __func__));
blockcount = bytecount / RANDOM_BLOCKSIZE;
for (i = 0; i < blockcount; i++) {
/*-
* FS&K - r = r|E(K,C)

View File

@@ -61,7 +61,7 @@ void randomdev_hash_iterate(struct randomdev_hash *, const void *, size_t);
void randomdev_hash_finish(struct randomdev_hash *, void *);
void randomdev_encrypt_init(union randomdev_key *, const void *);
void randomdev_keystream(union randomdev_key *context, uint128_t *, void *, u_int);
void randomdev_keystream(union randomdev_key *context, uint128_t *, void *, size_t);
void randomdev_getkey(union randomdev_key *, const void **, size_t *);
#endif /* SYS_DEV_RANDOM_HASH_H_INCLUDED */

View File

@@ -84,7 +84,7 @@ __FBSDID("$FreeBSD$");
#endif /* _KERNEL */
static void random_other_pre_read(void);
static void random_other_read(uint8_t *, u_int);
static void random_other_read(uint8_t *, size_t);
static bool random_other_seeded(void);
static void random_other_process_event(struct harvest_event *);
static void random_other_init_alg(void *);
@@ -165,10 +165,10 @@ random_other_pre_read(void)
}
/*
* void random_other_read(uint8_t *buf, u_int count)
* void random_other_read(uint8_t *buf, size_t count)
*
* Generate <count> bytes of output into <*buf>.
* You may use the fact that <count> will be a multiple of
* You may NOT use the fact that <count> will be a multiple of
* RANDOM_BLOCKSIZE for optimization purposes.
*
* This function will always be called with your generator
@@ -176,7 +176,7 @@ random_other_pre_read(void)
* output here, then feel free to KASSERT() or panic().
*/
static void
random_other_read(uint8_t *buf, u_int count)
random_other_read(uint8_t *buf, size_t count)
{
RANDOM_RESEED_LOCK();

View File

@@ -172,17 +172,21 @@ randomdev_wait_until_seeded(bool interruptible)
int
READ_RANDOM_UIO(struct uio *uio, bool nonblock)
{
uint8_t *random_buf;
int error;
ssize_t read_len, total_read, c;
/* 16 MiB takes about 0.08 s CPU time on my 2017 AMD Zen CPU */
#define SIGCHK_PERIOD (16 * 1024 * 1024)
const size_t sigchk_period = SIGCHK_PERIOD;
CTASSERT(SIGCHK_PERIOD % PAGE_SIZE == 0);
#undef SIGCHK_PERIOD
random_buf = malloc(PAGE_SIZE, M_ENTROPY, M_WAITOK);
uint8_t *random_buf;
size_t total_read, read_len;
ssize_t bufsize;
int error;
KASSERT(uio->uio_rw == UIO_READ, ("%s: bogus write", __func__));
KASSERT(uio->uio_resid >= 0, ("%s: bogus negative resid", __func__));
p_random_alg_context->ra_pre_read();
error = 0;
/* (Un)Blocking logic */
@@ -193,44 +197,64 @@ READ_RANDOM_UIO(struct uio *uio, bool nonblock)
error = randomdev_wait_until_seeded(
SEEDWAIT_INTERRUPTIBLE);
}
if (error == 0) {
read_rate_increment((uio->uio_resid + sizeof(uint32_t))/sizeof(uint32_t));
total_read = 0;
while (uio->uio_resid && !error) {
read_len = uio->uio_resid;
/*
* Belt-and-braces.
* Round up the read length to a crypto block size multiple,
* which is what the underlying generator is expecting.
* See the random_buf size requirements in the Fortuna code.
*/
read_len = roundup(read_len, RANDOM_BLOCKSIZE);
/* Work in chunks page-sized or less */
read_len = MIN(read_len, PAGE_SIZE);
p_random_alg_context->ra_read(random_buf, read_len);
c = MIN(uio->uio_resid, read_len);
/*
* uiomove() may yield the CPU before each 'c' bytes
* (up to PAGE_SIZE) are copied out.
*/
error = uiomove(random_buf, c, uio);
total_read += c;
/*
* Poll for signals every few MBs to avoid very long
* uninterruptible syscalls.
*/
if (error == 0 && uio->uio_resid != 0 &&
total_read % sigchk_period == 0) {
error = tsleep_sbt(&random_alg_context, PCATCH,
"randrd", SBT_1NS, 0, C_HARDCLOCK);
/* Squash tsleep timeout condition */
if (error == EWOULDBLOCK)
error = 0;
}
}
if (error == ERESTART || error == EINTR)
error = 0;
if (error != 0)
return (error);
read_rate_increment(howmany(uio->uio_resid + 1, sizeof(uint32_t)));
total_read = 0;
/* Easy to deal with the trivial 0 byte case. */
if (__predict_false(uio->uio_resid == 0))
return (0);
/*
* If memory is plentiful, use maximally sized requests to avoid
* per-call algorithm overhead. But fall back to a single page
* allocation if the full request isn't immediately available.
*/
bufsize = MIN(sigchk_period, (size_t)uio->uio_resid);
random_buf = malloc(bufsize, M_ENTROPY, M_NOWAIT);
if (random_buf == NULL) {
bufsize = PAGE_SIZE;
random_buf = malloc(bufsize, M_ENTROPY, M_WAITOK);
}
error = 0;
while (uio->uio_resid > 0 && error == 0) {
read_len = MIN((size_t)uio->uio_resid, bufsize);
p_random_alg_context->ra_read(random_buf, read_len);
/*
* uiomove() may yield the CPU before each 'read_len' bytes (up
* to bufsize) are copied out.
*/
error = uiomove(random_buf, read_len, uio);
total_read += read_len;
/*
* Poll for signals every few MBs to avoid very long
* uninterruptible syscalls.
*/
if (error == 0 && uio->uio_resid != 0 &&
total_read % sigchk_period == 0) {
error = tsleep_sbt(&random_alg_context, PCATCH,
"randrd", SBT_1NS, 0, C_HARDCLOCK);
/* Squash tsleep timeout condition */
if (error == EWOULDBLOCK)
error = 0;
}
}
/*
* Short reads due to signal interrupt should not indicate error.
* Instead, the uio will reflect that the read was shorter than
* requested.
*/
if (error == ERESTART || error == EINTR)
error = 0;
explicit_bzero(random_buf, bufsize);
free(random_buf, M_ENTROPY);
return (error);
}
@@ -249,7 +273,6 @@ READ_RANDOM_UIO(struct uio *uio, bool nonblock)
void
READ_RANDOM(void *random_buf, u_int len)
{
u_int read_directly_len;
KASSERT(random_buf != NULL, ("No suitable random buffer in %s", __func__));
p_random_alg_context->ra_pre_read();
@@ -278,23 +301,7 @@ READ_RANDOM(void *random_buf, u_int len)
(void)randomdev_wait_until_seeded(SEEDWAIT_UNINTERRUPTIBLE);
}
read_rate_increment(roundup2(len, sizeof(uint32_t)));
/*
* The underlying generator expects multiples of
* RANDOM_BLOCKSIZE.
*/
read_directly_len = rounddown(len, RANDOM_BLOCKSIZE);
if (read_directly_len > 0)
p_random_alg_context->ra_read(random_buf, read_directly_len);
if (read_directly_len < len) {
uint8_t remainder_buf[RANDOM_BLOCKSIZE];
p_random_alg_context->ra_read(remainder_buf,
sizeof(remainder_buf));
memcpy((char *)random_buf + read_directly_len, remainder_buf,
len - read_directly_len);
explicit_bzero(remainder_buf, sizeof(remainder_buf));
}
p_random_alg_context->ra_read(random_buf, len);
}
bool

View File

@@ -68,7 +68,7 @@ struct harvest_event;
typedef void random_alg_init_t(void *);
typedef void random_alg_deinit_t(void *);
typedef void random_alg_pre_read_t(void);
typedef void random_alg_read_t(uint8_t *, u_int);
typedef void random_alg_read_t(uint8_t *, size_t);
typedef bool random_alg_seeded_t(void);
typedef void random_alg_reseed_t(void);
typedef void random_alg_eventprocessor_t(struct harvest_event *);

View File

@@ -209,8 +209,7 @@ ATF_TC_BODY(uint128_chacha_ctr, tc)
vec_u32_tole128(expectedle, tests[i].expected);
a = le128dec(inputle);
randomdev_keystream(&context, &a, trash, sizeof(trash) /
RANDOM_BLOCKSIZE);
randomdev_keystream(&context, &a, trash, sizeof(trash));
u128_check_equality(le128dec(expectedle), a, tests[i].descr);
}