x86: Speed up clock calibration

Prior to this commit, the TSC and local APIC frequencies were calibrated at boot time by measuring the clocks before and after a one-second sleep. This was simple and effective, but had the disadvantage of *requiring a one-second sleep*. Rather than making two clock measurements (before and after sleeping) we now perform many measurements; and rather than simply subtracting the starting count from the ending count, we calculate a best-fit regression between the target clock and the reference clock (for which the current best available timecounter is used). While we do this, we keep track of an estimate of the uncertainty in the regression slope (aka. the ratio of clock speeds), and stop measuring when we believe the uncertainty is less than 1 PPM. In order to avoid the risk of aliasing resulting from the data-gathering loop synchronizing with (a multiple of) the frequency of the reference clock, we add some additional spinning depending upon the iteration number. For numerical stability and simplicity of implementation, we make use of floating-point arithmetic for the statistical calculations. On the author's Dell laptop, this reduces the time spent in calibration from 2000 ms to 29 ms; on an EC2 c5.xlarge instance, it is reduced from 2000 ms to 2.5 ms. Reviewed by: bde (previous version), kib MFC after: 1 month Sponsored by: https://www.patreon.com/cperciva Differential Revision: https://reviews.freebsd.org/D33802
2024-12-14 10:09:48 +00:00 · 2022-01-09 17:22:20 -08:00 · 2022-01-09 17:22:20 -08:00 · c2705ceaeb
commit c2705ceaeb
parent 12f747e6ff
6 changed files with 229 additions and 50 deletions
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@ -407,3 +407,9 @@ contrib/openzfs/module/zfs/vdev_raidz_math_avx512bw.c		optional zfs compile-with
 contrib/openzfs/module/zfs/vdev_raidz_math_avx512f.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/vdev_raidz_math_sse2.c		optional zfs compile-with "${ZFS_C}"
 contrib/openzfs/module/zfs/vdev_raidz_math_ssse3.c		optional zfs compile-with "${ZFS_C}"
+# Clock calibration subroutine; uses floating-point arithmetic
+subr_clockcalib.o		standard				\
+	dependency	"$S/kern/subr_clockcalib.c"			\
+	compile-with	"${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} -mmmx -msse -msse2 ${.IMPSRC}" \
+	no-implicit-rule						\
+	clean		"subr_clockcalib.o"
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@ -171,3 +171,9 @@ x86/x86/local_apic.c		optional apic
 x86/x86/mptable.c		optional apic
 x86/x86/mptable_pci.c		optional apic pci
 x86/x86/msi.c			optional apic pci
+# Clock calibration subroutine; uses floating-point arithmetic
+subr_clockcalib.o		standard				\
+	dependency	"$S/kern/subr_clockcalib.c"			\
+	compile-with	"${CC} -c ${CFLAGS:C/^-O2$/-O3/:N-nostdinc} ${WERROR} -m80387 ${.IMPSRC}" \
+	no-implicit-rule						\
+	clean		"subr_clockcalib.o"
--- a/sys/kern/subr_clockcalib.c
+++ b/sys/kern/subr_clockcalib.c
@ -0,0 +1,183 @@
+/*-
+ * Copyright (c) 2022 Colin Percival
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/timetc.h>
+#include <sys/tslog.h>
+#include <machine/cpu.h>
+
+/**
+ * clockcalib(clk, clkname):
+ * Return the frequency of the timer read by calling clk(), calibrated against
+ * the current timecounter.  Uses floating point; clkname labels log output.
+ */
+uint64_t
+clockcalib(uint64_t (*clk)(void), const char *clkname)
+{
+	struct timecounter *tc = atomic_load_ptr(&timecounter);
+	uint64_t clk0, clk1, clk_delay, n, passes = 0;
+	uint64_t t0, t1, tadj, tlast;
+	double mu_clk = 0;
+	double mu_t = 0;
+	double va_clk = 0;
+	double va_t = 0;
+	double cva = 0;
+	double d1, d2;
+	double inv_n;
+	uint64_t freq;
+
+	TSENTER();
+	/*-
+	 * The idea here is to compute a best-fit linear regression between
+	 * the clock we're calibrating and the reference clock; the slope of
+	 * that line multiplied by the frequency of the reference clock gives
+	 * us the frequency we're looking for.
+	 *
+	 * To do this, we calculate the
+	 * (a) mean of the target clock measurements,
+	 * (b) variance of the target clock measurements,
+	 * (c) mean of the reference clock measurements,
+	 * (d) variance of the reference clock measurements, and
+	 * (e) covariance of the target clock and reference clock measurements
+	 * on an ongoing basis, updating all five values after each new data
+	 * point arrives, stopping when we're confident that we've accurately
+	 * measured the target clock frequency.
+	 *
+	 * Given those five values, the important formulas to remember from
+	 * introductory statistics are:
+	 * 1. slope of regression line = covariance(x, y) / variance(x)
+	 * 2. (relative uncertainty in slope)^2 =
+	 *    (variance(x) * variance(y) - covariance(x, y)^2)
+	 *    ------------------------------------------------
+	 *              covariance(x, y)^2 * (N - 2)
+	 *
+	 * We adjust the second formula slightly, adding a term to each of
+	 * the variance values to reflect the measurement quantization.
+	 *
+	 * Finally, we need to determine when to stop gathering data.  We
+	 * can't simply stop as soon as the computed uncertainty estimate
+	 * is below our threshold; this would make us overconfident since it
+	 * would introduce a multiple-comparisons problem (cf. sequential
+	 * analysis in clinical trials).  Instead, we stop with N data points
+	 * if the estimated uncertainty of the first k data points meets our
+	 * target for all N/2 < k <= N; this is not theoretically optimal,
+	 * but in practice works well enough.
+	 */
+
+	/*
+	 * Initial values for clocks; we'll subtract these off from values
+	 * we measure later in order to reduce floating-point rounding errors.
+	 * We keep track of a 64-bit adjustment for values read from the
+	 * reference timecounter, since it can wrap.
+	 */
+	clk0 = clk();
+	t0 = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
+	tadj = 0;
+	tlast = t0;
+
+	/* Loop until we give up or decide that we're calibrated. */
+	for (n = 1; ; n++) {
+		/* Get a new data point; widen the mask so mask + 1 != 0. */
+		clk1 = clk() - clk0;
+		t1 = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
+		while (t1 + tadj < tlast)
+			tadj += (uint64_t)tc->tc_counter_mask + 1;
+		tlast = t1 + tadj;
+		t1 += tadj - t0;
+
+		/* If we spent too long, bail. */
+		if (t1 > tc->tc_frequency) {
+			printf("Statistical %s calibration failed!  "
+			    "Clocks might be ticking at variable rates.\n",
+			    clkname);
+			printf("Falling back to slow %s calibration.\n",
+			    clkname);
+			freq = (double)(tc->tc_frequency) * clk1 / t1;
+			break;
+		}
+
+		/* Precompute to save on divisions later. */
+		inv_n = 1.0 / n;
+
+		/* Update mean and variance of recorded target clock values. */
+		d1 = clk1 - mu_clk;
+		mu_clk += d1 * inv_n;
+		d2 = d1 * (clk1 - mu_clk);
+		va_clk += (d2 - va_clk) * inv_n;
+
+		/* Update mean and variance of recorded time values. */
+		d1 = t1 - mu_t;
+		mu_t += d1 * inv_n;
+		d2 = d1 * (t1 - mu_t);
+		va_t += (d2 - va_t) * inv_n;
+
+		/* Update covariance. */
+		d2 = d1 * (clk1 - mu_clk);
+		cva += (d2 - cva) * inv_n;
+
+		/*
+		 * Count low-uncertainty iterations.  This is a rearrangement
+		 * of "relative uncertainty < 1 PPM" avoiding division.
+		 */
+#define TSC_PPM_UNCERTAINTY	1
+#define TSC_UNCERTAINTY		(TSC_PPM_UNCERTAINTY * 0.000001)
+#define TSC_UNCERTAINTY_SQR	(TSC_UNCERTAINTY * TSC_UNCERTAINTY)
+		if (TSC_UNCERTAINTY_SQR * (n - 2) * cva * cva >
+		    (va_t + 4) * (va_clk + 4) - cva * cva)
+			passes++;
+		else
+			passes = 0;
+
+		/* Break if we're consistently certain. */
+		if (passes * 2 > n) {
+			freq = (double)(tc->tc_frequency) * cva / va_t;
+			if (bootverbose)
+				printf("Statistical %s calibration took"
+				    " %lu us and %lu data points\n",
+				    clkname, (unsigned long)(t1 *
+					1000000.0 / tc->tc_frequency),
+				    (unsigned long)n);
+			break;
+		}
+
+		/*
+		 * Add variable delay to avoid theoretical risk of aliasing
+		 * resulting from this loop synchronizing with the frequency
+		 * of the reference clock.  On the nth iteration, we spend
+		 * O(1 / n) time here -- long enough to avoid aliasing, but
+		 * short enough to be insignificant as n grows.
+		 */
+		clk_delay = clk() + (clk() - clk0) / (n * n);
+		while (clk() < clk_delay)
+			cpu_spinwait(); /* Do nothing. */
+	}
+	TSEXIT();
+	return (freq);
+}
--- a/sys/sys/timetc.h
+++ b/sys/sys/timetc.h
@ -96,4 +96,11 @@ void	cpu_tick_calibration(void);
 SYSCTL_DECL(_kern_timecounter);
 #endif

+/**
+ * clockcalib(clk, clkname):
+ * Return the frequency of the provided timer, as calibrated against the
+ * current best-available timecounter.
+ */
+uint64_t clockcalib(uint64_t (*)(void), const char *);
+
 #endif /* !_SYS_TIMETC_H_ */
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/smp.h>
 #include <sys/sysctl.h>
 #include <sys/timeet.h>
+#include <sys/timetc.h>

 #include <vm/vm.h>
 #include <vm/pmap.h>
@ -64,6 +65,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/clock.h>
 #include <machine/cpufunc.h>
 #include <machine/cputypes.h>
+#include <machine/fpu.h>
 #include <machine/frame.h>
 #include <machine/intr_machdep.h>
 #include <x86/apicvar.h>
@ -1000,30 +1002,39 @@ native_lapic_disable_pmc(void)
 #endif
 }

+static uint64_t
+cb_lapic_getcount(void)
+{
+	uint32_t ccr = lapic_read32(LAPIC_CCR_TIMER);	/* current count */
+	return (APIC_TIMER_MAX_COUNT - ccr);	/* amount consumed from MAX */
+}
+
 static void
 lapic_calibrate_initcount(struct lapic *la)
 {
-	u_long value;
+	uint64_t freq;

-	/* Start off with a divisor of 2 (power on reset default). */
+	/* Calibrate the APIC timer frequency with the divisor set to 2. */
+	lapic_timer_set_divisor(2);
+	lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT);
+	fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);
+	freq = clockcalib(cb_lapic_getcount, "lapic");
+	fpu_kern_leave(curthread, NULL);
+
+	/* Use the smallest divisor that keeps count_freq below the max. */
 	lapic_timer_divisor = 2;
-	/* Try to calibrate the local APIC timer. */
 	do {
-		lapic_timer_set_divisor(lapic_timer_divisor);
-		lapic_timer_oneshot_nointr(la, APIC_TIMER_MAX_COUNT);
-		DELAY(1000000);
-		value = APIC_TIMER_MAX_COUNT - lapic_read32(LAPIC_CCR_TIMER);
-		if (value != APIC_TIMER_MAX_COUNT)
+		if (freq * 2 / lapic_timer_divisor < APIC_TIMER_MAX_COUNT)
 			break;
 		lapic_timer_divisor <<= 1;
 	} while (lapic_timer_divisor <= 128);
 	if (lapic_timer_divisor > 128)
 		panic("lapic: Divisor too big");
+	count_freq = freq * 2 / lapic_timer_divisor;
 	if (bootverbose) {
 		printf("lapic: Divisor %lu, Frequency %lu Hz\n",
-		    lapic_timer_divisor, value);
+		    lapic_timer_divisor, count_freq);
 	}
-	count_freq = value;
 }

 static void
--- a/sys/x86/x86/tsc.c
+++ b/sys/x86/x86/tsc.c
@ -48,6 +48,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/vdso.h>
 #include <machine/clock.h>
 #include <machine/cputypes.h>
+#include <machine/fpu.h>
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 #include <x86/vmware.h>
@ -703,53 +704,18 @@ tsc_update_freq(uint64_t new_freq)
 void
 tsc_calibrate(void)
 {
-	struct timecounter *tc;
-	uint64_t freq, tsc_start, tsc_end;
-	u_int t_start, t_end;
-	register_t flags;
-	int cpu;
+	uint64_t freq;

 	if (tsc_disabled)
 		return;
 	if (tsc_early_calib_exact)
 		goto calibrated;

-	/*
-	 * Avoid using a low-quality timecounter to re-calibrate.  In
-	 * particular, old 32-bit platforms might only have the 8254 timer to
-	 * calibrate against.
-	 */
-	tc = atomic_load_ptr(&timecounter);
-	if (tc->tc_quality <= 0)
-		goto calibrated;
-
-	flags = intr_disable();
-	cpu = curcpu;
-	tsc_start = rdtsc_ordered();
-	t_start = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
-	intr_restore(flags);
-
-	DELAY(1000000);
-
-	thread_lock(curthread);
-	sched_bind(curthread, cpu);
-
-	flags = intr_disable();
-	tsc_end = rdtsc_ordered();
-	t_end = tc->tc_get_timecount(tc) & tc->tc_counter_mask;
-	intr_restore(flags);
-
-	sched_unbind(curthread);
-	thread_unlock(curthread);
-
-	if (t_end <= t_start) {
-		/* Assume that the counter has wrapped around at most once. */
-		t_end += (uint64_t)tc->tc_counter_mask + 1;
-	}
-
-	freq = tc->tc_frequency * (tsc_end - tsc_start) / (t_end - t_start);
-
+	fpu_kern_enter(curthread, NULL, FPU_KERN_NOCTX);
+	freq = clockcalib(rdtsc_ordered, "TSC");
+	fpu_kern_leave(curthread, NULL);
 	tsc_update_freq(freq);
+
 calibrated:
 	tc_init(&tsc_timecounter);
 	set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant);