Fix a bug introduced between 1.1 and 1.1.5.  Loading the time was moved
outside the critical region.

Make it work with 2.0.  It wasn't designed to be called at splclock().

Make it work with prescaling.  The overflow threshold was bogus.

Make it work for any HZ.  Side effect of fixing prescaling.

Speed it up.  Allocate registers better.  Reduce multiplication and
division to multiplication and a shift.  Speed is now 5-6 usec on a
486DX/33, was about 3 usec more.

Optimize for the non-pentium case.  The pentium code got moved around
a bit and hasn't been tested.

Change #include's to 2.0 style.
This commit is contained in:
Bruce Evans 1994-11-05 23:53:46 +00:00
parent b2e3ee0a69
commit ae406484f0
1 changed files with 108 additions and 81 deletions

View File

@ -31,54 +31,34 @@
* SUCH DAMAGE. * SUCH DAMAGE.
* *
* from: Steve McCanne's microtime code * from: Steve McCanne's microtime code
* $Id: microtime.s,v 1.5 1994/08/11 00:28:17 wollman Exp $ * $Id: microtime.s,v 1.6 1994/08/13 17:45:09 wollman Exp $
*/ */
#include "machine/asmacros.h" #include <machine/asmacros.h>
#include "../isa/isa.h"
#include "../isa/timerreg.h"
.extern _pentium_mhz #include <i386/isa/icu.h>
#include <i386/isa/isa.h>
#include <i386/isa/timerreg.h>
/*
* Use a higher resolution version of microtime if HZ is not
* overridden (i.e. it is 100Hz).
*/
#ifndef HZ
ENTRY(microtime) ENTRY(microtime)
pushl %edi # save registers
pushl %esi
pushl %ebx
movl $_time, %ebx # get timeval ptr #ifdef I586_CPU
movl _pentium_mhz, %ecx
#if defined(I586_CPU) testl %ecx, %ecx
movl _pentium_mhz, %ecx jne pentium_microtime
orl %ecx, %ecx #else
jz 0f xorl %ecx, %ecx # clear ecx
#endif
cli movb $TIMER_SEL0|TIMER_LATCH, %al # prepare to latch
.byte 0x0f, 0x31 /* RDTSC */
idivl _pentium_mhz /* get value in usec */
movl 4(%ebx), %esi
movl (%ebx), %edi
sti
jmp 4f
#endif /* Pentium code */
0:
movl (%ebx), %edi # sec = time.tv_sec
movl 4(%ebx), %esi # usec = time.tv_usec
cli # disable interrupts cli # disable interrupts
movl $(TIMER_SEL0|TIMER_LATCH), %eax
outb %al, $TIMER_MODE # latch timer 0's counter outb %al, $TIMER_MODE # latch timer 0's counter
inb $TIMER_CNTR0, %al # read counter value, LSB first
xorl %ebx, %ebx # clear ebx movb %al, %cl
inb $TIMER_CNTR0, %al # Read counter value, LSB first
movb %al, %bl
inb $TIMER_CNTR0, %al inb $TIMER_CNTR0, %al
movb %al, %bh movb %al, %ch
# Now check for counter overflow. This is tricky because the # Now check for counter overflow. This is tricky because the
# timer chip doesn't let us atomically read the current counter # timer chip doesn't let us atomically read the current counter
@ -92,63 +72,110 @@ ENTRY(microtime)
# from the IRR, and mistakenly add a correction to the "close # from the IRR, and mistakenly add a correction to the "close
# to zero" value. # to zero" value.
# #
# We compare the counter value to heuristic constant 11890. # We compare the counter value to the prepared overflow threshold.
# If the counter value is less than this, we assume the counter # If the counter value is less than this, we assume the counter
# didn't overflow between disabling interrupts above and latching # didn't overflow between disabling timer interrupts and latching
# the counter value. For example, we assume that the above 10 or so # the counter value above. For example, we assume that interrupts
# instructions take less than 11932 - 11890 = 42 microseconds to # are enabled when we are called (or were disabled just a few
# execute. # cycles before we are called and that the instructions before the
# "cli" are fast) and that the "cli" and "outb" instructions take
# less than 10 timer cycles to execute. The last assumption is
# very safe.
# #
# Otherwise, the counter might have overflowed. We check for this # Otherwise, the counter might have overflowed. We check for this
# condition by reading the interrupt request register out of the ICU. # condition by reading the interrupt request register out of the ICU.
# If it overflowed, we add in one clock period. # If it overflowed, we add in one clock period.
# #
# The heuristic is "very accurate" because it works 100% if # The heuristic is "very accurate" because it works 100% if we're
# we're called from an ipl less than the clock. Otherwise, # called with interrupts enabled. Otherwise, it might not work.
# it might not work. Currently, only gettimeofday and bpf # Currently, only siointrts() calls us with interrupts disabled, so
# call microtime so it's not a problem. # the problem can be avoided at some cost to the general case. The
# costs are complications in callers to disable interrupts in
# IO_ICU1 and extra reads of the IRR forced by a conservative
# overflow threshold.
#
# In 2.0, we are called at splhigh() from mi_switch(), so we have
# to allow for the overflow bit being in ipending instead of in
# the IRR. Our caller may have executed many instructions since
# ipending was set, so the heuristic for the IRR is inappropriate
# for ipending. However, we don't need another heuristic, since
# the "cli" suffices to lock ipending.
movl _timer0_prescale, %eax # adjust value if timer is movl _timer0_max_count, %edx # prepare for 2 uses
addl _timer0_divisor, %eax # reprogrammed
addl $-11932, %eax
subl %eax, %ebx
cmpl $11890, %ebx # do we have a possible overflow condition testb $IRQ0, _ipending # is a soft timer interrupt pending?
jle 1f jne overflow
# Do we have a possible overflow condition?
cmpl _timer0_overflow_threshold, %ecx
jbe 1f
inb $IO_ICU1, %al # read IRR in ICU inb $IO_ICU1, %al # read IRR in ICU
testb $1, %al # is a timer interrupt pending? testb $IRQ0, %al # is a hard timer interrupt pending?
je 1f je 1f
addl $-11932, %ebx # yes, subtract one clock period overflow:
subl %edx, %ecx # some intr pending, count timer down through 0
1: 1:
# Subtract counter value from max count since it is a count-down value.
subl %ecx, %edx
# Adjust for partial ticks.
addl _timer0_prescaler_count, %edx
# To divide by 1.193200, we multiply by 27465 and shift right by 15.
#
# The multiplier was originally calculated to be
#
# 2^18 * 1000000 / 1193200 = 219698.
#
# The frequency is 1193200 to be compatible with rounding errors in
# the calculation of the usual maximum count. 2^18 is the largest
# power of 2 such that multiplying `i' by it doesn't overflow for i
# in the range of interest ([0, 11932 + 5)). We adjusted the
# multiplier a little to minimise the average of
#
# fabs(i / 1.193200 - ((multiplier * i) >> 18))
#
# for i in the range and then removed powers of 2 to speed up the
# multiplication and to avoid overflow for i outside the range
# (i may be as high as 2^17 if the timer is programmed to its
# maximum maximum count). The absolute error is less than 1 for
# all i in the range.
#if 0
imul $27465, %edx # 25 cycles on a 486
#else
leal (%edx,%edx,2), %eax # a = 3 2 cycles on a 486
leal (%edx,%eax,4), %eax # a = 13 2
movl %eax, %ecx # c = 13 1
shl $5, %eax # a = 416 2
addl %ecx, %eax # a = 429 1
leal (%edx,%eax,8), %eax # a = 3433 2
leal (%edx,%eax,8), %eax # a = 27465 2 (total 12 cycles)
#endif /* 0 */
shr $15, %eax
common_microtime:
addl _time+4, %eax # usec += time.tv_usec
movl _time, %edx # sec = time.tv_sec
sti # enable interrupts sti # enable interrupts
movl $11932, %eax # subtract counter value from 11932 since cmpl $1000000, %eax # usec valid?
subl %ebx, %eax # it is a count-down value jb 1f
subl $1000000, %eax # adjust usec
incl %edx # bump sec
1:
movl 4(%esp), %ecx # load timeval pointer arg
movl %edx, (%ecx) # tvp->tv_sec = sec
movl %eax, 4(%ecx) # tvp->tv_usec = usec
movl %eax, %ebx # this really is a "imull $1000, %eax, %eax"
sall $10, %eax # instruction, but this saves us
sall $3, %ebx # 33/23 clocks on a 486/386 CPU
subl %ebx, %eax #
sall $1, %ebx # /sos
subl %ebx, %eax #
movl $0, %edx # zero extend eax into edx for div
movl $1193, %ecx
idivl %ecx # convert to usecs: mult by 1000/1193
4:
addl %eax, %esi # add counter usecs to time.tv_usec
cmpl $1000000, %esi # carry in timeval?
jl 2f
subl $1000000, %esi # adjust usec
incl %edi # bump sec
2:
movl 16(%esp), %ecx # load timeval pointer arg
movl %edi, (%ecx) # tvp->tv_sec = sec
movl %esi, 4(%ecx) # tvp->tv_usec = usec
popl %ebx # restore regs
popl %esi
popl %edi
ret ret
#endif /* normal value of HZ */
ALIGN_TEXT
pentium_microtime:
cli
.byte 0x0f, 0x31 # RDTSC
divl %ecx # get value in usec
jmp common_microtime