riscv: timer: driver revamp

Several issues:

- `last_count` should not be updated with the current time, otherwise
  the IRQ servicing latency gets absorbed into the tick origin, causing
  time drift and inducing jitter (see the sketch after this list).

- `sys_clock_set_timeout()` should not base its `mtime` deadline on the
  current time either. Tracking the `last_ticks` and `last_elapsed`
  values avoids the need for any tick rounding computation.

- The MIN_DELAY logic is pointless. If the programmed comparator value
  gets close to, or even falls behind, the current time then the IRQ
  simply triggers right away. This is unlikely to happen very often
  anyway, so the constant overhead is uncalled for.

- Runtime 64-bit divisions on 32-bit hardware are very expensive.
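
To make these points concrete, here is a minimal standalone sketch of
the accounting scheme this patch adopts. It is not the driver code
itself: announce_ticks(), timeout_deadline() and the CYC_PER_TICK value
are made up for illustration (the real code is in the diff below).

#include <stdint.h>
#include <stdio.h>

#define CYC_PER_TICK 6000u	/* e.g. 60 MHz timer, 10000 ticks/s */

/* native register width: divisions stay cheap on 32-bit hardware */
typedef unsigned long cycle_diff_t;

static uint64_t last_count;	/* cycle stamp of the last tick boundary */
static uint64_t last_ticks;	/* ticks since boot at last_count */
static uint32_t last_elapsed;	/* ticks reported since the last announce */

/* IRQ-time accounting: advance last_count by whole ticks only. Writing
 * last_count = now instead would absorb the IRQ servicing latency into
 * the tick origin, accumulating drift.
 */
static uint32_t announce_ticks(uint64_t now)
{
	uint64_t dcycles = now - last_count;
	/* the cast keeps this a native-width division */
	uint32_t dticks = (cycle_diff_t)dcycles / CYC_PER_TICK;

	last_count += (cycle_diff_t)dticks * CYC_PER_TICK;
	last_ticks += dticks;
	last_elapsed = 0;
	return dticks;
}

/* Timeout deadline derived from the tick counters, not from the
 * current time: the result is an absolute multiple of CYC_PER_TICK,
 * already tick aligned, so no round-up computation is needed.
 */
static uint64_t timeout_deadline(uint32_t ticks)
{
	return (last_ticks + last_elapsed + ticks) * CYC_PER_TICK;
}

int main(void)
{
	/* IRQ serviced 2 ticks plus 3000 cycles late: 2 ticks get
	 * announced, the 3000-cycle latency stays in the books */
	uint32_t dticks = announce_ticks(2 * CYC_PER_TICK + 3000);

	printf("announced %u ticks, 10-tick deadline at cycle %llu\n",
	       dticks, (unsigned long long)timeout_deadline(10));
	return 0;
}

This prints "announced 2 ticks, 10-tick deadline at cycle 72000": the
deadline lands exactly on a tick boundary (72000 = 12 * CYC_PER_TICK)
even though the IRQ was serviced 3000 cycles late, and that latency
never leaks into subsequent deadlines.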

Fix the above, and improve the following:

- Prime the accounting by simply invoking the IRQ handler from the init
  code (annotated after this list). That makes the "ticks since boot"
  counter right from the start.

- Remove excessive casts, especially a few wrong ones.

- Simplify the code overall.
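
The priming step, paraphrased from the new init path with an
explanatory comment added (not a verbatim copy of the patch):

static int sys_clock_driver_init(const struct device *dev)
{
	ARG_UNUSED(dev);

	IRQ_CONNECT(TIMER_IRQN, 0, timer_isr, NULL, 0);
	/* Run the handler once by hand: with last_count still 0 it
	 * announces every tick elapsed since the counter started,
	 * aligns last_count to a tick boundary and programs the first
	 * compare value, exactly as a real interrupt would.
	 */
	timer_isr(NULL); /* prime it */
	irq_enable(TIMER_IRQN);
	return 0;
}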

Here's the output from the timer_jitter_drift test.

Before this patch:

|timer clock rate 60000000, kernel tick rate 10000
|period duration statistics for 10000 samples (0 rollovers):
|  expected: 1000 us,            60000.000000 cycles
|  min:      907.600000 us,      54456 cycles
|  max:      1099.750000 us,     65985 cycles
|  mean:     1008.594633 us,     60515.678000 cycles
|  variance: 2.184205 us,        7863.136316 cycles
|  stddev:   1.477906 us,        88.674332 cycles
|timer start cycle 995589, end cycle 606152369,
|total time 10085946.333333 us, expected time 10000000.000000 us,
|expected time drift 0.000000 us, difference 85946.333333 us

After this patch:

|timer clock rate 60000000, kernel tick rate 10000
|period duration statistics for 10000 samples (0 rollovers):
|  expected: 1000 us,            60000.000000 cycles
|  min:      992.116667 us,      59527 cycles
|  max:      1030.366667 us,     61822 cycles
|  mean:     1000.001902 us,     60000.114100 cycles
|  variance: 0.105334 us,        379.201081 cycles
|  stddev:   0.324551 us,        19.473087 cycles
|timer start cycle 987431, end cycle 600988572,
|total time 10000019.016667 us, expected time 10000000.000000 us,
|expected time drift 0.000000 us, difference 19.016667 us

The differences in the mean, variance and standard deviation numbers
speak for themselves, and that is without competing ISRs and/or
IRQ-disabled periods, which would have made the comparison even more
lopsided in favor of this patch.

Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
@@ -64,22 +64,22 @@
 #define TIMER_IRQN DT_INST_IRQN(0)
 #endif
 
-#define CYC_PER_TICK ((uint32_t)((uint64_t) (sys_clock_hw_cycles_per_sec() \
-					      >> CONFIG_RISCV_MACHINE_TIMER_SYSTEM_CLOCK_DIVIDER) \
-					     / (uint64_t)CONFIG_SYS_CLOCK_TICKS_PER_SEC))
-#define MAX_CYC INT_MAX
-#define MAX_TICKS ((MAX_CYC - CYC_PER_TICK) / CYC_PER_TICK)
-#define MIN_DELAY CONFIG_RISCV_MACHINE_TIMER_MIN_DELAY
+#define CYC_PER_TICK (uint32_t)(sys_clock_hw_cycles_per_sec() \
+				/ CONFIG_SYS_CLOCK_TICKS_PER_SEC)
 
-#define TICKLESS IS_ENABLED(CONFIG_TICKLESS_KERNEL)
+/* the unsigned long cast limits divisions to native CPU register width */
+#define cycle_diff_t unsigned long
 
 static struct k_spinlock lock;
 static uint64_t last_count;
+static uint64_t last_ticks;
+static uint32_t last_elapsed;
 
 #if defined(CONFIG_TEST)
 const int32_t z_sys_timer_irq_for_test = TIMER_IRQN;
 #endif
 
-static uint64_t get_hart_mtimecmp(void)
+static uintptr_t get_hart_mtimecmp(void)
 {
 	return MTIMECMP_REG + (arch_proc_id() * 8);
 }
@@ -89,7 +89,7 @@ static void set_mtimecmp(uint64_t time)
 #ifdef CONFIG_64BIT
 	*(volatile uint64_t *)get_hart_mtimecmp() = time;
 #else
-	volatile uint32_t *r = (uint32_t *)(uint32_t)get_hart_mtimecmp();
+	volatile uint32_t *r = (uint32_t *)get_hart_mtimecmp();
 
 	/* Per spec, the RISC-V MTIME/MTIMECMP registers are 64 bit,
	 * but are NOT internally latched for multiword transfers. So
@@ -126,62 +126,55 @@ static void timer_isr(const void *arg)
 	ARG_UNUSED(arg);
 
 	k_spinlock_key_t key = k_spin_lock(&lock);
 	uint64_t now = mtime();
-	uint32_t dticks = (uint32_t)((now - last_count) / CYC_PER_TICK);
+	uint64_t dcycles = now - last_count;
+	uint32_t dticks = (cycle_diff_t)dcycles / CYC_PER_TICK;
 
-	last_count = now;
+	last_count += (cycle_diff_t)dticks * CYC_PER_TICK;
+	last_ticks += dticks;
+	last_elapsed = 0;
 
-	if (!TICKLESS) {
+	if (!IS_ENABLED(CONFIG_TICKLESS_KERNEL)) {
 		uint64_t next = last_count + CYC_PER_TICK;
 
-		if ((int64_t)(next - now) < MIN_DELAY) {
-			next += CYC_PER_TICK;
-		}
 		set_mtimecmp(next);
 	}
 
 	k_spin_unlock(&lock, key);
-	sys_clock_announce(IS_ENABLED(CONFIG_TICKLESS_KERNEL) ? dticks : 1);
+	sys_clock_announce(dticks);
 }
 
 void sys_clock_set_timeout(int32_t ticks, bool idle)
 {
 	ARG_UNUSED(idle);
 
-#if defined(CONFIG_TICKLESS_KERNEL)
-	/* RISCV has no idle handler yet, so if we try to spin on the
-	 * logic below to reset the comparator, we'll always bump it
-	 * forward to the "next tick" due to MIN_DELAY handling and
-	 * the interrupt will never fire! Just rely on the fact that
-	 * the OS gave us the proper timeout already.
-	 */
-	if (idle) {
+	if (!IS_ENABLED(CONFIG_TICKLESS_KERNEL)) {
 		return;
 	}
 
-	ticks = ticks == K_TICKS_FOREVER ? MAX_TICKS : ticks;
-	ticks = CLAMP(ticks - 1, 0, (int32_t)MAX_TICKS);
+	if (ticks == K_TICKS_FOREVER) {
+		set_mtimecmp(UINT64_MAX);
+		return;
+	}
+
+	/*
+	 * Clamp the max period length to a number of cycles that can fit
+	 * in half the range of a cycle_diff_t for native width divisions
+	 * to be usable elsewhere. Also clamp it to half the range of an
+	 * int32_t as this is the type used for elapsed tick announcements.
+	 * The half range gives us extra room to cope with the unavoidable IRQ
+	 * servicing latency. The compiler should optimize away the least
+	 * restrictive of those tests automatically.
+	 */
+	ticks = CLAMP(ticks, 0, (cycle_diff_t)-1 / 2 / CYC_PER_TICK);
+	ticks = CLAMP(ticks, 0, INT32_MAX / 2);
 
 	k_spinlock_key_t key = k_spin_lock(&lock);
-	uint64_t now = mtime();
-	uint32_t adj, cyc = ticks * CYC_PER_TICK;
+	uint64_t cyc = (last_ticks + last_elapsed + ticks) * CYC_PER_TICK;
 
-	/* Round up to next tick boundary. */
-	adj = (uint32_t)(now - last_count) + (CYC_PER_TICK - 1);
-	if (cyc <= MAX_CYC - adj) {
-		cyc += adj;
-	} else {
-		cyc = MAX_CYC;
-	}
-	cyc = (cyc / CYC_PER_TICK) * CYC_PER_TICK;
-
-	if ((int32_t)(cyc + last_count - now) < MIN_DELAY) {
-		cyc += CYC_PER_TICK;
-	}
-
-	set_mtimecmp(cyc + last_count);
+	set_mtimecmp(cyc);
 	k_spin_unlock(&lock, key);
-#endif
 }
 
 uint32_t sys_clock_elapsed(void)
@@ -191,20 +184,23 @@ uint32_t sys_clock_elapsed(void)
 	}
 
 	k_spinlock_key_t key = k_spin_lock(&lock);
-	uint32_t ret = ((uint32_t)mtime() - (uint32_t)last_count) / CYC_PER_TICK;
+	uint64_t now = mtime();
+	uint64_t dcycles = now - last_count;
+	uint32_t dticks = (cycle_diff_t)dcycles / CYC_PER_TICK;
 
+	last_elapsed = dticks;
 	k_spin_unlock(&lock, key);
-	return ret;
+	return dticks;
 }
 
 uint32_t sys_clock_cycle_get_32(void)
 {
-	return (uint32_t)(mtime() << CONFIG_RISCV_MACHINE_TIMER_SYSTEM_CLOCK_DIVIDER);
+	return ((uint32_t)mtime()) << CONFIG_RISCV_MACHINE_TIMER_SYSTEM_CLOCK_DIVIDER;
 }
 
 uint64_t sys_clock_cycle_get_64(void)
 {
-	return (mtime() << CONFIG_RISCV_MACHINE_TIMER_SYSTEM_CLOCK_DIVIDER);
+	return mtime() << CONFIG_RISCV_MACHINE_TIMER_SYSTEM_CLOCK_DIVIDER;
 }
 
 static int sys_clock_driver_init(const struct device *dev)
@@ -212,8 +208,7 @@ static int sys_clock_driver_init(const struct device *dev)
 	ARG_UNUSED(dev);
 
 	IRQ_CONNECT(TIMER_IRQN, 0, timer_isr, NULL, 0);
-	last_count = mtime();
-	set_mtimecmp(last_count + CYC_PER_TICK);
+	timer_isr(NULL); /* prime it */
 	irq_enable(TIMER_IRQN);
 	return 0;
 }