task_wdt: add task-level watchdog subsystem

This new subsystem can be used to supervise individual threads. It
is based on a regularly updated kernel timer, whose ISR is never
actually called in regular system operation.

An existing hardware watchdog can be used as an optional fallback if
the task watchdog itself gets stuck.

Signed-off-by: Martin Jäger <martin@libre.solar>
This commit is contained in:
Martin Jäger 2020-11-18 16:23:17 +01:00 committed by Carles Cufí
parent 5b87cca98b
commit 1aaf508bde
10 changed files with 448 additions and 0 deletions

View file

@ -596,6 +596,7 @@
/subsys/shell/ @jakub-uC @nordic-krch /subsys/shell/ @jakub-uC @nordic-krch
/subsys/stats/ @nvlsianpu /subsys/stats/ @nvlsianpu
/subsys/storage/ @nvlsianpu /subsys/storage/ @nvlsianpu
/subsys/task_wdt/ @martinjaeger
/subsys/testsuite/ @nashif /subsys/testsuite/ @nashif
/subsys/timing/ @nashif @dcpleung /subsys/timing/ @nashif @dcpleung
/subsys/usb/ @jfischer-no /subsys/usb/ @jfischer-no

View file

@ -30,6 +30,7 @@ API Reference
resource_management/index.rst resource_management/index.rst
shell/index.rst shell/index.rst
storage/index.rst storage/index.rst
task_wdt/index.rst
misc/timeutil.rst misc/timeutil.rst
usb/index.rst usb/index.rst
usermode/index.rst usermode/index.rst

View file

@ -282,6 +282,11 @@ current :ref:`stability level <api_lifecycle>`.
- 2.3 - 2.3
- 2.3 - 2.3
* - :ref:`task_wdt_api`
- Experimental
- 2.5
- 2.5
* - :ref:`uart_api` * - :ref:`uart_api`
- Stable - Stable
- 1.0 - 1.0

View file

@ -0,0 +1,55 @@
.. _task_wdt_api:
Task Watchdog
#############
Overview
********
Many microcontrollers feature a hardware watchdog timer peripheral. Its purpose
is to trigger an action (usually a system reset) in case of severe software
malfunctions. Once initialized, the watchdog timer has to be restarted ("fed")
at regular intervals to prevent it from timing out. If the software gets stuck
and can no longer feed the watchdog, the corrective action is triggered to
bring the system back to normal operation.
In real-time operating systems with multiple tasks running in parallel, a
single watchdog instance may not be sufficient anymore, as it can be used for
only one task. This software watchdog based on kernel timers provides a method
to supervise multiple threads or tasks (called watchdog channels).
An existing hardware watchdog can be used as an optional fallback if the task
watchdog itself or the scheduler has a malfunction.
The task watchdog uses a kernel timer as its backend. If configured properly,
the timer ISR is never actually called during normal operation, as the timer is
continuously updated in the feed calls.
It's currently not possible to have multiple instances of task watchdogs.
Instead, the task watchdog API can be accessed globally to add or delete new
channels without passing around a context or device pointer in the firmware.
The maximum number of channels is predefined via Kconfig and should be adjusted
to match exactly the number of channels required by the application.
Configuration Options
*********************
Related configuration options can be found under
:zephyr_file:`subsys/task_wdt/Kconfig`.
* :option:`CONFIG_TASK_WDT`
* :option:`CONFIG_TASK_WDT_CHANNELS`
* :option:`CONFIG_TASK_WDT_HW_FALLBACK`
* :option:`CONFIG_TASK_WDT_MIN_TIMEOUT`
* :option:`CONFIG_TASK_WDT_HW_FALLBACK_DELAY`
API Reference
*************
.. doxygengroup:: task_wdt_api
:project: Zephyr

108
include/task_wdt/task_wdt.h Normal file
View file

@ -0,0 +1,108 @@
/*
* Copyright (c) 2020 Libre Solar Technologies GmbH
*
* SPDX-License-Identifier: Apache-2.0
*/
/**
* @file
* @brief Task watchdog header file
*
* This header file declares prototypes for the task watchdog APIs.
*
* The task watchdog can be used to monitor correct operation of individual
* threads. It can be used together with a hardware watchdog as a fallback.
*/
#ifndef TASK_WDT_H_
#define TASK_WDT_H_
#include <zephyr/types.h>
#include <kernel.h>
#include <device.h>
/**
* @brief Task Watchdog APIs
* @defgroup task_wdt_api Task Watchdog APIs
* @ingroup subsystem
* @{
*/
#ifdef __cplusplus
extern "C" {
#endif
/**
 * @brief Task watchdog callback.
 *
 * Called by the task watchdog when the timeout of a channel expired because
 * the channel was not fed in time.
 *
 * @param channel_id Index of the channel whose timeout expired.
 * @param user_data User data that was passed to task_wdt_add() for this
 *                  channel.
 */
typedef void (*task_wdt_callback_t)(int channel_id, void *user_data);
/**
 * @brief Initialize task watchdog.
 *
 * This function sets up necessary kernel timers and the hardware watchdog (if
 * desired as fallback). It has to be called before task_wdt_add() and
 * task_wdt_feed().
 *
 * @param hw_wdt Pointer to the hardware watchdog device used as fallback.
 *               Pass NULL if no hardware watchdog fallback is desired.
 *
 * @retval 0 If successful.
 * @retval -ENOTSUP If assigning a hardware watchdog is not supported, i.e. a
 *                  non-NULL hw_wdt was passed while
 *                  CONFIG_TASK_WDT_HW_FALLBACK is disabled.
 */
int task_wdt_init(const struct device *hw_wdt);
/**
 * @brief Install new timeout.
 *
 * Adds a new timeout to the list of task watchdog channels. The new channel
 * is fed once as part of this call, so its timeout starts running
 * immediately.
 *
 * @param reload_period Period in milliseconds used to reset the timeout.
 *                      Must not be 0.
 * @param callback Function to be called when watchdog timer expired. Pass
 *                 NULL to use system reset handler.
 * @param user_data User data to associate with the watchdog channel. It is
 *                  passed unchanged to the callback.
 *
 * @retval channel_id If successful, a non-negative value indicating the index
 *                    of the channel to which the timeout was assigned. This
 *                    ID is supposed to be used as the parameter in calls to
 *                    task_wdt_feed().
 * @retval -EINVAL If the reload_period is invalid.
 * @retval -ENOMEM If no more timeouts can be installed, i.e. all
 *                 CONFIG_TASK_WDT_CHANNELS channels are in use.
 */
int task_wdt_add(uint32_t reload_period, task_wdt_callback_t callback,
void *user_data);
/**
 * @brief Delete task watchdog channel.
 *
 * Deletes the specified channel from the list of task watchdog channels. The
 * channel is then available again for other tasks via the task_wdt_add()
 * function, and its channel_id must no longer be used by the previous owner.
 *
 * @param channel_id Index of the channel as returned by task_wdt_add().
 *
 * @retval 0 If successful.
 * @retval -EINVAL If there is no installed timeout for supplied channel.
 */
int task_wdt_delete(int channel_id);
/**
 * @brief Feed specified watchdog channel.
 *
 * This function restarts the timeout of the specified channel. It then loops
 * through all installed task watchdogs and updates the internal kernel timer
 * used for the software watchdog with the next due timeout. If a hardware
 * watchdog is used as a fallback, it is fed as well.
 *
 * @param channel_id Index of the fed channel as returned by task_wdt_add().
 *
 * @retval 0 If successful.
 * @retval -EINVAL If there is no installed timeout for supplied channel.
 */
int task_wdt_feed(int channel_id);
#ifdef __cplusplus
}
#endif
/**
* @}
*/
#endif /* TASK_WDT_H_ */

View file

@ -21,6 +21,7 @@ add_subdirectory_ifdef(CONFIG_SETTINGS settings)
add_subdirectory(fb) add_subdirectory(fb)
add_subdirectory(power) add_subdirectory(power)
add_subdirectory(stats) add_subdirectory(stats)
add_subdirectory(task_wdt)
add_subdirectory(testsuite) add_subdirectory(testsuite)
add_subdirectory(tracing) add_subdirectory(tracing)
add_subdirectory_ifdef(CONFIG_JWT jwt) add_subdirectory_ifdef(CONFIG_JWT jwt)

View file

@ -52,6 +52,8 @@ source "subsys/storage/Kconfig"
source "subsys/settings/Kconfig" source "subsys/settings/Kconfig"
source "subsys/task_wdt/Kconfig"
source "subsys/testsuite/Kconfig" source "subsys/testsuite/Kconfig"
source "subsys/timing/Kconfig" source "subsys/timing/Kconfig"

View file

@ -0,0 +1,3 @@
# SPDX-License-Identifier: Apache-2.0
zephyr_sources_ifdef(CONFIG_TASK_WDT task_wdt.c)

60
subsys/task_wdt/Kconfig Normal file
View file

@ -0,0 +1,60 @@
# Software watchdog configuration
# Copyright (c) 2020 Libre Solar Technologies GmbH
# SPDX-License-Identifier: Apache-2.0
menuconfig TASK_WDT
bool "Task-level software watchdog"
select REBOOT
help
Enable task watchdog
The task watchdog makes it possible to have individual watchdog channels
per thread, even if the hardware supports only a single watchdog.
config TASK_WDT_CHANNELS
int "Maximum number of task watchdog channels"
depends on TASK_WDT
default 5
range 2 100
help
The timeouts for each channel are stored in an array. Allocate only
the required number of channels to reduce memory footprint.
config TASK_WDT_HW_FALLBACK
bool "Use hardware watchdog as a fallback"
depends on TASK_WDT
default y
help
This option makes it possible to specify a hardware watchdog device in
the application that is used as an additional safety layer if the task
watchdog itself gets stuck.
config TASK_WDT_MIN_TIMEOUT
int "Minimum timeout for task watchdog (ms)"
depends on TASK_WDT_HW_FALLBACK
default 100
range 1 10000
help
The task watchdog uses a continuously restarted k_timer as its
backend. This value specifies the minimum timeout in milliseconds
among all task watchdogs used in the application.
If a hardware watchdog is configured as a fallback for the task
watchdog, its timeout is set to this value plus
TASK_WDT_HW_FALLBACK_DELAY.
config TASK_WDT_HW_FALLBACK_DELAY
int "Additional delay for hardware watchdog (ms)"
depends on TASK_WDT_HW_FALLBACK
default 20
range 1 1000
help
The timeout of the hardware watchdog fallback will be increased by
this value to provide sufficient time for corrective actions in the
callback function.
In addition to that, the delay compensates for deviations between the
different clock sources of the hardware watchdog and the kernel timer.
This is especially important if the hardware watchdog is clocked by an
inaccurate low-speed RC oscillator.

212
subsys/task_wdt/task_wdt.c Normal file
View file

@ -0,0 +1,212 @@
/*
* Copyright (c) 2020 Libre Solar Technologies GmbH
*
* SPDX-License-Identifier: Apache-2.0
*/
#include "task_wdt/task_wdt.h"
#include <drivers/watchdog.h>
#include <power/reboot.h>
#include <device.h>
#include <errno.h>
#define LOG_LEVEL CONFIG_WDT_LOG_LEVEL
#include <logging/log.h>
LOG_MODULE_REGISTER(task_wdt);
/*
 * This dummy channel is used to continue feeding the hardware watchdog if the
 * task watchdog timeouts are too long for regular updates.
 *
 * It is stored as the kernel timer's user data when no installed channel is
 * due within CONFIG_TASK_WDT_MIN_TIMEOUT. The value is negative so that it
 * can never collide with an index into the channels array.
 */
#define TASK_WDT_BACKGROUND_CHANNEL (-1)
/*
 * Task watchdog channel data
 *
 * One entry exists per channel; the index of the entry in the channels array
 * is the channel ID handed out by task_wdt_add().
 */
struct task_wdt_channel {
/* period in milliseconds used to reset the timeout, set to 0 to
 * indicate that the channel is available
 */
uint32_t reload_period;
/* abs. ticks when this channel expires (updated by task_wdt_feed as
 * current tick count plus reload_period converted to ticks)
 */
int64_t timeout_abs_ticks;
/* user data passed to the callback function */
void *user_data;
/* function to be called when watchdog timer expired; if NULL, the
 * system is rebooted instead
 */
task_wdt_callback_t callback;
};
/* array of all task watchdog channels; an entry is unused while its
 * reload_period is 0
 */
static struct task_wdt_channel channels[CONFIG_TASK_WDT_CHANNELS];
/* single kernel timer used for watchdog handling of all channels; it is
 * restarted on every task_wdt_feed() for the timeout that is due next
 */
static struct k_timer timer;
#ifdef CONFIG_TASK_WDT_HW_FALLBACK
/* pointer to the hardware watchdog used as a fallback (NULL if no hardware
 * watchdog was passed to task_wdt_init())
 */
static const struct device *hw_wdt_dev;
/* channel ID obtained from wdt_install_timeout() for the hardware watchdog */
static int hw_wdt_channel;
/* set in task_wdt_add() when the hardware watchdog is started via
 * wdt_setup()
 */
static bool hw_wdt_started;
#endif
/**
* @brief Task watchdog timer callback.
*
* If the device operates as intended, this function will never be called,
* as the timer is continuously restarted with the next due timeout in the
* task_wdt_feed() function.
*
* If all task watchdogs have longer timeouts than the hardware watchdog,
* this function is called regularly (via the background channel). This
* should be avoided by setting CONFIG_TASK_WDT_MIN_TIMEOUT to the minimum
* task watchdog timeout used in the application.
*
* @param timer_id Pointer to the timer which called the function
*/
static void task_wdt_trigger(struct k_timer *timer_id)
{
int channel_id = (int)k_timer_user_data_get(timer_id);
#ifdef CONFIG_TASK_WDT_HW_FALLBACK
if (channel_id == TASK_WDT_BACKGROUND_CHANNEL) {
if (hw_wdt_dev) {
wdt_feed(hw_wdt_dev, 0);
}
return;
}
#endif
if (channels[channel_id].reload_period == 0) {
/* channel was deleted */
return;
} else if (channels[channel_id].callback) {
channels[channel_id].callback(channel_id,
channels[channel_id].user_data);
} else {
sys_reboot(SYS_REBOOT_COLD);
}
}
int task_wdt_init(const struct device *hw_wdt)
{
if (hw_wdt) {
#ifdef CONFIG_TASK_WDT_HW_FALLBACK
struct wdt_timeout_cfg wdt_config;
wdt_config.flags = WDT_FLAG_RESET_SOC;
wdt_config.window.min = 0U;
wdt_config.window.max = CONFIG_TASK_WDT_MIN_TIMEOUT +
CONFIG_TASK_WDT_HW_FALLBACK_DELAY;
wdt_config.callback = NULL;
hw_wdt_dev = hw_wdt;
hw_wdt_channel = wdt_install_timeout(hw_wdt_dev, &wdt_config);
#else
return -ENOTSUP;
#endif
}
k_timer_init(&timer, task_wdt_trigger, NULL);
return 0;
}
int task_wdt_add(uint32_t reload_period, task_wdt_callback_t callback,
void *user_data)
{
if (reload_period == 0) {
return -EINVAL;
}
/* look for unused channel (reload_period set to 0) */
for (int id = 0; id < ARRAY_SIZE(channels); id++) {
if (channels[id].reload_period == 0) {
channels[id].reload_period = reload_period;
channels[id].user_data = user_data;
channels[id].timeout_abs_ticks = K_TICKS_FOREVER;
channels[id].callback = callback;
task_wdt_feed(id);
#ifdef CONFIG_TASK_WDT_HW_FALLBACK
if (!hw_wdt_started && hw_wdt_dev) {
/* also start fallback hw wdt */
wdt_setup(hw_wdt_dev, 0);
hw_wdt_started = true;
}
#endif
return id;
}
}
return -ENOMEM;
}
int task_wdt_delete(int channel_id)
{
if (channel_id < 0 || channel_id >= ARRAY_SIZE(channels)) {
return -EINVAL;
}
channels[channel_id].reload_period = 0;
return 0;
}
int task_wdt_feed(int channel_id)
{
int64_t current_ticks;
int next_channel_id; /* channel which will time out next */
int64_t next_timeout; /* timeout in absolute ticks of this channel */
if (channel_id < 0 || channel_id >= ARRAY_SIZE(channels)) {
return -EINVAL;
}
/*
* We need a critical section instead of a mutex while updating the
* channels array in order to prevent priority inversion. Otherwise,
* a low priority thread could be preempted before releasing the mutex
* and block a high priority thread that wants to feed its task wdt.
*/
k_sched_lock();
current_ticks = z_tick_get();
/* feed the specified channel */
channels[channel_id].timeout_abs_ticks = current_ticks +
k_ms_to_ticks_ceil64(channels[channel_id].reload_period);
#ifdef CONFIG_TASK_WDT_HW_FALLBACK
next_channel_id = TASK_WDT_BACKGROUND_CHANNEL;
next_timeout = current_ticks +
k_ms_to_ticks_ceil64(CONFIG_TASK_WDT_MIN_TIMEOUT);
#else
next_channel_id = 0;
next_timeout = INT64_MAX;
#endif
/* find minimum timeout of all channels */
for (int id = 0; id < ARRAY_SIZE(channels); id++) {
if (channels[id].reload_period != 0 &&
channels[id].timeout_abs_ticks < next_timeout) {
next_channel_id = id;
next_timeout = channels[id].timeout_abs_ticks;
}
}
/* update task wdt kernel timer */
k_timer_user_data_set(&timer, (void *)next_channel_id);
k_timer_start(&timer, K_TIMEOUT_ABS_TICKS(next_timeout),
K_TIMEOUT_ABS_TICKS(next_timeout));
#ifdef CONFIG_TASK_WDT_HW_FALLBACK
if (hw_wdt_dev) {
wdt_feed(hw_wdt_dev, 0);
}
#endif
k_sched_unlock();
return 0;
}