twister: support retrying on build errors

Allow retrying on build errors via the new --retry-build-errors option.
This is useful when, for example, build failures are caused by licensing
issues or intermittent network problems.

Signed-off-by: Anas Nashif <anas.nashif@intel.com>
This commit is contained in:
Anas Nashif 2022-03-16 09:44:47 -04:00
parent becef8c83b
commit ab9de7c6d7
2 changed files with 21 additions and 8 deletions

View file

@ -2848,6 +2848,7 @@ class TestSuite(DisablePyTestCollectionMixin):
self.warnings_as_errors = True
self.overflow_as_errors = False
self.quarantine_verify = False
self.retry_build_errors = False
# Keep track of which test cases we've filtered out and why
self.testcases = {}
@ -3519,21 +3520,22 @@ class TestSuite(DisablePyTestCollectionMixin):
for instance in instance_list:
self.instances[instance.name] = instance
def add_tasks_to_queue(self, pipeline, build_only=False, test_only=False):
def add_tasks_to_queue(self, pipeline, build_only=False, test_only=False, retry_build_errors=False):
for instance in self.instances.values():
if build_only:
instance.run = False
if instance.status not in ['passed', 'skipped', 'error']:
no_retry_statuses = ['passed', 'skipped']
if not retry_build_errors:
no_retry_statuses.append("error")
if instance.status not in no_retry_statuses:
logger.debug(f"adding {instance.name}")
instance.status = None
if test_only and instance.run:
pipeline.put({"op": "run", "test": instance})
else:
pipeline.put({"op": "cmake", "test": instance})
# If the instance got 'error' status before, proceed to the report stage
if instance.status == "error":
pipeline.put({"op": "report", "test": instance})
def pipeline_mgr(self, pipeline, done_queue, lock, results):
while True:
@ -3568,7 +3570,8 @@ class TestSuite(DisablePyTestCollectionMixin):
def execute(self, pipeline, done, results):
lock = Lock()
logger.info("Adding tasks to the queue...")
self.add_tasks_to_queue(pipeline, self.build_only, self.test_only)
self.add_tasks_to_queue(pipeline, self.build_only, self.test_only,
retry_build_errors=self.retry_build_errors)
logger.info("Added initial list of jobs to queue")
processes = []

View file

@ -685,6 +685,10 @@ structure in the main Zephyr tree: boards/<arch>/<board_name>/""")
"--retry-interval", type=int, default=60,
help="Retry failing tests after specified period of time.")
parser.add_argument(
"--retry-build-errors", action="store_true",
help="Retry build errors as well.")
parser.add_argument(
"-S", "--enable-slow", action="store_true",
help="Execute time-consuming test cases that have been marked "
@ -918,6 +922,7 @@ def main():
suite.cmake_only = options.cmake_only
suite.cleanup = options.runtime_artifact_cleanup
suite.test_only = options.test_only
suite.retry_build_errors = options.retry_build_errors
suite.enable_slow = options.enable_slow
suite.device_testing = options.device_testing
suite.fixtures = options.fixture
@ -1281,7 +1286,12 @@ def main():
logger.info("%d Iteration:" % (completed))
time.sleep(options.retry_interval) # waiting for the system to settle down
results.done = results.total - results.failed
results.failed = results.error
if options.retry_build_errors:
results.failed = 0
results.error = 0
else:
results.failed = results.error
results = suite.execute(pipeline, done_queue, results)
while True:
@ -1300,7 +1310,7 @@ def main():
retries = retries - 1
# There are cases where failed == error (only build failures);
# we do not retry build failures unless --retry-build-errors is given.
if retries == 0 or results.failed == results.error:
if retries == 0 or (results.failed == results.error and not options.retry_build_errors):
break