logging: add support for dictionary based logging

This adds dictionary-based logging support. Dictionary-based
logging produces binary output in which static strings are
referenced by their addresses in the binary instead of being
stored whole. This reduces the space required to store log
messages in certain scenarios.

Signed-off-by: Daniel Leung <daniel.leung@intel.com>
Authored by Daniel Leung on 2021-04-02 12:54:53 -07:00, committed by Carles Cufí
parent 2c6da987cd
commit a5ab1a7518
13 changed files with 1391 additions and 0 deletions

CMakeLists.txt

@@ -1524,6 +1524,24 @@ list(APPEND
${extra_post_build_byproducts}
)
if(CONFIG_LOG_DICTIONARY_SUPPORT)
set(LOG_DICT_DB_NAME ${PROJECT_BINARY_DIR}/log_dictionary.json)
list(APPEND
post_build_commands
COMMAND
${PYTHON_EXECUTABLE}
${ZEPHYR_BASE}/scripts/logging/dictionary/database_gen.py
${KERNEL_ELF_NAME}
${LOG_DICT_DB_NAME}
--build ${BUILD_VERSION}
)
list(APPEND
post_build_byproducts
${LOG_DICT_DB_NAME}
)
endif()
# Add post_build_commands to post-process the final .elf file produced by
# either the ZEPHYR_PREBUILT_EXECUTABLE or the KERNEL_ELF executable
# targets above.
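
For reference, the post-build command above expands to a plain script invocation that can also be run by hand (paths and build ID illustrative):

  python3 $ZEPHYR_BASE/scripts/logging/dictionary/database_gen.py \
          build/zephyr/zephyr.elf build/zephyr/log_dictionary.json \
          --build 12345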

CODEOWNERS

@@ -537,6 +537,7 @@
/scripts/coccinelle/ @himanshujha199640 @JuliaLawall
/scripts/coredump/ @dcpleung
/scripts/kconfig/ @ulfalizer
/scripts/logging/dictionary/ @dcpleung
/scripts/pylib/twister/expr_parser.py @nashif
/scripts/schemas/twister/ @nashif
/scripts/gen_app_partitions.py @dcpleung @nashif

include/logging/log_output_dict.h

@@ -0,0 +1,75 @@
/*
* Copyright (c) 2018 Nordic Semiconductor ASA
* Copyright (c) 2021 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef ZEPHYR_INCLUDE_LOGGING_LOG_OUTPUT_DICT_H_
#define ZEPHYR_INCLUDE_LOGGING_LOG_OUTPUT_DICT_H_
#include <logging/log_output.h>
#include <logging/log_msg2.h>
#include <stdarg.h>
#include <toolchain.h>
#include <sys/util.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Log message type
*/
enum log_dict_output_msg_type {
MSG_NORMAL = 0,
MSG_DROPPED_MSG = 1,
};
/**
* Output header for one dictionary-based log message.
*/
struct log_dict_output_normal_msg_hdr_t {
uint8_t type;
uint32_t domain:3;
uint32_t level:3;
uint32_t package_len:10;
uint32_t data_len:12;
uintptr_t source;
log_timestamp_t timestamp;
} __packed;
/**
* Output for one dictionary-based log message about
* dropped messages.
*/
struct log_dict_output_dropped_msg_t {
uint8_t type;
uint16_t num_dropped_messages;
} __packed;
/** @brief Process log messages v2 for dictionary-based logging.
*
* Function uses the provided context with the buffer and output function to
* process the formatted string and output the data.
*
* @param log_output Pointer to the log output instance.
* @param msg Log message.
* @param flags Optional flags.
*/
void log_dict_output_msg2_process(const struct log_output *log_output,
struct log_msg2 *msg, uint32_t flags);
/** @brief Process dropped messages indication for dictionary-based logging.
*
* Function emits a message indicating how many log messages were dropped.
*
* @param output Pointer to the log output instance.
* @param cnt Number of dropped messages.
*/
void log_dict_output_dropped_process(const struct log_output *output, uint32_t cnt);
#ifdef __cplusplus
}
#endif
#endif /* ZEPHYR_INCLUDE_LOGGING_LOG_OUTPUT_DICT_H_ */
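
As a cross-check of the format above, a minimal Python sketch that decodes one normal-message header, assuming a 32-bit little-endian target with CONFIG_LOG_TIMESTAMP_64BIT disabled (bitfields packed LSB-first into a single uint32_t, matching the parser under scripts/logging/dictionary):

import struct

def decode_normal_msg_hdr(buf, offset=0):
    """Decode struct log_dict_output_normal_msg_hdr_t (32-bit LE target)"""
    # uint8_t type, the 28-bit bitfield word, uintptr_t source,
    # log_timestamp_t timestamp -- __packed, so no padding in between
    msg_type, desc, source, timestamp = struct.unpack_from("<BIII", buf, offset)
    return {
        'type': msg_type,                    # MSG_NORMAL or MSG_DROPPED_MSG
        'domain': desc & 0x07,               # domain:3
        'level': (desc >> 3) & 0x07,         # level:3
        'package_len': (desc >> 6) & 0x3FF,  # package_len:10
        'data_len': (desc >> 16) & 0xFFF,    # data_len:12
        'source': source,
        'timestamp': timestamp,
    }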

scripts/logging/dictionary/database_gen.py

@@ -0,0 +1,291 @@
#!/usr/bin/env python3
#
# Copyright (c) 2020 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""
Dictionary-based Logging Database Generator
This takes the built Zephyr ELF binary and produces a JSON database
file for dictionary-based logging. This database is used together
with the parser to decode binary log messages.
"""
import argparse
import logging
import os
import struct
import sys
import parser.log_database
from parser.log_database import LogDatabase
import elftools
from elftools.elf.elffile import ELFFile
from elftools.elf.descriptions import describe_ei_data
from elftools.elf.sections import SymbolTableSection
LOGGER_FORMAT = "%(name)s: %(levelname)s: %(message)s"
logger = logging.getLogger(os.path.basename(sys.argv[0]))
# Sections that contain static strings
STATIC_STRING_SECTIONS = ['rodata', '.rodata', 'log_strings_sections']
def parse_args():
"""Parse command line arguments"""
argparser = argparse.ArgumentParser()
argparser.add_argument("elffile", help="Zephyr ELF binary")
argparser.add_argument("dbfile", help="Dictionary Logging Database file")
argparser.add_argument("--build", help="Build ID")
argparser.add_argument("--debug", action="store_true",
help="Print extra debugging information")
argparser.add_argument("-v", "--verbose", action="store_true",
help="Print more information")
return argparser.parse_args()
def find_elf_sections(elf, sh_name):
"""Find all sections in ELF file"""
for section in elf.iter_sections():
if section.name == sh_name:
ret = {
'name' : section.name,
'size' : section['sh_size'],
'start' : section['sh_addr'],
'end' : section['sh_addr'] + section['sh_size'] - 1,
'data' : section.data(),
}
return ret
return None
def get_kconfig_symbols(elf):
"""Get kconfig symbols from the ELF file"""
for section in elf.iter_sections():
if isinstance(section, SymbolTableSection):
return {sym.name: sym.entry.st_value
for sym in section.iter_symbols()
if sym.name.startswith("CONFIG_")}
raise LookupError("Could not find symbol table")
def find_log_const_symbols(elf):
"""Extract all "log_const_*" symbols from ELF file"""
symbol_tables = [s for s in elf.iter_sections()
if isinstance(s, elftools.elf.sections.SymbolTableSection)]
ret_list = []
for section in symbol_tables:
if not isinstance(section, elftools.elf.sections.SymbolTableSection):
continue
if section['sh_entsize'] == 0:
continue
for symbol in section.iter_symbols():
if symbol.name.startswith("log_const_"):
ret_list.append(symbol)
return ret_list
def parse_log_const_symbols(database, log_const_section, log_const_symbols):
"""Find the log instances and map source IDs to names"""
if database.is_tgt_little_endian():
formatter = "<"
else:
formatter = ">"
if database.is_tgt_64bit():
# 64-bit pointer to string
formatter += "Q"
else:
# 32-bit pointer to string
formatter += "L"
# log instance level
formatter += "B"
datum_size = struct.calcsize(formatter)
# Get the address of the first log instance
first_offset = log_const_symbols[0].entry['st_value']
for sym in log_const_symbols:
if sym.entry['st_value'] < first_offset:
first_offset = sym.entry['st_value']
first_offset -= log_const_section['start']
# find all log_const_*
for sym in log_const_symbols:
# Find data offset in log_const_section for this symbol
offset = sym.entry['st_value'] - log_const_section['start']
idx_s = offset
idx_e = offset + datum_size
datum = log_const_section['data'][idx_s:idx_e]
if len(datum) != datum_size:
# Not enough data to unpack
continue
str_ptr, level = struct.unpack(formatter, datum)
# Offset to rodata section for string
instance_name = database.find_string(str_ptr)
logger.info("Found Log Instance: %s, level: %d", instance_name, level)
# source ID is simply the element index in the log instance array
source_id = int((offset - first_offset) / sym.entry['st_size'])
database.add_log_instance(source_id, instance_name, level, sym.entry['st_value'])
def extract_elf_information(elf, database):
"""Extract information from ELF file and store in database"""
e_ident = elf.header['e_ident']
elf_data = describe_ei_data(e_ident['EI_DATA'])
if elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2LSB']:
database.set_tgt_endianness(LogDatabase.LITTLE_ENDIAN)
elif elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2MSB']:
database.set_tgt_endianness(LogDatabase.BIG_ENDIAN)
else:
logger.error("Cannot determine endianness from ELF file, exiting...")
sys.exit(1)
def process_kconfigs(elf, database):
"""Process kconfigs to extract information"""
kconfigs = get_kconfig_symbols(elf)
# 32 or 64-bit target
database.set_tgt_bits(64 if "CONFIG_64BIT" in kconfigs else 32)
# Architecture
for name, arch in parser.log_database.ARCHS.items():
if arch['kconfig'] in kconfigs:
database.set_arch(name)
break
# Put some kconfigs into the database
#
# Use 32-bit timestamp? or 64-bit?
if "CONFIG_LOG_TIMESTAMP_64BIT" in kconfigs:
database.add_kconfig("CONFIG_LOG_TIMESTAMP_64BIT",
kconfigs['CONFIG_LOG_TIMESTAMP_64BIT'])
def extract_static_string_sections(elf, database):
"""Extract sections containing static strings"""
string_sections = STATIC_STRING_SECTIONS
# Some architectures may put static strings into additional sections.
# So we need to extract them too.
arch_data = parser.log_database.ARCHS[database.get_arch()]
if "extra_string_section" in arch_data:
string_sections.extend(arch_data['extra_string_section'])
for name in string_sections:
content = find_elf_sections(elf, name)
if content is None:
continue
logger.info("Found section: %s, 0x%x - 0x%x",
name, content['start'], content['end'])
database.add_string_section(name, content)
if not database.has_string_sections():
logger.error("Cannot find any static string sections in ELF, exiting...")
sys.exit(1)
def extract_logging_subsys_information(elf, database):
"""
Extract logging subsys related information and store in database.
For example, this extracts the list of log instances to establish
mapping from source ID to name.
"""
# Extract log constant section for module names
section_log_const = find_elf_sections(elf, "log_const_sections")
if section_log_const is None:
# ESP32 puts "log_const_*" into log_static_section instead of log_const_sections
section_log_const = find_elf_sections(elf, "log_static_section")
if section_log_const is None:
logger.error("Cannot find section 'log_const_sections' in ELF file, exiting...")
sys.exit(1)
# Find all "log_const_*" symbols and parse them
log_const_symbols = find_log_const_symbols(elf)
parse_log_const_symbols(database, section_log_const, log_const_symbols)
def main():
"""Main function of database generator"""
args = parse_args()
# Setup logging
logging.basicConfig(format=LOGGER_FORMAT)
if args.verbose:
logger.setLevel(logging.INFO)
elif args.debug:
logger.setLevel(logging.DEBUG)
else:
logger.setLevel(logging.WARNING)
try:
elffile = open(args.elffile, "rb")
except OSError:
logger.error("ERROR: Cannot open ELF file: %s, exiting...", args.elffile)
sys.exit(1)
logger.info("ELF file %s", args.elffile)
logger.info("Database file %s", args.dbfile)
elf = ELFFile(elffile)
database = LogDatabase()
if args.build:
database.set_build_id(args.build)
logger.info("Build ID: %s", args.build)
extract_elf_information(elf, database)
process_kconfigs(elf, database)
logger.info("Target: %s, %d-bit", database.get_arch(), database.get_tgt_bits())
if database.is_tgt_little_endian():
logger.info("Endianness: Little")
else:
logger.info("Endianness: Big")
# Extract sections from ELF files that contain strings
extract_static_string_sections(elf, database)
# Extract information related to logging subsystem
extract_logging_subsys_information(elf, database)
# Write database file
if not LogDatabase.write_json_database(args.dbfile, database):
logger.error("ERROR: Cannot open database file for write: %s, exiting...", args.dbfile)
sys.exit(1)
elffile.close()
if __name__ == "__main__":
main()

scripts/logging/dictionary/log_parser.py

@@ -0,0 +1,131 @@
#!/usr/bin/env python3
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""
Log Parser for Dictionary-based Logging
This uses the JSON database file to decode the input binary
log data and print the log messages.
"""
import argparse
import binascii
import logging
import sys
import parser
from parser.log_database import LogDatabase
LOGGER_FORMAT = "%(message)s"
logger = logging.getLogger("parser")
LOG_HEX_SEP = "##ZLOGV1##"
def parse_args():
"""Parse command line arguments"""
argparser = argparse.ArgumentParser()
argparser.add_argument("dbfile", help="Dictionary Logging Database file")
argparser.add_argument("logfile", help="Log Data file")
argparser.add_argument("--hex", action="store_true",
help="Log Data file is in hexadecimal strings")
argparser.add_argument("--rawhex", action="store_true",
help="Log file only contains hexadecimal log data")
argparser.add_argument("--debug", action="store_true",
help="Print extra debugging information")
return argparser.parse_args()
def main():
"""Main function of log parser"""
args = parse_args()
# Setup logging for parser
logging.basicConfig(format=LOGGER_FORMAT)
if args.debug:
logger.setLevel(logging.DEBUG)
else:
logger.setLevel(logging.INFO)
# Read from database file
database = LogDatabase.read_json_database(args.dbfile)
if database is None:
logger.error("ERROR: Cannot open database file: %s, exiting...", args.dbfile)
sys.exit(1)
# Open log data file for reading
if args.hex:
if args.rawhex:
# Log file contains only hexadecimal data
logdata = parser.utils.convert_hex_file_to_bin(args.logfile)
else:
hexdata = ''
with open(args.logfile, "r") as hexfile:
for line in hexfile.readlines():
hexdata += line.strip()
if LOG_HEX_SEP not in hexdata:
logger.error("ERROR: Cannot find start of log data, exiting...")
sys.exit(1)
idx = hexdata.index(LOG_HEX_SEP) + len(LOG_HEX_SEP)
hexdata = hexdata[idx:]
if len(hexdata) % 2 != 0:
# Make sure there is an even number of characters
idx = int(len(hexdata) / 2) * 2
hexdata = hexdata[:idx]
idx = 0
while idx < len(hexdata):
# When running QEMU via west or ninja, there may be additional
# strings printed by QEMU, west or ninja (for example, QEMU
# is terminated, or user interrupted, etc). So we need to
# figure out where the log data stream ends by
# trying to convert from hex to bin.
idx += 2
try:
binascii.unhexlify(hexdata[:idx])
except binascii.Error:
idx -= 2
break
logdata = binascii.unhexlify(hexdata[:idx])
else:
try:
logfile = open(args.logfile, "rb")
except OSError:
logger.error("ERROR: Cannot open binary log data file: %s, exiting...", args.logfile)
sys.exit(1)
logdata = logfile.read()
logfile.close()
log_parser = parser.get_parser(database)
if log_parser is not None:
logger.debug("# Build ID: %s", database.get_build_id())
logger.debug("# Target: %s, %d-bit", database.get_arch(), database.get_tgt_bits())
if database.is_tgt_little_endian():
logger.debug("# Endianness: Little")
else:
logger.debug("# Endianness: Big")
ret = log_parser.parse_log_data(logdata, debug=args.debug)
if not ret:
logger.error("ERROR: there were error(s) parsing log data")
sys.exit(1)
else:
logger.error("ERROR: Cannot find a suitable parser matching database version!")
sys.exit(1)
if __name__ == "__main__":
main()
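
Typical usage against a QEMU console capture containing the ##ZLOGV1## marker (file names illustrative):

  ./scripts/logging/dictionary/log_parser.py build/zephyr/log_dictionary.json \
          qemu_console.log --hex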

scripts/logging/dictionary/parser/__init__.py

@@ -0,0 +1,21 @@
#!/usr/bin/env python3
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""
Dictionary-based Logging Parser Module
"""
from .log_parser_v1 import LogParserV1
def get_parser(database):
"""Get the parser object based on database"""
db_ver = int(database.get_version())
if db_ver == 1:
return LogParserV1(database)
return None

scripts/logging/dictionary/parser/log_database.py

@@ -0,0 +1,248 @@
#!/usr/bin/env python3
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""
Class for Dictionary-based Logging Database
"""
import base64
import copy
import json
from .utils import extract_string_from_section
ARCHS = {
"arc" : {
"kconfig": "CONFIG_ARC",
},
"arm" : {
"kconfig": "CONFIG_ARM",
},
"arm64" : {
"kconfig": "CONFIG_ARM64",
},
"sparc" : {
"kconfig": "CONFIG_SPARC",
},
"x86" : {
"kconfig": "CONFIG_X86",
},
"nios2" : {
"kconfig": "CONFIG_NIOS2",
# Small static strings are put into section "datas"
# so we need to include them also.
#
# See include/arch/nios2/linker.ld on .sdata.*
# for explanation.
"extra_string_section": ['datas'],
},
"riscv" : {
"kconfig": "CONFIG_RISCV",
},
"xtensa" : {
"kconfig": "CONFIG_XTENSA",
},
}
class LogDatabase():
"""Class of log database"""
# Update this whenever the binary format of
# dictionary-based logging changes
ZEPHYR_DICT_LOG_VER = 1
LITTLE_ENDIAN = True
BIG_ENDIAN = False
def __init__(self):
new_db = dict()
new_db['version'] = self.ZEPHYR_DICT_LOG_VER
new_db['target'] = dict()
new_db['sections'] = dict()
new_db['log_subsys'] = dict()
new_db['log_subsys']['log_instances'] = dict()
new_db['build_id'] = None
new_db['arch'] = None
new_db['kconfigs'] = dict()
self.database = new_db
def get_version(self):
"""Get Database Version"""
return self.database['version']
def get_build_id(self):
"""Get Build ID"""
return self.database['build_id']
def set_build_id(self, build_id):
"""Set Build ID in Database"""
self.database['build_id'] = build_id
def get_arch(self):
"""Get the Target Architecture"""
return self.database['arch']
def set_arch(self, arch):
"""Set the Target Architecture"""
self.database['arch'] = arch
def get_tgt_bits(self):
"""Get Target Bitness: 32 or 64"""
if 'bits' in self.database['target']:
return self.database['target']['bits']
return None
def set_tgt_bits(self, bits):
"""Set Target Bitness: 32 or 64"""
self.database['target']['bits'] = bits
def is_tgt_64bit(self):
"""Return True if target is 64-bit, False if 32-bit.
None if error."""
if 'bits' not in self.database['target']:
return None
if self.database['target']['bits'] == 32:
return False
if self.database['target']['bits'] == 64:
return True
return None
def get_tgt_endianness(self):
"""
Get Target Endianness.
Return True if little endian, False if big.
"""
if 'little_endianness' in self.database['target']:
return self.database['target']['little_endianness']
return None
def set_tgt_endianness(self, endianness):
"""
Set Target Endianness
True if little endian, False if big.
"""
self.database['target']['little_endianness'] = endianness
def is_tgt_little_endian(self):
"""Return True if target is little endian"""
if 'little_endianness' not in self.database['target']:
return None
return self.database['target']['little_endianness'] == self.LITTLE_ENDIAN
def add_string_section(self, name, sect_dict):
"""Add a static string section to the collection"""
self.database['sections'][name] = sect_dict
def has_string_sections(self):
"""Return True if there are any static string sections"""
return len(self.database['sections']) != 0
def find_string(self, string_ptr):
"""Find string pointed by string_ptr from any static string section.
Return None if not found."""
for _, sect in self.database['sections'].items():
one_str = extract_string_from_section(sect, string_ptr)
if one_str is not None:
return one_str
return None
def add_log_instance(self, source_id, name, level, address):
"""Add one log instance into database"""
self.database['log_subsys']['log_instances'][source_id] = {
'source_id' : source_id,
'name' : name,
'level' : level,
'addr' : address,
}
def get_log_source_string(self, domain_id, source_id):
"""Get the source string based on source ID"""
# JSON stores key as string, so we need to convert
src_id = str(source_id)
if src_id in self.database['log_subsys']['log_instances']:
return self.database['log_subsys']['log_instances'][src_id]['name']
return f"unknown<{domain_id}:{source_id}>"
def add_kconfig(self, name, val):
"""Add a kconfig name-value pair into database"""
self.database['kconfigs'][name] = val
def get_kconfigs(self):
"""Return kconfig name-value pairs"""
return self.database['kconfigs']
@staticmethod
def read_json_database(db_file_name):
"""Read database from file and return a LogDatabase object"""
try:
with open(db_file_name, "r") as db_fd:
json_db = json.load(db_fd)
except (OSError, json.JSONDecodeError):
return None
# Decode data in JSON back into binary data
for _, sect in json_db['sections'].items():
sect['data'] = base64.b64decode(sect['data_b64'])
database = LogDatabase()
database.database = json_db
return database
@staticmethod
def write_json_database(db_file_name, database):
"""Write the database into file"""
json_db = copy.deepcopy(database.database)
# Make database object into something JSON can dump
for _, sect in json_db['sections'].items():
encoded = base64.b64encode(sect['data'])
sect['data_b64'] = encoded.decode('ascii')
del sect['data']
try:
with open(db_file_name, "w") as db_fd:
db_fd.write(json.dumps(json_db))
except OSError:
return False
return True
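
A minimal round-trip sketch for the class above (run from scripts/logging/dictionary so the parser package resolves; the section contents are made up):

from parser.log_database import LogDatabase

db = LogDatabase()
db.set_build_id("example-build")
db.set_arch("x86")
db.set_tgt_bits(32)
db.set_tgt_endianness(LogDatabase.LITTLE_ENDIAN)
db.add_string_section('.rodata', {
    'name': '.rodata',
    'start': 0x1000,
    'size': 6,
    'end': 0x1005,
    'data': b'hello\x00',
})

assert LogDatabase.write_json_database("db.json", db)

db2 = LogDatabase.read_json_database("db.json")
assert db2.find_string(0x1000) == "hello"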

scripts/logging/dictionary/parser/log_parser.py

@@ -0,0 +1,22 @@
#!/usr/bin/env python3
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""
Abstract Class for Dictionary-based Logging Parsers
"""
import abc
class LogParser(abc.ABC):
"""Abstract class of log parser"""
def __init__(self, database):
self.database = database
@abc.abstractmethod
def parse_log_data(self, logdata, debug=False):
"""Parse log data"""
return None

scripts/logging/dictionary/parser/log_parser_v1.py

@@ -0,0 +1,450 @@
#!/usr/bin/env python3
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""
Dictionary-based Logging Parser Version 1
This contains the implementation of the parser for
version 1 databases.
"""
import logging
import struct
from .log_parser import LogParser
HEX_BYTES_IN_LINE = 16
LOG_LEVELS = ['none', 'err', 'wrn', 'inf', 'dbg']
# Need to keep sync with struct log_dict_output_msg_hdr in
# include/logging/log_output_dict.h.
#
# struct log_dict_output_normal_msg_hdr_t {
# uint8_t type;
# uint32_t domain:3;
# uint32_t level:3;
# uint32_t package_len:10;
# uint32_t data_len:12;
# uintptr_t source;
# log_timestamp_t timestamp;
# } __packed;
#
# Note "type" and "timestamp" are encoded separately below.
FMT_MSG_HDR_32 = "II"
FMT_MSG_HDR_64 = "IQ"
# Message type
# 0: normal message
# 1: number of dropped messages
FMT_MSG_TYPE = "B"
# Depends on CONFIG_LOG_TIMESTAMP_64BIT
FMT_MSG_TIMESTAMP_32 = "I"
FMT_MSG_TIMESTAMP_64 = "Q"
# Keep message types in sync with include/logging/log_output_dict.h
MSG_TYPE_NORMAL = 0
MSG_TYPE_DROPPED = 1
# Number of dropped messages
FMT_DROPPED_CNT = "H"
logger = logging.getLogger("parser")
def get_log_level_str(lvl):
"""Convert numeric log level to string"""
if lvl < 0 or lvl >= len(LOG_LEVELS):
return "unk"
return LOG_LEVELS[lvl]
def formalize_fmt_string(fmt_str):
"""Replace unsupported formatter"""
new_str = fmt_str
# Python doesn't support %lld or %llu so we need to remove the extra 'l'
new_str = new_str.replace("%lld", "%ld")
new_str = new_str.replace("%llu", "%lu")
# No %p for pointer either, so use %x
new_str = new_str.replace("%p", "0x%x")
return new_str
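# Illustration (not part of the module):
#   formalize_fmt_string("%llu bytes at %p") == "%lu bytes at 0x%x"
# and the result can be rendered with Python's %-formatting.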
class DataTypes():
"""Class regarding data types, their alignments and sizes"""
INT = 0
UINT = 1
LONG = 2
ULONG = 3
LONG_LONG = 4
ULONG_LONG = 5
PTR = 6
DOUBLE = 7
LONG_DOUBLE = 8
NUM_TYPES = 9
def __init__(self, database):
self.database = database
self.data_types = dict()
if database.is_tgt_64bit():
self.add_data_type(self.LONG, "q")
self.add_data_type(self.LONG_LONG, "q")
self.add_data_type(self.PTR, "Q")
else:
self.add_data_type(self.LONG, "i")
self.add_data_type(self.LONG_LONG, "q")
self.add_data_type(self.PTR, "I")
self.add_data_type(self.INT, "i")
self.add_data_type(self.DOUBLE, "d")
self.add_data_type(self.LONG_DOUBLE, "d")
def add_data_type(self, data_type, fmt):
"""Add one data type"""
if self.database.is_tgt_little_endian():
endianness = "<"
else:
endianness = ">"
formatter = endianness + fmt
self.data_types[data_type] = dict()
self.data_types[data_type]['fmt'] = formatter
size = struct.calcsize(formatter)
if data_type == self.LONG_DOUBLE:
# Python doesn't have long double but we still
# need to skip the correct number of bytes
size = 16
self.data_types[data_type]['sizeof'] = size
# Might need actual number for different architectures
# but these seem to work fine for now.
if self.database.is_tgt_64bit():
self.data_types[data_type]['align'] = 8
else:
self.data_types[data_type]['align'] = 4
def get_sizeof(self, data_type):
"""Get sizeof() of a data type"""
return self.data_types[data_type]['sizeof']
def get_alignment(self, data_type):
"""Get the alignment of a data type"""
return self.data_types[data_type]['align']
def get_formatter(self, data_type):
"""Get the formatter for a data type"""
return self.data_types[data_type]['fmt']
class LogParserV1(LogParser):
"""Log Parser V1"""
def __init__(self, database):
super().__init__(database=database)
if self.database.is_tgt_little_endian():
endian = "<"
else:
endian = ">"
self.fmt_msg_type = endian + FMT_MSG_TYPE
self.fmt_dropped_cnt = endian + FMT_DROPPED_CNT
if self.database.is_tgt_64bit():
self.fmt_msg_hdr = endian + FMT_MSG_HDR_64
else:
self.fmt_msg_hdr = endian + FMT_MSG_HDR_32
if "CONFIG_LOG_TIMESTAMP_64BIT" in self.database.get_kconfigs():
self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_64
else:
self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_32
self.data_types = DataTypes(self.database)
def __get_string(self, arg, arg_offset, string_tbl):
one_str = self.database.find_string(arg)
if one_str is not None:
ret = one_str
else:
# The index from the string table is basically
# the order in va_list. Need to add to the index
# to skip the packaged string header and
# the format string.
str_idx = arg_offset + self.data_types.get_sizeof(DataTypes.PTR) * 2
str_idx /= self.data_types.get_sizeof(DataTypes.INT)
ret = string_tbl[int(str_idx)]
return ret
def process_one_fmt_str(self, fmt_str, arg_list, string_tbl):
"""Parse the format string to extract arguments from
the binary arglist and return a tuple usable with
Python's string formatting"""
idx = 0
arg_offset = 0
is_parsing = False
do_extract = False
args = list()
# Translated from cbvprintf_package()
for idx, fmt in enumerate(fmt_str):
if not is_parsing:
if fmt == '%':
is_parsing = True
arg_data_type = DataTypes.INT
continue
elif fmt == '%':
# '%%' -> literal percentage sign
is_parsing = False
continue
elif fmt == '*':
pass
elif fmt.isdecimal() or str.lower(fmt) == 'l' \
or fmt in (' ', '#', '-', '+', '.', 'h'):
# formatting modifiers, just ignore
continue
elif fmt in ('j', 'z', 't'):
# intmax_t, size_t or ptrdiff_t
arg_data_type = DataTypes.LONG
elif fmt in ('c', 'd', 'i', 'o', 'u') or str.lower(fmt) == 'x':
if fmt_str[idx - 1] == 'l':
if fmt_str[idx - 2] == 'l':
arg_data_type = DataTypes.LONG_LONG
else:
arg_data_type = DataTypes.LONG
else:
arg_data_type = DataTypes.INT
is_parsing = False
do_extract = True
elif fmt in ('s', 'p', 'n'):
arg_data_type = DataTypes.PTR
is_parsing = False
do_extract = True
elif str.lower(fmt) in ('a', 'e', 'f', 'g'):
# Python doesn't do"long double".
#
# Parse it as double (probably incorrect), but
# still have to skip enough bytes.
if fmt_str[idx - 1] == 'L':
arg_data_type = DataTypes.LONG_DOUBLE
else:
arg_data_type = DataTypes.DOUBLE
is_parsing = False
do_extract = True
else:
is_parsing = False
continue
if do_extract:
do_extract = False
align = self.data_types.get_alignment(arg_data_type)
size = self.data_types.get_sizeof(arg_data_type)
unpack_fmt = self.data_types.get_formatter(arg_data_type)
one_arg = struct.unpack_from(unpack_fmt, arg_list, arg_offset)[0]
if fmt == 's':
one_arg = self.__get_string(one_arg, arg_offset, string_tbl)
args.append(one_arg)
arg_offset += size
# Align the offset
arg_offset = int((arg_offset + align - 1) / align) * align
return tuple(args)
@staticmethod
def extract_string_table(str_tbl):
"""Extract string table in a packaged log message"""
tbl = dict()
one_str = ""
next_new_string = True
# Translated from cbvprintf_package()
for one_ch in str_tbl:
if next_new_string:
str_idx = one_ch
next_new_string = False
continue
if one_ch == 0:
tbl[str_idx] = one_str
one_str = ""
next_new_string = True
continue
one_str += chr(one_ch)
return tbl
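# Illustration (not part of the module): the byte before each string is
# its argument index as recorded by cbvprintf_package(), so
#   extract_string_table(b'\x02hi\x00\x07log\x00') == {2: 'hi', 7: 'log'}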
@staticmethod
def print_hexdump(hex_data, prefix_len):
"""Print hex dump"""
hex_vals = ""
chr_vals = ""
chr_done = 0
for one_hex in hex_data:
hex_vals += "%x " % one_hex
chr_vals += chr(one_hex)
chr_done += 1
if chr_done == HEX_BYTES_IN_LINE / 2:
hex_vals += " "
chr_vals += " "
elif chr_done == HEX_BYTES_IN_LINE:
print("%s%s|%s" % ((" " * prefix_len), hex_vals, chr_vals))
hex_vals = ""
chr_vals = ""
chr_done = 0
if len(chr_vals) > 0:
hex_padding = " " * (HEX_BYTES_IN_LINE - chr_done)
print("%s%s%s|%s" % ((" " * prefix_len), hex_vals, hex_padding, chr_vals))
def parse_one_normal_msg(self, logdata, offset):
"""Parse one normal log message and print the encoded message"""
# Parse log message header
log_desc, source_id = struct.unpack_from(self.fmt_msg_hdr, logdata, offset)
offset += struct.calcsize(self.fmt_msg_hdr)
timestamp = struct.unpack_from(self.fmt_msg_timestamp, logdata, offset)[0]
offset += struct.calcsize(self.fmt_msg_timestamp)
# domain_id, level, pkg_len, data_len
domain_id = log_desc & 0x07
level = (log_desc >> 3) & 0x07
pkg_len = (log_desc >> 6) & 0x3FF
data_len = (log_desc >> 16) & 0xFFF
level_str = get_log_level_str(level)
source_id_str = self.database.get_log_source_string(domain_id, source_id)
# Skip over data to point to next message (save as return value)
next_msg_offset = offset + pkg_len + data_len
# Offset from beginning of cbprintf_packaged data to end of va_list arguments
offset_end_of_args = struct.unpack_from("B", logdata, offset)[0]
offset_end_of_args *= self.data_types.get_sizeof(DataTypes.INT)
offset_end_of_args += offset
# Extra data after packaged log
extra_data = logdata[(offset + pkg_len):next_msg_offset]
# Number of packaged strings
num_packed_strings = struct.unpack_from("B", logdata, offset+1)[0]
# Extract the string table in the packaged log message
string_tbl = self.extract_string_table(logdata[offset_end_of_args:(offset + pkg_len)])
if len(string_tbl) != num_packed_strings:
logger.error("------ Error extracting string table")
return None
# Skip packaged string header
offset += self.data_types.get_sizeof(DataTypes.PTR)
# Grab the format string
#
# Note the negative offset to __get_string(). It is because
# the offset begins at 0 for va_list. However, the format string
# itself is before the va_list, so we need to go back the width of
# a pointer.
fmt_str_ptr = struct.unpack_from(self.data_types.get_formatter(DataTypes.PTR),
logdata, offset)[0]
fmt_str = self.__get_string(fmt_str_ptr,
-self.data_types.get_sizeof(DataTypes.PTR),
string_tbl)
offset += self.data_types.get_sizeof(DataTypes.PTR)
if not fmt_str:
logger.error("------ Error getting format string at 0x%x", fmt_str_ptr)
return None
args = self.process_one_fmt_str(fmt_str, logdata[offset:offset_end_of_args], string_tbl)
fmt_str = formalize_fmt_string(fmt_str)
log_msg = fmt_str % args
log_prefix = ""
if level == 0:
print("%s" % log_msg, end='')
else:
log_prefix = f"[{timestamp:>10}] <{level_str}> {source_id_str}: "
print("%s%s" % (log_prefix, log_msg))
if data_len > 0:
# Has hexdump data
self.print_hexdump(extra_data, len(log_prefix))
# Point to next message
return next_msg_offset
def parse_log_data(self, logdata, debug=False):
"""Parse binary log data and print the encoded log messages"""
offset = 0
while offset < len(logdata):
# Get message type
msg_type = struct.unpack_from(self.fmt_msg_type, logdata, offset)[0]
offset += struct.calcsize(self.fmt_msg_type)
if msg_type == MSG_TYPE_DROPPED:
num_dropped = struct.unpack_from(self.fmt_dropped_cnt, logdata, offset)[0]
offset += struct.calcsize(self.fmt_dropped_cnt)
print("--- %d messages dropped ---" % num_dropped)
elif msg_type == MSG_TYPE_NORMAL:
ret = self.parse_one_normal_msg(logdata, offset)
if ret is None:
return False
offset = ret
else:
logger.error("------ Unknown message type: %s", msg_type)
return False
return True
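
Decoded output from this parser looks roughly like the following (values illustrative; level-0 messages print with no prefix, and hexdump data is appended under its message):

  [      1234] <inf> main: Hello World!
  --- 2 messages dropped ---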

scripts/logging/dictionary/parser/utils.py

@@ -0,0 +1,43 @@
#!/usr/bin/env python3
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""
Utilities for Dictionary-based Logging Parser
"""
import binascii
def convert_hex_file_to_bin(hexfile):
"""This converts a file in hexadecimal to binary"""
bin_data = b''
with open(hexfile, "r") as hfile:
for line in hfile.readlines():
hex_str = line.strip()
bin_str = binascii.unhexlify(hex_str)
bin_data += bin_str
return bin_data
def extract_string_from_section(section, str_ptr):
"""Extract all the strings in an ELF section"""
data = section['data']
max_offset = section['size']
offset = str_ptr - section['start']
if offset < 0 or offset >= max_offset:
return None
ret_str = ""
while (offset < max_offset) and (data[offset] != 0):
ret_str += chr(data[offset])
offset += 1
return ret_str
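
A quick illustration of extract_string_from_section() (section values made up):

section = {'start': 0x2000, 'size': 6, 'data': b'hello\x00'}
assert extract_string_from_section(section, 0x2002) == "llo"
assert extract_string_from_section(section, 0x3000) is None  # out of range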

subsys/logging/CMakeLists.txt

@@ -78,6 +78,10 @@ if(NOT CONFIG_LOG_MINIMAL)
log_backend_spinel.c
)
if(CONFIG_LOG_DICTIONARY_SUPPORT)
zephyr_sources(log_output_dict.c)
endif()
else()
zephyr_sources(log_minimal.c)
endif()

subsys/logging/Kconfig

@@ -27,6 +27,19 @@ config LOG_MIPI_SYST_ENABLE
help
Enable MIPI SyS-T format output for the logger system.
config LOG_DICTIONARY_SUPPORT
bool
depends on LOG2
help
Enable support for dictionary-based logging.
Dictionary-based logging is binary in nature: predefined strings
in the log output are replaced with their addresses in the built
binary image. This reduces the space required to store log output
when long format strings would otherwise be logged.
This option should be selected by the backend automatically.
config LOG_IMMEDIATE_CLEAN_OUTPUT
bool "Clean log output"
depends on LOG_IMMEDIATE
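
Since LOG_DICTIONARY_SUPPORT has no prompt, a backend that emits this format is expected to enable it via select; a hypothetical backend entry (the symbol name is illustrative, not part of this commit):

config LOG_BACKEND_FOO_OUTPUT_DICTIONARY
	bool "Dictionary"
	depends on LOG2
	select LOG_DICTIONARY_SUPPORT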

subsys/logging/log_output_dict.c

@@ -0,0 +1,74 @@
/*
* Copyright (c) 2018 Nordic Semiconductor ASA
* Copyright (c) 2021 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <logging/log.h>
#include <logging/log_ctrl.h>
#include <logging/log_output.h>
#include <logging/log_output_dict.h>
#include <sys/__assert.h>
#include <sys/util.h>
static void buffer_write(log_output_func_t outf, uint8_t *buf, size_t len,
void *ctx)
{
int processed;
do {
processed = outf(buf, len, ctx);
len -= processed;
buf += processed;
} while (len != 0);
}
void log_dict_output_msg2_process(const struct log_output *output,
struct log_msg2 *msg, uint32_t flags)
{
struct log_dict_output_normal_msg_hdr_t output_hdr;
void *source = (void *)log_msg2_get_source(msg);
/* Keep in sync with the header in struct log_msg2 */
output_hdr.type = MSG_NORMAL;
output_hdr.domain = msg->hdr.desc.domain;
output_hdr.level = msg->hdr.desc.level;
output_hdr.package_len = msg->hdr.desc.package_len;
output_hdr.data_len = msg->hdr.desc.data_len;
output_hdr.timestamp = msg->hdr.timestamp;
output_hdr.source = (source != NULL) ?
(IS_ENABLED(CONFIG_LOG_RUNTIME_FILTERING) ?
log_dynamic_source_id(source) :
log_const_source_id(source)) :
0U;
buffer_write(output->func, (uint8_t *)&output_hdr, sizeof(output_hdr),
(void *)output);
size_t len;
uint8_t *data = log_msg2_get_package(msg, &len);
if (len > 0U) {
buffer_write(output->func, data, len, (void *)output);
}
data = log_msg2_get_data(msg, &len);
if (len > 0U) {
buffer_write(output->func, data, len, (void *)output);
}
log_output_flush(output);
}
void log_dict_output_dropped_process(const struct log_output *output, uint32_t cnt)
{
struct log_dict_output_dropped_msg_t msg;
msg.type = MSG_DROPPED_MSG;
msg.num_dropped_messages = MIN(cnt, 9999);
buffer_write(output->func, (uint8_t *)&msg, sizeof(msg),
(void *)output);
}
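
For context, a log backend would hand v2 messages to these functions instead of the plain-text formatter. A minimal sketch, with the backend name, buffer size, and hook wiring assumed rather than taken from this commit:

#include <logging/log_output.h>
#include <logging/log_output_dict.h>

static uint8_t dict_output_buf[64];

/* Transport hook: push length bytes out (e.g. over a UART) and return
 * how many bytes were consumed.
 */
static int dict_char_out(uint8_t *data, size_t length, void *ctx)
{
	/* ... write data[0..length-1] to the transport ... */
	return length;
}

LOG_OUTPUT_DEFINE(log_output_dict_example, dict_char_out,
		  dict_output_buf, sizeof(dict_output_buf));

/* Called from the backend's per-message process hook */
static void backend_process(struct log_msg2 *msg, uint32_t flags)
{
	log_dict_output_msg2_process(&log_output_dict_example, msg, flags);
}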