logging: add support for dictionary based logging

This adds dictionary-based logging support. Dictionary-based
logging produces binary output in which static strings are
referenced by their addresses in the binary instead of being
stored whole. This reduces the space required to store log
messages in certain scenarios.

Signed-off-by: Daniel Leung <daniel.leung@intel.com>
Authored by Daniel Leung on 2021-04-02 12:54:53 -07:00, committed by Carles Cufí
parent 2c6da987cd
commit a5ab1a7518
13 changed files with 1391 additions and 0 deletions

CMakeLists.txt

@@ -1524,6 +1524,24 @@ list(APPEND
${extra_post_build_byproducts}
)
if(CONFIG_LOG_DICTIONARY_SUPPORT)
set(LOG_DICT_DB_NAME ${PROJECT_BINARY_DIR}/log_dictionary.json)
list(APPEND
post_build_commands
COMMAND
${PYTHON_EXECUTABLE}
${ZEPHYR_BASE}/scripts/logging/dictionary/database_gen.py
${KERNEL_ELF_NAME}
${LOG_DICT_DB_NAME}
--build ${BUILD_VERSION}
)
list(APPEND
post_build_byproducts
${LOG_DICT_DB_NAME}
)
endif()
# Add post_build_commands to post-process the final .elf file produced by
# either the ZEPHYR_PREBUILT_EXECUTABLE or the KERNEL_ELF executable
# targets above.
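
For reference, the post-build command above expands to a plain script invocation that can also be run by hand (paths and build ID illustrative):

  python3 $ZEPHYR_BASE/scripts/logging/dictionary/database_gen.py \
          build/zephyr/zephyr.elf build/zephyr/log_dictionary.json \
          --build 12345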

CODEOWNERS

@@ -537,6 +537,7 @@
/scripts/coccinelle/ @himanshujha199640 @JuliaLawall
/scripts/coredump/ @dcpleung
/scripts/kconfig/ @ulfalizer
/scripts/logging/dictionary/ @dcpleung
/scripts/pylib/twister/expr_parser.py @nashif
/scripts/schemas/twister/ @nashif
/scripts/gen_app_partitions.py @dcpleung @nashif

include/logging/log_output_dict.h

@@ -0,0 +1,75 @@
/*
* Copyright (c) 2018 Nordic Semiconductor ASA
* Copyright (c) 2021 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef ZEPHYR_INCLUDE_LOGGING_LOG_OUTPUT_DICT_H_
#define ZEPHYR_INCLUDE_LOGGING_LOG_OUTPUT_DICT_H_
#include <logging/log_output.h>
#include <logging/log_msg2.h>
#include <stdarg.h>
#include <toolchain.h>
#include <sys/util.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Log message type
*/
enum log_dict_output_msg_type {
MSG_NORMAL = 0,
MSG_DROPPED_MSG = 1,
};
/**
* Output header for one dictionary-based log message.
*/
struct log_dict_output_normal_msg_hdr_t {
uint8_t type;
uint32_t domain:3;
uint32_t level:3;
uint32_t package_len:10;
uint32_t data_len:12;
uintptr_t source;
log_timestamp_t timestamp;
} __packed;
/**
* Output for one dictionary-based log message about
* dropped messages.
*/
struct log_dict_output_dropped_msg_t {
uint8_t type;
uint16_t num_dropped_messages;
} __packed;
/** @brief Process log messages v2 for dictionary-based logging.
*
* Function uses the provided context with the buffer and output function to
* process the formatted string and output the data.
*
* @param log_output Pointer to the log output instance.
* @param msg Log message.
* @param flags Optional flags.
*/
void log_dict_output_msg2_process(const struct log_output *log_output,
struct log_msg2 *msg, uint32_t flags);
/** @brief Process dropped messages indication for dictionary-based logging.
*
* Function emits a message indicating how many log messages were dropped.
*
* @param output Pointer to the log output instance.
* @param cnt Number of dropped messages.
*/
void log_dict_output_dropped_process(const struct log_output *output, uint32_t cnt);
#ifdef __cplusplus
}
#endif
#endif /* ZEPHYR_INCLUDE_LOGGING_LOG_OUTPUT_DICT_H_ */
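
As a cross-check of the format above, a minimal Python sketch that decodes one normal-message header, assuming a 32-bit little-endian target with CONFIG_LOG_TIMESTAMP_64BIT disabled (bitfields packed LSB-first into a single uint32_t, matching the parser under scripts/logging/dictionary):

import struct

def decode_normal_msg_hdr(buf, offset=0):
    """Decode struct log_dict_output_normal_msg_hdr_t (32-bit LE target)"""
    # uint8_t type, the 28-bit bitfield word, uintptr_t source,
    # log_timestamp_t timestamp -- __packed, so no padding in between
    msg_type, desc, source, timestamp = struct.unpack_from("<BIII", buf, offset)
    return {
        'type': msg_type,                    # MSG_NORMAL or MSG_DROPPED_MSG
        'domain': desc & 0x07,               # domain:3
        'level': (desc >> 3) & 0x07,         # level:3
        'package_len': (desc >> 6) & 0x3FF,  # package_len:10
        'data_len': (desc >> 16) & 0xFFF,    # data_len:12
        'source': source,
        'timestamp': timestamp,
    }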

scripts/logging/dictionary/database_gen.py

@@ -0,0 +1,291 @@
#!/usr/bin/env python3
#
# Copyright (c) 2020 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""
Dictionary-based Logging Database Generator
This takes the built Zephyr ELF binary and produces a JSON database
file for dictionary-based logging. This database is used together
with the parser to decode binary log messages.
"""
import argparse
import logging
import os
import struct
import sys
import parser.log_database
from parser.log_database import LogDatabase
import elftools
from elftools.elf.elffile import ELFFile
from elftools.elf.descriptions import describe_ei_data
from elftools.elf.sections import SymbolTableSection
LOGGER_FORMAT = "%(name)s: %(levelname)s: %(message)s"
logger = logging.getLogger(os.path.basename(sys.argv[0]))
# Sections that contain static strings
STATIC_STRING_SECTIONS = ['rodata', '.rodata', 'log_strings_sections']
def parse_args():
"""Parse command line arguments"""
argparser = argparse.ArgumentParser()
argparser.add_argument("elffile", help="Zephyr ELF binary")
argparser.add_argument("dbfile", help="Dictionary Logging Database file")
argparser.add_argument("--build", help="Build ID")
argparser.add_argument("--debug", action="store_true",
help="Print extra debugging information")
argparser.add_argument("-v", "--verbose", action="store_true",
help="Print more information")
return argparser.parse_args()
def find_elf_sections(elf, sh_name):
"""Find all sections in ELF file"""
for section in elf.iter_sections():
if section.name == sh_name:
ret = {
'name' : section.name,
'size' : section['sh_size'],
'start' : section['sh_addr'],
'end' : section['sh_addr'] + section['sh_size'] - 1,
'data' : section.data(),
}
return ret
return None
def get_kconfig_symbols(elf):
"""Get kconfig symbols from the ELF file"""
for section in elf.iter_sections():
if isinstance(section, SymbolTableSection):
return {sym.name: sym.entry.st_value
for sym in section.iter_symbols()
if sym.name.startswith("CONFIG_")}
raise LookupError("Could not find symbol table")
def find_log_const_symbols(elf):
"""Extract all "log_const_*" symbols from ELF file"""
symbol_tables = [s for s in elf.iter_sections()
if isinstance(s, elftools.elf.sections.SymbolTableSection)]
ret_list = []
for section in symbol_tables:
if not isinstance(section, elftools.elf.sections.SymbolTableSection):
continue
if section['sh_entsize'] == 0:
continue
for symbol in section.iter_symbols():
if symbol.name.startswith("log_const_"):
ret_list.append(symbol)
return ret_list
def parse_log_const_symbols(database, log_const_section, log_const_symbols):
"""Find the log instances and map source IDs to names"""
if database.is_tgt_little_endian():
formatter = "<"
else:
formatter = ">"
if database.is_tgt_64bit():
# 64-bit pointer to string
formatter += "Q"
else:
# 32-bit pointer to string
formatter += "L"
# log instance level
formatter += "B"
datum_size = struct.calcsize(formatter)
# Get the address of the first log instance
first_offset = log_const_symbols[0].entry['st_value']
for sym in log_const_symbols:
if sym.entry['st_value'] < first_offset:
first_offset = sym.entry['st_value']
first_offset -= log_const_section['start']
# find all log_const_*
for sym in log_const_symbols:
# Find data offset in log_const_section for this symbol
offset = sym.entry['st_value'] - log_const_section['start']
idx_s = offset
idx_e = offset + datum_size
datum = log_const_section['data'][idx_s:idx_e]
if len(datum) != datum_size:
# Not enough data to unpack
continue
str_ptr, level = struct.unpack(formatter, datum)
# Offset to rodata section for string
instance_name = database.find_string(str_ptr)
logger.info("Found Log Instance: %s, level: %d", instance_name, level)
# source ID is simply the element index in the log instance array
source_id = int((offset - first_offset) / sym.entry['st_size'])
database.add_log_instance(source_id, instance_name, level, sym.entry['st_value'])
def extract_elf_information(elf, database):
"""Extract information from ELF file and store in database"""
e_ident = elf.header['e_ident']
elf_data = describe_ei_data(e_ident['EI_DATA'])
if elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2LSB']:
database.set_tgt_endianness(LogDatabase.LITTLE_ENDIAN)
elif elf_data == elftools.elf.descriptions._DESCR_EI_DATA['ELFDATA2MSB']:
database.set_tgt_endianness(LogDatabase.BIG_ENDIAN)
else:
logger.error("Cannot determine endianness from ELF file, exiting...")
sys.exit(1)
def process_kconfigs(elf, database):
"""Process kconfigs to extract information"""
kconfigs = get_kconfig_symbols(elf)
# 32 or 64-bit target
database.set_tgt_bits(64 if "CONFIG_64BIT" in kconfigs else 32)
# Architecture
for name, arch in parser.log_database.ARCHS.items():
if arch['kconfig'] in kconfigs:
database.set_arch(name)
break
# Put some kconfigs into the database
#
# Use 32-bit timestamp? or 64-bit?
if "CONFIG_LOG_TIMESTAMP_64BIT" in kconfigs:
database.add_kconfig("CONFIG_LOG_TIMESTAMP_64BIT",
kconfigs['CONFIG_LOG_TIMESTAMP_64BIT'])
def extract_static_string_sections(elf, database):
"""Extract sections containing static strings"""
string_sections = STATIC_STRING_SECTIONS
# Some architectures may put static strings into additional sections.
# So we need to extract them too.
arch_data = parser.log_database.ARCHS[database.get_arch()]
if "extra_string_section" in arch_data:
string_sections.extend(arch_data['extra_string_section'])
for name in string_sections:
content = find_elf_sections(elf, name)
if content is None:
continue
logger.info("Found section: %s, 0x%x - 0x%x",
name, content['start'], content['end'])
database.add_string_section(name, content)
if not database.has_string_sections():
logger.error("Cannot find any static string sections in ELF, exiting...")
sys.exit(1)
def extract_logging_subsys_information(elf, database):
"""
Extract logging subsys related information and store in database.
For example, this extracts the list of log instances to establish
mapping from source ID to name.
"""
# Extract log constant section for module names
section_log_const = find_elf_sections(elf, "log_const_sections")
if section_log_const is None:
# ESP32 puts "log_const_*" into log_static_section instead of log_const_sections
section_log_const = find_elf_sections(elf, "log_static_section")
if section_log_const is None:
logger.error("Cannot find section 'log_const_sections' in ELF file, exiting...")
sys.exit(1)
# Find all "log_const_*" symbols and parse them
log_const_symbols = find_log_const_symbols(elf)
parse_log_const_symbols(database, section_log_const, log_const_symbols)
def main():
"""Main function of database generator"""
args = parse_args()
# Setup logging
logging.basicConfig(format=LOGGER_FORMAT)
if args.verbose:
logger.setLevel(logging.INFO)
elif args.debug:
logger.setLevel(logging.DEBUG)
else:
logger.setLevel(logging.WARNING)
try:
elffile = open(args.elffile, "rb")
except OSError:
logger.error("ERROR: Cannot open ELF file: %s, exiting...", args.elffile)
sys.exit(1)
logger.info("ELF file %s", args.elffile)
logger.info("Database file %s", args.dbfile)
elf = ELFFile(elffile)
database = LogDatabase()
if args.build:
database.set_build_id(args.build)
logger.info("Build ID: %s", args.build)
extract_elf_information(elf, database)
process_kconfigs(elf, database)
logger.info("Target: %s, %d-bit", database.get_arch(), database.get_tgt_bits())
if database.is_tgt_little_endian():
logger.info("Endianness: Little")
else:
logger.info("Endianness: Big")
# Extract sections from ELF files that contain strings
extract_static_string_sections(elf, database)
# Extract information related to logging subsystem
extract_logging_subsys_information(elf, database)
# Write database file
if not LogDatabase.write_json_database(args.dbfile, database):
logger.error("ERROR: Cannot open database file for write: %s, exiting...", args.dbfile)
sys.exit(1)
elffile.close()
if __name__ == "__main__":
main()

scripts/logging/dictionary/log_parser.py

@@ -0,0 +1,131 @@
#!/usr/bin/env python3
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""
Log Parser for Dictionary-based Logging
This uses the JSON database file to decode the input binary
log data and print the log messages.
"""
import argparse
import binascii
import logging
import sys
import parser
from parser.log_database import LogDatabase
LOGGER_FORMAT = "%(message)s"
logger = logging.getLogger("parser")
LOG_HEX_SEP = "##ZLOGV1##"
def parse_args():
"""Parse command line arguments"""
argparser = argparse.ArgumentParser()
argparser.add_argument("dbfile", help="Dictionary Logging Database file")
argparser.add_argument("logfile", help="Log Data file")
argparser.add_argument("--hex", action="store_true",
help="Log Data file is in hexadecimal strings")
argparser.add_argument("--rawhex", action="store_true",
help="Log file only contains hexadecimal log data")
argparser.add_argument("--debug", action="store_true",
help="Print extra debugging information")
return argparser.parse_args()
def main():
"""Main function of log parser"""
args = parse_args()
# Setup logging for parser
logging.basicConfig(format=LOGGER_FORMAT)
if args.debug:
logger.setLevel(logging.DEBUG)
else:
logger.setLevel(logging.INFO)
# Read from database file
database = LogDatabase.read_json_database(args.dbfile)
if database is None:
logger.error("ERROR: Cannot open database file: %s, exiting...", args.dbfile)
sys.exit(1)
# Open log data file for reading
if args.hex:
if args.rawhex:
# Log file contains only hexadecimal data
logdata = parser.utils.convert_hex_file_to_bin(args.logfile)
else:
hexdata = ''
with open(args.logfile, "r") as hexfile:
for line in hexfile.readlines():
hexdata += line.strip()
if LOG_HEX_SEP not in hexdata:
logger.error("ERROR: Cannot find start of log data, exiting...")
sys.exit(1)
idx = hexdata.index(LOG_HEX_SEP) + len(LOG_HEX_SEP)
hexdata = hexdata[idx:]
if len(hexdata) % 2 != 0:
# Make sure there is an even number of characters
idx = int(len(hexdata) / 2) * 2
hexdata = hexdata[:idx]
idx = 0
while idx < len(hexdata):
# When running QEMU via west or ninja, there may be additional
# strings printed by QEMU, west or ninja (for example, QEMU
# is terminated, or user interrupted, etc). So we need to
# figure out where the log data stream ends by
# trying to convert from hex to bin.
idx += 2
try:
binascii.unhexlify(hexdata[:idx])
except binascii.Error:
idx -= 2
break
logdata = binascii.unhexlify(hexdata[:idx])
else:
try:
logfile = open(args.logfile, "rb")
except OSError:
logger.error("ERROR: Cannot open binary log data file: %s, exiting...", args.logfile)
sys.exit(1)
logdata = logfile.read()
logfile.close()
log_parser = parser.get_parser(database)
if log_parser is not None:
logger.debug("# Build ID: %s", database.get_build_id())
logger.debug("# Target: %s, %d-bit", database.get_arch(), database.get_tgt_bits())
if database.is_tgt_little_endian():
logger.debug("# Endianness: Little")
else:
logger.debug("# Endianness: Big")
ret = log_parser.parse_log_data(logdata, debug=args.debug)
if not ret:
logger.error("ERROR: there were error(s) parsing log data")
sys.exit(1)
else:
logger.error("ERROR: Cannot find a suitable parser matching database version!")
sys.exit(1)
if __name__ == "__main__":
main()
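
Typical usage against a QEMU console capture containing the ##ZLOGV1## marker (file names illustrative):

  ./scripts/logging/dictionary/log_parser.py build/zephyr/log_dictionary.json \
          qemu_console.log --hex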

scripts/logging/dictionary/parser/__init__.py

@@ -0,0 +1,21 @@
#!/usr/bin/env python3
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""
Dictionary-based Logging Parser Module
"""
from .log_parser_v1 import LogParserV1
def get_parser(database):
"""Get the parser object based on database"""
db_ver = int(database.get_version())
if db_ver == 1:
return LogParserV1(database)
return None

scripts/logging/dictionary/parser/log_database.py

@@ -0,0 +1,248 @@
#!/usr/bin/env python3
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""
Class for Dictionary-based Logging Database
"""
import base64
import copy
import json
from .utils import extract_string_from_section
ARCHS = {
"arc" : {
"kconfig": "CONFIG_ARC",
},
"arm" : {
"kconfig": "CONFIG_ARM",
},
"arm64" : {
"kconfig": "CONFIG_ARM64",
},
"sparc" : {
"kconfig": "CONFIG_SPARC",
},
"x86" : {
"kconfig": "CONFIG_X86",
},
"nios2" : {
"kconfig": "CONFIG_NIOS2",
# Small static strings are put into section "datas"
# so we need to include them also.
#
# See include/arch/nios2/linker.ld on .sdata.*
# for explanation.
"extra_string_section": ['datas'],
},
"riscv" : {
"kconfig": "CONFIG_RISCV",
},
"xtensa" : {
"kconfig": "CONFIG_XTENSA",
},
}
class LogDatabase():
"""Class of log database"""
# Update this whenever the binary format of
# dictionary-based logging changes
ZEPHYR_DICT_LOG_VER = 1
LITTLE_ENDIAN = True
BIG_ENDIAN = False
def __init__(self):
new_db = dict()
new_db['version'] = self.ZEPHYR_DICT_LOG_VER
new_db['target'] = dict()
new_db['sections'] = dict()
new_db['log_subsys'] = dict()
new_db['log_subsys']['log_instances'] = dict()
new_db['build_id'] = None
new_db['arch'] = None
new_db['kconfigs'] = dict()
self.database = new_db
def get_version(self):
"""Get Database Version"""
return self.database['version']
def get_build_id(self):
"""Get Build ID"""
return self.database['build_id']
def set_build_id(self, build_id):
"""Set Build ID in Database"""
self.database['build_id'] = build_id
def get_arch(self):
"""Get the Target Architecture"""
return self.database['arch']
def set_arch(self, arch):
"""Set the Target Architecture"""
self.database['arch'] = arch
def get_tgt_bits(self):
"""Get Target Bitness: 32 or 64"""
if 'bits' in self.database['target']:
return self.database['target']['bits']
return None
def set_tgt_bits(self, bits):
"""Set Target Bitness: 32 or 64"""
self.database['target']['bits'] = bits
def is_tgt_64bit(self):
"""Return True if target is 64-bit, False if 32-bit.
None if error."""
if 'bits' not in self.database['target']:
return None
if self.database['target']['bits'] == 32:
return False
if self.database['target']['bits'] == 64:
return True
return None
def get_tgt_endianness(self):
"""
Get Target Endianness.
Return True if little endian, False if big.
"""
if 'little_endianness' in self.database['target']:
return self.database['target']['little_endianness']
return None
def set_tgt_endianness(self, endianness):
"""
Set Target Endianness
True if little endian, False if big.
"""
self.database['target']['little_endianness'] = endianness
def is_tgt_little_endian(self):
"""Return True if target is little endian"""
if 'little_endianness' not in self.database['target']:
return None
return self.database['target']['little_endianness'] == self.LITTLE_ENDIAN
def add_string_section(self, name, sect_dict):
"""Add a static string section to the collection"""
self.database['sections'][name] = sect_dict
def has_string_sections(self):
"""Return True if there are any static string sections"""
return len(self.database['sections']) != 0
def find_string(self, string_ptr):
"""Find string pointed by string_ptr from any static string section.
Return None if not found."""
for _, sect in self.database['sections'].items():
one_str = extract_string_from_section(sect, string_ptr)
if one_str is not None:
return one_str
return None
def add_log_instance(self, source_id, name, level, address):
"""Add one log instance into database"""
self.database['log_subsys']['log_instances'][source_id] = {
'source_id' : source_id,
'name' : name,
'level' : level,
'addr' : address,
}
def get_log_source_string(self, domain_id, source_id):
"""Get the source string based on source ID"""
# JSON stores key as string, so we need to convert
src_id = str(source_id)
if src_id in self.database['log_subsys']['log_instances']:
return self.database['log_subsys']['log_instances'][src_id]['name']
return f"unknown<{domain_id}:{source_id}>"
def add_kconfig(self, name, val):
"""Add a kconfig name-value pair into database"""
self.database['kconfigs'][name] = val
def get_kconfigs(self):
"""Return kconfig name-value pairs"""
return self.database['kconfigs']
@staticmethod
def read_json_database(db_file_name):
"""Read database from file and return a LogDatabase object"""
try:
with open(db_file_name, "r") as db_fd:
json_db = json.load(db_fd)
except (OSError, json.JSONDecodeError):
return None
# Decode data in JSON back into binary data
for _, sect in json_db['sections'].items():
sect['data'] = base64.b64decode(sect['data_b64'])
database = LogDatabase()
database.database = json_db
return database
@staticmethod
def write_json_database(db_file_name, database):
"""Write the database into file"""
json_db = copy.deepcopy(database.database)
# Make database object into something JSON can dump
for _, sect in json_db['sections'].items():
encoded = base64.b64encode(sect['data'])
sect['data_b64'] = encoded.decode('ascii')
del sect['data']
try:
with open(db_file_name, "w") as db_fd:
db_fd.write(json.dumps(json_db))
except OSError:
return False
return True
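
A minimal round-trip sketch for the class above (run from scripts/logging/dictionary so the parser package resolves; the section contents are made up):

from parser.log_database import LogDatabase

db = LogDatabase()
db.set_build_id("example-build")
db.set_arch("x86")
db.set_tgt_bits(32)
db.set_tgt_endianness(LogDatabase.LITTLE_ENDIAN)
db.add_string_section('.rodata', {
    'name': '.rodata',
    'start': 0x1000,
    'size': 6,
    'end': 0x1005,
    'data': b'hello\x00',
})

assert LogDatabase.write_json_database("db.json", db)

db2 = LogDatabase.read_json_database("db.json")
assert db2.find_string(0x1000) == "hello"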

scripts/logging/dictionary/parser/log_parser.py

@@ -0,0 +1,22 @@
#!/usr/bin/env python3
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""
Abstract Class for Dictionary-based Logging Parsers
"""
import abc
class LogParser(abc.ABC):
"""Abstract class of log parser"""
def __init__(self, database):
self.database = database
@abc.abstractmethod
def parse_log_data(self, logdata, debug=False):
"""Parse log data"""
return None

scripts/logging/dictionary/parser/log_parser_v1.py

@@ -0,0 +1,450 @@
#!/usr/bin/env python3
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""
Dictionary-based Logging Parser Version 1
This contains the implementation of the parser for
version 1 databases.
"""
import logging
import struct
from .log_parser import LogParser
HEX_BYTES_IN_LINE = 16
LOG_LEVELS = ['none', 'err', 'wrn', 'inf', 'dbg']
# Need to keep sync with struct log_dict_output_msg_hdr in
# include/logging/log_output_dict.h.
#
# struct log_dict_output_normal_msg_hdr_t {
# uint8_t type;
# uint32_t domain:3;
# uint32_t level:3;
# uint32_t package_len:10;
# uint32_t data_len:12;
# uintptr_t source;
# log_timestamp_t timestamp;
# } __packed;
#
# Note "type" and "timestamp" are encoded separately below.
FMT_MSG_HDR_32 = "II"
FMT_MSG_HDR_64 = "IQ"
# Message type
# 0: normal message
# 1: number of dropped messages
FMT_MSG_TYPE = "B"
# Depends on CONFIG_LOG_TIMESTAMP_64BIT
FMT_MSG_TIMESTAMP_32 = "I"
FMT_MSG_TIMESTAMP_64 = "Q"
# Keep message types in sync with include/logging/log_output_dict.h
MSG_TYPE_NORMAL = 0
MSG_TYPE_DROPPED = 1
# Number of dropped messages
FMT_DROPPED_CNT = "H"
logger = logging.getLogger("parser")
def get_log_level_str(lvl):
"""Convert numeric log level to string"""
if lvl < 0 or lvl >= len(LOG_LEVELS):
return "unk"
return LOG_LEVELS[lvl]
def formalize_fmt_string(fmt_str):
"""Replace unsupported formatter"""
new_str = fmt_str
# Python doesn't support %lld or %llu so we need to remove the extra 'l'
new_str = new_str.replace("%lld", "%ld")
new_str = new_str.replace("%llu", "%lu")
# No %p for pointer either, so use %x
new_str = new_str.replace("%p", "0x%x")
return new_str
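# Illustration (not part of the module):
#   formalize_fmt_string("%llu bytes at %p") == "%lu bytes at 0x%x"
# and the result can be rendered with Python's %-formatting.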
class DataTypes():
"""Class regarding data types, their alignments and sizes"""
INT = 0
UINT = 1
LONG = 2
ULONG = 3
LONG_LONG = 4
ULONG_LONG = 5
PTR = 6
DOUBLE = 7
LONG_DOUBLE = 8
NUM_TYPES = 9
def __init__(self, database):
self.database = database
self.data_types = dict()
if database.is_tgt_64bit():
self.add_data_type(self.LONG, "q")
self.add_data_type(self.LONG_LONG, "q")
self.add_data_type(self.PTR, "Q")
else:
self.add_data_type(self.LONG, "i")
self.add_data_type(self.LONG_LONG, "q")
self.add_data_type(self.PTR, "I")
self.add_data_type(self.INT, "i")
self.add_data_type(self.DOUBLE, "d")
self.add_data_type(self.LONG_DOUBLE, "d")
def add_data_type(self, data_type, fmt):
"""Add one data type"""
if self.database.is_tgt_little_endian():
endianness = "<"
else:
endianness = ">"
formatter = endianness + fmt
self.data_types[data_type] = dict()
self.data_types[data_type]['fmt'] = formatter
size = struct.calcsize(formatter)
if data_type == self.LONG_DOUBLE:
# Python doesn't have long double but we still
# need to skip the correct number of bytes
size = 16
self.data_types[data_type]['sizeof'] = size
# Might need actual number for different architectures
# but these seem to work fine for now.
if self.database.is_tgt_64bit():
self.data_types[data_type]['align'] = 8
else:
self.data_types[data_type]['align'] = 4
def get_sizeof(self, data_type):
"""Get sizeof() of a data type"""
return self.data_types[data_type]['sizeof']
def get_alignment(self, data_type):
"""Get the alignment of a data type"""
return self.data_types[data_type]['align']
def get_formatter(self, data_type):
"""Get the formatter for a data type"""
return self.data_types[data_type]['fmt']
class LogParserV1(LogParser):
"""Log Parser V1"""
def __init__(self, database):
super().__init__(database=database)
if self.database.is_tgt_little_endian():
endian = "<"
else:
endian = ">"
self.fmt_msg_type = endian + FMT_MSG_TYPE
self.fmt_dropped_cnt = endian + FMT_DROPPED_CNT
if self.database.is_tgt_64bit():
self.fmt_msg_hdr = endian + FMT_MSG_HDR_64
else:
self.fmt_msg_hdr = endian + FMT_MSG_HDR_32
if "CONFIG_LOG_TIMESTAMP_64BIT" in self.database.get_kconfigs():
self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_64
else:
self.fmt_msg_timestamp = endian + FMT_MSG_TIMESTAMP_32
self.data_types = DataTypes(self.database)
def __get_string(self, arg, arg_offset, string_tbl):
one_str = self.database.find_string(arg)
if one_str is not None:
ret = one_str
else:
# The index from the string table is basically
# the order in va_list. Need to add to the index
# to skip the packaged string header and
# the format string.
str_idx = arg_offset + self.data_types.get_sizeof(DataTypes.PTR) * 2
str_idx /= self.data_types.get_sizeof(DataTypes.INT)
ret = string_tbl[int(str_idx)]
return ret
def process_one_fmt_str(self, fmt_str, arg_list, string_tbl):
"""Parse the format string to extract arguments from
the binary arglist and return a tuple usable with
Python's string formatting"""
idx = 0
arg_offset = 0
is_parsing = False
do_extract = False
args = list()
# Translated from cbvprintf_package()
for idx, fmt in enumerate(fmt_str):
if not is_parsing:
if fmt == '%':
is_parsing = True
arg_data_type = DataTypes.INT
continue
elif fmt == '%':
# '%%' -> literal percentage sign
is_parsing = False
continue
elif fmt == '*':
pass
elif fmt.isdecimal() or str.lower(fmt) == 'l' \
or fmt in (' ', '#', '-', '+', '.', 'h'):
# formatting modifiers, just ignore
continue
elif fmt in ('j', 'z', 't'):
# intmax_t, size_t or ptrdiff_t
arg_data_type = DataTypes.LONG
elif fmt in ('c', 'd', 'i', 'o', 'u') or str.lower(fmt) == 'x':
if fmt_str[idx - 1] == 'l':
if fmt_str[idx - 2] == 'l':
arg_data_type = DataTypes.LONG_LONG
else:
arg_data_type = DataTypes.LONG
else:
arg_data_type = DataTypes.INT
is_parsing = False
do_extract = True
elif fmt in ('s', 'p', 'n'):
arg_data_type = DataTypes.PTR
is_parsing = False
do_extract = True
elif str.lower(fmt) in ('a', 'e', 'f', 'g'):
# Python doesn't do"long double".
#
# Parse it as double (probably incorrect), but
# still have to skip enough bytes.
if fmt_str[idx - 1] == 'L':
arg_data_type = DataTypes.LONG_DOUBLE
else:
arg_data_type = DataTypes.DOUBLE
is_parsing = False
do_extract = True
else:
is_parsing = False
continue
if do_extract:
do_extract = False
align = self.data_types.get_alignment(arg_data_type)
size = self.data_types.get_sizeof(arg_data_type)
unpack_fmt = self.data_types.get_formatter(arg_data_type)
one_arg = struct.unpack_from(unpack_fmt, arg_list, arg_offset)[0]
if fmt == 's':
one_arg = self.__get_string(one_arg, arg_offset, string_tbl)
args.append(one_arg)
arg_offset += size
# Align the offset
arg_offset = int((arg_offset + align - 1) / align) * align
return tuple(args)
@staticmethod
def extract_string_table(str_tbl):
"""Extract string table in a packaged log message"""
tbl = dict()
one_str = ""
next_new_string = True
# Translated from cbvprintf_package()
for one_ch in str_tbl:
if next_new_string:
str_idx = one_ch
next_new_string = False
continue
if one_ch == 0:
tbl[str_idx] = one_str
one_str = ""
next_new_string = True
continue
one_str += chr(one_ch)
return tbl
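# Illustration (not part of the module): the byte before each string is
# its argument index as recorded by cbvprintf_package(), so
#   extract_string_table(b'\x02hi\x00\x07log\x00') == {2: 'hi', 7: 'log'}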
@staticmethod
def print_hexdump(hex_data, prefix_len):
"""Print hex dump"""
hex_vals = ""
chr_vals = ""
chr_done = 0
for one_hex in hex_data:
hex_vals += "%x " % one_hex
chr_vals += chr(one_hex)
chr_done += 1
if chr_done == HEX_BYTES_IN_LINE / 2:
hex_vals += " "
chr_vals += " "
elif chr_done == HEX_BYTES_IN_LINE:
print("%s%s|%s" % ((" " * prefix_len), hex_vals, chr_vals))
hex_vals = ""
chr_vals = ""
chr_done = 0
if len(chr_vals) > 0:
hex_padding = " " * (HEX_BYTES_IN_LINE - chr_done)
print("%s%s%s|%s" % ((" " * prefix_len), hex_vals, hex_padding, chr_vals))
def parse_one_normal_msg(self, logdata, offset):
"""Parse one normal log message and print the encoded message"""
# Parse log message header
log_desc, source_id = struct.unpack_from(self.fmt_msg_hdr, logdata, offset)
offset += struct.calcsize(self.fmt_msg_hdr)
timestamp = struct.unpack_from(self.fmt_msg_timestamp, logdata, offset)[0]
offset += struct.calcsize(self.fmt_msg_timestamp)
# domain_id, level, pkg_len, data_len
domain_id = log_desc & 0x07
level = (log_desc >> 3) & 0x07
pkg_len = (log_desc >> 6) & 0x3FF
data_len = (log_desc >> 16) & 0xFFF
level_str = get_log_level_str(level)
source_id_str = self.database.get_log_source_string(domain_id, source_id)
# Skip over data to point to next message (save as return value)
next_msg_offset = offset + pkg_len + data_len
# Offset from beginning of cbprintf_packaged data to end of va_list arguments
offset_end_of_args = struct.unpack_from("B", logdata, offset)[0]
offset_end_of_args *= self.data_types.get_sizeof(DataTypes.INT)
offset_end_of_args += offset
# Extra data after packaged log
extra_data = logdata[(offset + pkg_len):next_msg_offset]
# Number of packaged strings
num_packed_strings = struct.unpack_from("B", logdata, offset+1)[0]
# Extract the string table in the packaged log message
string_tbl = self.extract_string_table(logdata[offset_end_of_args:(offset + pkg_len)])
if len(string_tbl) != num_packed_strings:
logger.error("------ Error extracting string table")
return None
# Skip packaged string header
offset += self.data_types.get_sizeof(DataTypes.PTR)
# Grab the format string
#
# Note the negative offset to __get_string(). It is because
# the offset begins at 0 for va_list. However, the format string
# itself is before the va_list, so we need to go back the width of
# a pointer.
fmt_str_ptr = struct.unpack_from(self.data_types.get_formatter(DataTypes.PTR),
logdata, offset)[0]
fmt_str = self.__get_string(fmt_str_ptr,
-self.data_types.get_sizeof(DataTypes.PTR),
string_tbl)
offset += self.data_types.get_sizeof(DataTypes.PTR)
if not fmt_str:
logger.error("------ Error getting format string at 0x%x", fmt_str_ptr)
return None
args = self.process_one_fmt_str(fmt_str, logdata[offset:offset_end_of_args], string_tbl)
fmt_str = formalize_fmt_string(fmt_str)
log_msg = fmt_str % args
log_prefix = ""
if level == 0:
print("%s" % log_msg, end='')
else:
log_prefix = f"[{timestamp:>10}] <{level_str}> {source_id_str}: "
print("%s%s" % (log_prefix, log_msg))
if data_len > 0:
# Has hexdump data
self.print_hexdump(extra_data, len(log_prefix))
# Point to next message
return next_msg_offset
def parse_log_data(self, logdata, debug=False):
"""Parse binary log data and print the encoded log messages"""
offset = 0
while offset < len(logdata):
# Get message type
msg_type = struct.unpack_from(self.fmt_msg_type, logdata, offset)[0]
offset += struct.calcsize(self.fmt_msg_type)
if msg_type == MSG_TYPE_DROPPED:
num_dropped = struct.unpack_from(self.fmt_dropped_cnt, logdata, offset)[0]
offset += struct.calcsize(self.fmt_dropped_cnt)
print("--- %d messages dropped ---" % num_dropped)
elif msg_type == MSG_TYPE_NORMAL:
ret = self.parse_one_normal_msg(logdata, offset)
if ret is None:
return False
offset = ret
else:
logger.error("------ Unknown message type: %s", msg_type)
return False
return True
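
Decoded output from this parser looks roughly like the following (values illustrative; level-0 messages print with no prefix, and hexdump data is appended under its message):

  [      1234] <inf> main: Hello World!
  --- 2 messages dropped ---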

scripts/logging/dictionary/parser/utils.py

@@ -0,0 +1,43 @@
#!/usr/bin/env python3
#
# Copyright (c) 2021 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0
"""
Utilities for Dictionary-based Logging Parser
"""
import binascii
def convert_hex_file_to_bin(hexfile):
"""This converts a file in hexadecimal to binary"""
bin_data = b''
with open(hexfile, "r") as hfile:
for line in hfile.readlines():
hex_str = line.strip()
bin_str = binascii.unhexlify(hex_str)
bin_data += bin_str
return bin_data
def extract_string_from_section(section, str_ptr):
"""Extract all the strings in an ELF section"""
data = section['data']
max_offset = section['size']
offset = str_ptr - section['start']
if offset < 0 or offset >= max_offset:
return None
ret_str = ""
while (offset < max_offset) and (data[offset] != 0):
ret_str += chr(data[offset])
offset += 1
return ret_str
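
A quick illustration of extract_string_from_section() (section values made up):

section = {'start': 0x2000, 'size': 6, 'data': b'hello\x00'}
assert extract_string_from_section(section, 0x2002) == "llo"
assert extract_string_from_section(section, 0x3000) is None  # out of range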

subsys/logging/CMakeLists.txt

@@ -78,6 +78,10 @@ if(NOT CONFIG_LOG_MINIMAL)
log_backend_spinel.c
)
if(CONFIG_LOG_DICTIONARY_SUPPORT)
zephyr_sources(log_output_dict.c)
endif()
else()
zephyr_sources(log_minimal.c)
endif()

subsys/logging/Kconfig

@@ -27,6 +27,19 @@ config LOG_MIPI_SYST_ENABLE
help
Enable MIPI SyS-T format output for the logger system.
config LOG_DICTIONARY_SUPPORT
bool
depends on LOG2
help
Enable support for dictionary-based logging.
Dictionary-based logging is binary in nature: predefined strings
in the log output are replaced with their addresses in the built
binary image. This reduces the space required to store log output
when long format strings would otherwise be logged.
This option should be selected by the backend automatically.
config LOG_IMMEDIATE_CLEAN_OUTPUT
bool "Clean log output"
depends on LOG_IMMEDIATE
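
Since LOG_DICTIONARY_SUPPORT has no prompt, a backend that emits this format is expected to enable it via select; a hypothetical backend entry (the symbol name is illustrative, not part of this commit):

config LOG_BACKEND_FOO_OUTPUT_DICTIONARY
	bool "Dictionary"
	depends on LOG2
	select LOG_DICTIONARY_SUPPORT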

subsys/logging/log_output_dict.c

@@ -0,0 +1,74 @@
/*
* Copyright (c) 2018 Nordic Semiconductor ASA
* Copyright (c) 2021 Intel Corporation
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <logging/log.h>
#include <logging/log_ctrl.h>
#include <logging/log_output.h>
#include <logging/log_output_dict.h>
#include <sys/__assert.h>
#include <sys/util.h>
static void buffer_write(log_output_func_t outf, uint8_t *buf, size_t len,
void *ctx)
{
int processed;
do {
processed = outf(buf, len, ctx);
len -= processed;
buf += processed;
} while (len != 0);
}
void log_dict_output_msg2_process(const struct log_output *output,
struct log_msg2 *msg, uint32_t flags)
{
struct log_dict_output_normal_msg_hdr_t output_hdr;
void *source = (void *)log_msg2_get_source(msg);
/* Keep in sync with the header in struct log_msg2 */
output_hdr.type = MSG_NORMAL;
output_hdr.domain = msg->hdr.desc.domain;
output_hdr.level = msg->hdr.desc.level;
output_hdr.package_len = msg->hdr.desc.package_len;
output_hdr.data_len = msg->hdr.desc.data_len;
output_hdr.timestamp = msg->hdr.timestamp;
output_hdr.source = (source != NULL) ?
(IS_ENABLED(CONFIG_LOG_RUNTIME_FILTERING) ?
log_dynamic_source_id(source) :
log_const_source_id(source)) :
0U;
buffer_write(output->func, (uint8_t *)&output_hdr, sizeof(output_hdr),
(void *)output);
size_t len;
uint8_t *data = log_msg2_get_package(msg, &len);
if (len > 0U) {
buffer_write(output->func, data, len, (void *)output);
}
data = log_msg2_get_data(msg, &len);
if (len > 0U) {
buffer_write(output->func, data, len, (void *)output);
}
log_output_flush(output);
}
void log_dict_output_dropped_process(const struct log_output *output, uint32_t cnt)
{
struct log_dict_output_dropped_msg_t msg;
msg.type = MSG_DROPPED_MSG;
msg.num_dropped_messages = MIN(cnt, 9999);
buffer_write(output->func, (uint8_t *)&msg, sizeof(msg),
(void *)output);
}
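
For context, a log backend would hand v2 messages to these functions instead of the plain-text formatter. A minimal sketch, with the backend name, buffer size, and hook wiring assumed rather than taken from this commit:

#include <logging/log_output.h>
#include <logging/log_output_dict.h>

static uint8_t dict_output_buf[64];

/* Transport hook: push length bytes out (e.g. over a UART) and return
 * how many bytes were consumed.
 */
static int dict_char_out(uint8_t *data, size_t length, void *ctx)
{
	/* ... write data[0..length-1] to the transport ... */
	return length;
}

LOG_OUTPUT_DEFINE(log_output_dict_example, dict_char_out,
		  dict_output_buf, sizeof(dict_output_buf));

/* Called from the backend's per-message process hook */
static void backend_process(struct log_msg2 *msg, uint32_t flags)
{
	log_dict_output_msg2_process(&log_output_dict_example, msg, flags);
}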