kernel: syscalls: no need to include all syscalls in binary

The syscall generation phase parses all header files to look
for potential syscalls, and emits all the relevant files to
enable syscalls. However, this results in all the syscall
marshalling functions being included in the final binary.
This is due to these functions being referred to inside
the dispatch list, resulting in ineffective garbage
collection during linking. Previous commits allow each
driver and subsystem to specify which header files
containing syscalls are relevant. So this commit changes
the syscall generation to only include the syscalls needed
for the build in the syscall dispatch list and removes
various bits related to that. This allows the linker to
garbage collect unused syscall-related functions, thus
reducing the final binary size.

Signed-off-by: Daniel Leung <daniel.leung@intel.com>
This commit is contained in:
Daniel Leung 2023-05-25 11:41:48 -07:00 committed by Anas Nashif
parent 26ecaba4af
commit 80e78208e6
5 changed files with 151 additions and 35 deletions

View file

@ -92,6 +92,11 @@ set(PARSE_SYSCALLS_TARGET parse_syscalls_target)
# Declare the global PROPERTY_OUTPUT_FORMAT property and assign its value.
define_property(GLOBAL PROPERTY PROPERTY_OUTPUT_FORMAT BRIEF_DOCS " " FULL_DOCS " ")
set_property( GLOBAL PROPERTY PROPERTY_OUTPUT_FORMAT elf32-little${ARCH}) # BFD format
# Contains the list of files with syscall function prototypes.
# "syscalls_interface" is an INTERFACE library that is used as a container:
# header files holding syscall prototypes are attached to its properties and
# read back later to build the list handed to the syscall parser.
add_library(syscalls_interface INTERFACE)
# Path of the generated text file that holds that list of header files. It is
# passed to parse_syscalls.py through the --file-list option.
set(syscalls_file_list_output
${CMAKE_CURRENT_BINARY_DIR}/misc/generated/syscalls_file_list.txt)
# "zephyr_interface" is a source-less library that encapsulates all the global
# compiler options needed by all source files. All zephyr libraries,
# including the library named "zephyr" link with this library to
@ -728,13 +733,16 @@ add_custom_command(
COMMAND
${PYTHON_EXECUTABLE}
${ZEPHYR_BASE}/scripts/build/parse_syscalls.py
--include ${ZEPHYR_BASE}/include # Read files from this dir
--include ${ZEPHYR_BASE}/drivers # For net sockets
--include ${ZEPHYR_BASE}/subsys/net # More net sockets
--scan ${ZEPHYR_BASE}/include # Read files from this dir
--scan ${ZEPHYR_BASE}/drivers # For net sockets
--scan ${ZEPHYR_BASE}/subsys/net # More net sockets
${parse_syscalls_include_args} # Read files from these dirs also
--json-file ${syscalls_json} # Write this file
--tag-struct-file ${struct_tags_json} # Write subsystem list to this file
--file-list ${syscalls_file_list_output}
$<$<BOOL:${CONFIG_EMIT_ALL_SYSCALLS}>:--emit-all-syscalls>
DEPENDS ${syscalls_subdirs_trigger} ${PARSE_SYSCALLS_HEADER_DEPENDS}
${syscalls_file_list_output} ${syscalls_interface}
)
# Make sure Picolibc is built before the rest of the system; there's no explicit
@ -850,6 +858,14 @@ zephyr_get_include_directories_for_lang(C ZEPHYR_INCLUDES)
add_subdirectory(kernel)
# Collect the syscall header files registered on the syscalls_interface
# target. They are read back from its INTERFACE_INCLUDE_DIRECTORIES property,
# which holds header file paths here rather than real include directories.
# NOTE(review): presumably this must run after every subdirectory that
# registers syscall headers has been processed - confirm the ordering.
get_property(
syscalls_file_list
TARGET syscalls_interface
PROPERTY INTERFACE_INCLUDE_DIRECTORIES
)
# Write the list to ${syscalls_file_list_output}. With @ONLY, only the
# @syscalls_file_list@ reference is substituted. The value is a CMake list,
# so the entries in the written file are separated by semicolons.
file(CONFIGURE OUTPUT ${syscalls_file_list_output}
CONTENT "@syscalls_file_list@" @ONLY)
# Read list content
get_property(ZEPHYR_LIBS_PROPERTY GLOBAL PROPERTY ZEPHYR_LIBS)

View file

@ -738,6 +738,13 @@ config CHECK_INIT_PRIORITIES_FAIL_ON_WARNING
devices depending on each other but initialized with the same
priority.
# When enabled, the build passes --emit-all-syscalls to
# scripts/build/parse_syscalls.py.
config EMIT_ALL_SYSCALLS
bool "Emit all possible syscalls in the tree"
help
This tells the build system to emit all possible syscalls found
in the tree, instead of only those syscalls associated with enabled
drivers and subsystems.
endmenu
config DEPRECATED

View file

@ -1488,14 +1488,37 @@ endfunction()
# Function to add header file(s) to the list to be passed to syscall generator.
#
# Usage:
#   zephyr_syscall_header(<header> [<header> ...])
#
# Each argument is used as given if it exists; otherwise it is looked up
# relative to CMAKE_CURRENT_SOURCE_DIR. A header that cannot be found either
# way stops configuration with a fatal error.
function(zephyr_syscall_header)
# Every argument is treated as one header file to register.
foreach(one_file ${ARGV})
# Resolve the argument to an existing path, trying it as given first.
if(EXISTS ${one_file})
set(header_file ${one_file})
elseif(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${one_file})
set(header_file ${CMAKE_CURRENT_SOURCE_DIR}/${one_file})
else()
message(FATAL_ERROR "Syscall header file not found: ${one_file}")
endif()
# Record the header as an INTERFACE source of syscalls_interface.
target_sources(
syscalls_interface INTERFACE
${header_file}
)
# Also store the header path in INTERFACE_INCLUDE_DIRECTORIES. The value is a
# file, not a directory: the property serves as the list of registered
# syscall headers.
target_include_directories(
syscalls_interface INTERFACE
${header_file}
)
# NOTE(review): add_dependencies() is documented as taking target names, but
# a file path is passed here - confirm this is intended.
add_dependencies(
syscalls_interface
${header_file}
)
# Clear the result so the next iteration starts without a stale value.
unset(header_file)
endforeach()
endfunction()
# Function to add header file(s) to the list to be passed to syscall generator
# if condition is true.
#
# Usage:
#   zephyr_syscall_header_ifdef(<feature_toggle> <header> [<header> ...])
#
# <feature_toggle> is the NAME of a variable. The headers are registered only
# when that variable's value evaluates to true.
function(zephyr_syscall_header_ifdef feature_toggle)
# Double dereference: ${feature_toggle} is the variable name and
# ${${feature_toggle}} is that variable's value.
if(${${feature_toggle}})
# Forward the remaining arguments (the header files) unchanged.
zephyr_syscall_header(${ARGN})
endif()
endfunction()

View file

@ -419,22 +419,29 @@ def main():
invocations = {}
mrsh_defs = {}
mrsh_includes = {}
ids = []
ids_emit = []
ids_not_emit = []
table_entries = []
handlers = []
emit_list = []
for match_group, fn in syscalls:
for match_group, fn, to_emit in syscalls:
handler, inv, mrsh, sys_id, entry = analyze_fn(match_group, fn)
if fn not in invocations:
invocations[fn] = []
invocations[fn].append(inv)
ids.append(sys_id)
table_entries.append(entry)
handlers.append(handler)
if mrsh:
if to_emit:
ids_emit.append(sys_id)
table_entries.append(entry)
emit_list.append(handler)
else:
ids_not_emit.append(sys_id)
if mrsh and to_emit:
syscall = typename_split(match_group[0])[1]
mrsh_defs[syscall] = mrsh
mrsh_includes[syscall] = "#include <syscalls/%s>" % fn
@ -444,7 +451,7 @@ def main():
weak_defines = "".join([weak_template % name
for name in handlers
if not name in noweak])
if not name in noweak and name in emit_list])
# The "noweak" ones just get a regular declaration
weak_defines += "\n".join(["extern uintptr_t %s(uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6, void *ssf);"
@ -454,13 +461,23 @@ def main():
",\n\t".join(table_entries)))
# Listing header emitted to stdout
ids.sort()
ids.extend(["K_SYSCALL_BAD", "K_SYSCALL_LIMIT"])
ids_emit.sort()
ids_emit.extend(["K_SYSCALL_BAD", "K_SYSCALL_LIMIT"])
ids_as_defines = ""
for i, item in enumerate(ids):
for i, item in enumerate(ids_emit):
ids_as_defines += "#define {} {}\n".format(item, i)
if ids_not_emit:
# There are syscalls that are not used in the image, but
# their IDs are still referenced by the generated stubs. So we
# need to define them, but outside the range of emitted syscall IDs.
ids_as_defines += "\n\n/* Following syscalls are not used in image */\n"
ids_not_emit.sort()
num_emitted_ids = len(ids_emit)
for i, item in enumerate(ids_not_emit):
ids_as_defines += "#define {} {}\n".format(item, i + num_emitted_ids)
with open(args.syscall_list, "w") as fp:
fp.write(list_template % ids_as_defines)

View file

@ -55,13 +55,38 @@ def tagged_struct_update(target_list, tag, contents):
target_list.extend(items)
def analyze_headers(multiple_directories):
def analyze_headers(include_dir, scan_dir, file_list):
syscall_ret = []
tagged_ret = {}
for tag in struct_tags:
tagged_ret[tag] = []
syscall_files = dict()
# Get the list of header files which contains syscalls to be emitted.
# If file_list does not exist, we emit all syscalls.
if file_list:
with open(file_list, "r", encoding="utf-8") as fp:
contents = fp.read()
for one_file in contents.split(";"):
if os.path.isfile(one_file):
syscall_files[one_file] = {"emit": True}
else:
sys.stderr.write(f"{one_file} does not exists!\n")
sys.exit(1)
multiple_directories = set()
if include_dir:
multiple_directories |= set(include_dir)
if scan_dir:
multiple_directories |= set(scan_dir)
# Look for source files under various directories.
# Because "syscalls/*.h" is included unconditionally in various
# other header files, we must generate the associated syscall
# header files (e.g. for function stubs).
for base_path in multiple_directories:
for root, dirs, files in os.walk(base_path, topdown=True):
dirs.sort()
@ -76,23 +101,35 @@ def analyze_headers(multiple_directories):
'common.h'))):
continue
with open(path, "r", encoding="utf-8") as fp:
try:
contents = fp.read()
except Exception:
sys.stderr.write("Error decoding %s\n" % path)
raise
if path not in syscall_files:
if include_dir and base_path in include_dir:
syscall_files[path] = {"emit" : True}
else:
syscall_files[path] = {"emit" : False}
try:
syscall_result = [(mo.groups(), fn)
for mo in syscall_regex.finditer(contents)]
for tag in struct_tags:
tagged_struct_update(tagged_ret[tag], tag, contents)
except Exception:
sys.stderr.write("While parsing %s\n" % fn)
raise
# Parse files to extract syscall functions
for one_file in syscall_files:
with open(one_file, "r", encoding="utf-8") as fp:
try:
contents = fp.read()
except Exception:
sys.stderr.write("Error decoding %s\n" % path)
raise
syscall_ret.extend(syscall_result)
fn = os.path.basename(one_file)
try:
to_emit = syscall_files[one_file]["emit"] | args.emit_all_syscalls
syscall_result = [(mo.groups(), fn, to_emit)
for mo in syscall_regex.finditer(contents)]
for tag in struct_tags:
tagged_struct_update(tagged_ret[tag], tag, contents)
except Exception:
sys.stderr.write("While parsing %s\n" % fn)
raise
syscall_ret.extend(syscall_result)
return syscall_ret, tagged_ret
@ -116,16 +153,31 @@ def parse_args():
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter, allow_abbrev=False)
parser.add_argument("-i", "--include", required=True, action='append',
help='''include directories recursively scanned
for .h files. Can be specified multiple times:
-i topdir1 -i topdir2 ...''')
parser.add_argument(
"-i", "--include", required=False, action="append",
help="Include directories recursively scanned for .h files "
"containing syscalls that must be present in final binary. "
"Can be specified multiple times: -i topdir1 -i topdir2 ...")
parser.add_argument(
"--scan", required=False, action="append",
help="Scan directories recursively for .h files containing "
"syscalls that need stubs generated but may not need to "
"be present in final binary. Can be specified multiple "
"times.")
parser.add_argument(
"-j", "--json-file", required=True,
help="Write system call prototype information as json to file")
parser.add_argument(
"-t", "--tag-struct-file", required=True,
help="Write tagged struct name information as json to file")
parser.add_argument(
"--file-list", required=False,
help="Text file containing semi-colon separated list of "
"header file where only syscalls in these files "
"are emitted.")
parser.add_argument(
"--emit-all-syscalls", required=False, action="store_true",
help="Emit all potential syscalls in the tree")
args = parser.parse_args()
@ -133,7 +185,8 @@ def parse_args():
def main():
parse_args()
syscalls, tagged = analyze_headers(args.include)
syscalls, tagged = analyze_headers(args.include, args.scan,
args.file_list)
# Only write json files if they don't exist or have changes since
# they will force an incremental rebuild.