diff --git a/include/arch/x86/intel64/linker.ld b/include/arch/x86/intel64/linker.ld index ffa7949a90..ef091a4de6 100644 --- a/include/arch/x86/intel64/linker.ld +++ b/include/arch/x86/intel64/linker.ld @@ -9,14 +9,16 @@ #define ROMABLE_REGION RAM #define RAMABLE_REGION RAM +#define MMU_PAGE_ALIGN . = ALIGN(CONFIG_MMU_PAGE_SIZE); + /* Used to align areas with separate memory permission characteristics * so that the page permissions can be set in the MMU. Without this, * the kernel is just one blob with the same RWX permissions on all RAM */ #ifdef CONFIG_SRAM_REGION_PERMISSIONS - #define MMU_PAGE_ALIGN . = ALIGN(CONFIG_MMU_PAGE_SIZE); + #define MMU_PAGE_ALIGN_PERM MMU_PAGE_ALIGN #else - #define MMU_PAGE_ALIGN + #define MMU_PAGE_ALIGN_PERM #endif ENTRY(CONFIG_KERNEL_ENTRY) @@ -34,12 +36,12 @@ SECTIONS _locore_start = .; *(.locore) *(.locore.*) - MMU_PAGE_ALIGN + MMU_PAGE_ALIGN_PERM _locore_end = .; _lorodata_start = .; *(.lorodata) - MMU_PAGE_ALIGN + MMU_PAGE_ALIGN_PERM _lodata_start = .; *(.lodata) @@ -54,7 +56,7 @@ SECTIONS * On x86-64 the IDT is in rodata and doesn't need to be in the * trampoline page. */ - MMU_PAGE_ALIGN + MMU_PAGE_ALIGN_PERM z_shared_kernel_page_start = .; #endif /* CONFIG_X86_KPTI */ @@ -63,7 +65,7 @@ SECTIONS #ifdef CONFIG_X86_KPTI *(.trampolines) - MMU_PAGE_ALIGN + MMU_PAGE_ALIGN_PERM z_shared_kernel_page_end = .; ASSERT(z_shared_kernel_page_end - z_shared_kernel_page_start == 4096, @@ -93,7 +95,7 @@ SECTIONS #include - MMU_PAGE_ALIGN + MMU_PAGE_ALIGN_PERM } GROUP_LINK_IN(ROMABLE_REGION) _image_text_end = .; @@ -123,15 +125,15 @@ SECTIONS #include - MMU_PAGE_ALIGN + MMU_PAGE_ALIGN_PERM _image_rodata_end = .; _image_rodata_size = _image_rodata_end - _image_rodata_start; _image_rom_end = .; #ifdef CONFIG_USERSPACE /* APP SHARED MEMORY REGION */ -#define SMEM_PARTITION_ALIGN(size) MMU_PAGE_ALIGN -#define APP_SHARED_ALIGN MMU_PAGE_ALIGN +#define SMEM_PARTITION_ALIGN(size) MMU_PAGE_ALIGN_PERM +#define APP_SHARED_ALIGN MMU_PAGE_ALIGN_PERM #include @@ -148,7 +150,7 @@ SECTIONS SECTION_PROLOGUE(_BSS_SECTION_NAME, (NOLOAD), ALIGN(16)) { - MMU_PAGE_ALIGN + MMU_PAGE_ALIGN_PERM #ifndef CONFIG_USERSPACE _image_ram_start = .; #endif @@ -180,7 +182,7 @@ SECTIONS /* Must be last in RAM */ #include - . = ALIGN(CONFIG_MMU_PAGE_SIZE); + MMU_PAGE_ALIGN _image_ram_end = .; z_mapped_end = .; _end = .; diff --git a/include/sys/mem_manage.h b/include/sys/mem_manage.h index faa903b0f8..fdc279322e 100644 --- a/include/sys/mem_manage.h +++ b/include/sys/mem_manage.h @@ -51,9 +51,10 @@ extern "C" { /** * Map a physical memory region into the kernel's virtual address space * - * Given a physical address and a size, return a linear address - * representing the base of where the physical region is mapped in - * the virtual address space for the Zephyr kernel. + * This function is intended for mapping memory-mapped I/O regions into + * the virtual address space. Given a physical address and a size, return a + * linear address representing the base of where the physical region is mapped + * in the virtual address space for the Zephyr kernel. * * This function alters the active page tables in the area reserved * for the kernel. This function will choose the virtual address @@ -70,12 +71,18 @@ extern "C" { * with user access and code execution forbidden. This policy is changed * by passing K_MEM_CACHE_* and K_MEM_PERM_* macros into the 'flags' parameter. 
* - * If there is insufficient virtual address space for the mapping, or - * bad flags are passed in, or if additional memory is needed to update - * page tables that is not available, this will generate a kernel panic. + * If there is insufficient virtual address space for the mapping this will + * generate a kernel panic. * * This API is only available if CONFIG_MMU is enabled. * + * It is highly discouraged to use this function to map system RAM page + * frames. It may conflict with anonymous memory mappings and demand paging + * and produce undefined behavior. Do not use this for RAM unless you know + * exactly what you are doing. If you need a chunk of memory, use k_mem_map(). + * If you need a contiguous buffer of physical memory, statically declare it + * and pin it at build time, it will be mapped when the system boots. + * * This API is part of infrastructure still under development and may * change. * diff --git a/kernel/include/kernel_internal.h b/kernel/include/kernel_internal.h index a05925d7f3..3e0c4f2a45 100644 --- a/kernel/include/kernel_internal.h +++ b/kernel/include/kernel_internal.h @@ -192,6 +192,11 @@ void z_thread_mark_switched_out(void); #endif /* CONFIG_INSTRUMENT_THREAD_SWITCHING */ +/* Init hook for page frame management, invoked immediately upon entry of + * main thread, before POST_KERNEL tasks + */ +void z_mem_manage_init(void); + #ifdef __cplusplus } #endif diff --git a/kernel/include/mmu.h b/kernel/include/mmu.h new file mode 100644 index 0000000000..712e719ba3 --- /dev/null +++ b/kernel/include/mmu.h @@ -0,0 +1,198 @@ +/* + * Copyright (c) 2020 Intel Corporation. + * + * SPDX-License-Identifier: Apache-2.0 + */ +#ifndef KERNEL_INCLUDE_MMU_H +#define KERNEL_INCLUDE_MMU_H + +#ifdef CONFIG_MMU + +#include +#include +#include +#include +#include +#include + +/* + * At present, page frame management is only done for main system RAM, + * and we generate paging structures based on CONFIG_SRAM_BASE_ADDRESS + * and CONFIG_SRAM_SIZE. + * + * If we have other RAM regions (DCCM, etc) these typically have special + * properties and shouldn't be used generically for demand paging or + * anonymous mappings. We don't currently maintain an ontology of these in the + * core kernel. + */ +#define Z_PHYS_RAM_START ((uintptr_t)CONFIG_SRAM_BASE_ADDRESS) +#define Z_PHYS_RAM_SIZE ((size_t)KB(CONFIG_SRAM_SIZE)) +#define Z_PHYS_RAM_END (Z_PHYS_RAM_START + Z_PHYS_RAM_SIZE) +#define Z_NUM_PAGE_FRAMES (Z_PHYS_RAM_SIZE / CONFIG_MMU_PAGE_SIZE) + +/** End virtual address of virtual address space */ +#define Z_VIRT_RAM_START ((uint8_t *)CONFIG_KERNEL_VM_BASE) +#define Z_VIRT_RAM_SIZE ((size_t)CONFIG_KERNEL_VM_SIZE) +#define Z_VIRT_RAM_END (Z_VIRT_RAM_START + Z_VIRT_RAM_SIZE) + +/* Boot-time virtual location of the kernel image. */ +#define Z_KERNEL_VIRT_START ((uint8_t *)(&z_mapped_start)) +#define Z_KERNEL_VIRT_END ((uint8_t *)(&z_mapped_end)) +#define Z_KERNEL_VIRT_SIZE ((size_t)(&z_mapped_size)) + +/* + * Macros and data structures for physical page frame accounting, + * APIs for use by eviction and backing store algorithms. This code + * is otherwise not application-facing. + */ + +/* + * z_page_frame flags bits + */ + +/** This page contains critical kernel data and will never be swapped */ +#define Z_PAGE_FRAME_PINNED BIT(0) + +/** This physical page is reserved by hardware; we will never use it */ +#define Z_PAGE_FRAME_RESERVED BIT(1) + +/** + * This physical page is mapped to some virtual memory address + * + * Currently, we just support one mapping per page frame. 
If a page frame + * is mapped to multiple virtual pages then it must be pinned. + */ +#define Z_PAGE_FRAME_MAPPED BIT(2) + +/** + * This page frame is currently involved in a page-in/out operation + */ +#define Z_PAGE_FRAME_BUSY BIT(3) + +/** + * Data structure for physical page frames + * + * An array of these is instantiated, one element per physical RAM page. + * Hence it's necessary to constrain its size as much as possible. + */ +struct z_page_frame { + union { + /* If mapped, virtual address this page is mapped to */ + void *addr; + + /* If unmapped and available, free pages list membership. */ + sys_snode_t node; + }; + + /* Z_PAGE_FRAME_* flags */ + uint8_t flags; + + /* TODO: Backing store and eviction algorithms may both need to + * introduce custom members for accounting purposes. Come up with + * a layer of abstraction for this. They may also want additional + * flags bits which shouldn't clobber each other. At all costs + * the total size of struct z_page_frame must be minimized. + */ +} __packed; + +static inline bool z_page_frame_is_pinned(struct z_page_frame *pf) +{ + return (pf->flags & Z_PAGE_FRAME_PINNED) != 0; +} + +static inline bool z_page_frame_is_reserved(struct z_page_frame *pf) +{ + return (pf->flags & Z_PAGE_FRAME_RESERVED) != 0; +} + +static inline bool z_page_frame_is_mapped(struct z_page_frame *pf) +{ + return (pf->flags & Z_PAGE_FRAME_MAPPED) != 0; +} + +static inline bool z_page_frame_is_busy(struct z_page_frame *pf) +{ + return (pf->flags & Z_PAGE_FRAME_BUSY) != 0; +} + +static inline bool z_page_frame_is_evictable(struct z_page_frame *pf) +{ + return (!z_page_frame_is_reserved(pf) && z_page_frame_is_mapped(pf) && + !z_page_frame_is_pinned(pf) && !z_page_frame_is_busy(pf)); +} + +/* If true, page is not being used for anything, is not reserved, is a member + * of some free pages list, isn't busy, and may be mapped in memory + */ +static inline bool z_page_frame_is_available(struct z_page_frame *page) +{ + return page->flags == 0; +} + +static inline void z_assert_phys_aligned(uintptr_t phys) +{ + __ASSERT(phys % CONFIG_MMU_PAGE_SIZE == 0, + "physical address 0x%lx is not page-aligned", phys); + (void)phys; +} + +/* Reserved pages */ +#define Z_VM_RESERVED 0 + +extern struct z_page_frame z_page_frames[Z_NUM_PAGE_FRAMES]; + +static inline uintptr_t z_page_frame_to_phys(struct z_page_frame *pf) +{ + return (uintptr_t)((pf - z_page_frames) * CONFIG_MMU_PAGE_SIZE) + + Z_PHYS_RAM_START; +} + +/* Presumes there is but one mapping in the virtual address space */ +static inline void *z_page_frame_to_virt(struct z_page_frame *pf) +{ + return pf->addr; +} + +static inline bool z_is_page_frame(uintptr_t phys) +{ + z_assert_phys_aligned(phys); + return (phys >= Z_PHYS_RAM_START) && (phys < Z_PHYS_RAM_END); +} + +static inline struct z_page_frame *z_phys_to_page_frame(uintptr_t phys) +{ + __ASSERT(z_is_page_frame(phys), + "0x%lx not an SRAM physical address", phys); + + return &z_page_frames[(phys - Z_PHYS_RAM_START) / + CONFIG_MMU_PAGE_SIZE]; +} + +static inline void z_mem_assert_virtual_region(uint8_t *addr, size_t size) +{ + __ASSERT((uintptr_t)addr % CONFIG_MMU_PAGE_SIZE == 0, + "unaligned addr %p", addr); + __ASSERT(size % CONFIG_MMU_PAGE_SIZE == 0, + "unaligned size %zu", size); + __ASSERT(addr + size > addr, + "region %p size %zu zero or wraps around", addr, size); + __ASSERT(addr >= Z_VIRT_RAM_START && addr + size < Z_VIRT_RAM_END, + "invalid virtual address region %p (%zu)", addr, size); +} + +/* Debug function, pretty-print page frame information for all frames 
+ * concisely to printk. + */ +void z_page_frames_dump(void); + +/* Number of free page frames. This information may go stale immediately */ +extern size_t z_free_page_count; + +/* Convenience macro for iterating over all page frames */ +#define Z_PAGE_FRAME_FOREACH(_phys, _pageframe) \ + for (_phys = Z_PHYS_RAM_START, _pageframe = z_page_frames; \ + _phys < Z_PHYS_RAM_END; \ + _phys += CONFIG_MMU_PAGE_SIZE, _pageframe++) + +#endif /* CONFIG_MMU */ +#endif /* KERNEL_INCLUDE_MMU_H */ diff --git a/kernel/init.c b/kernel/init.c index 9e2956a6b1..7d1ce4a995 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -136,6 +136,14 @@ static void bg_thread_main(void *unused1, void *unused2, void *unused3) ARG_UNUSED(unused2); ARG_UNUSED(unused3); +#ifdef CONFIG_MMU + /* Invoked here such that backing store or eviction algorithms may + * initialize kernel objects, and that all POST_KERNEL and later tasks + * may perform memory management tasks (except for z_phys_map() which + * is allowed at any time) + */ + z_mem_manage_init(); +#endif /* CONFIG_MMU */ z_sys_post_kernel = true; z_sys_init_run_level(_SYS_INIT_LEVEL_POST_KERNEL); diff --git a/kernel/mmu.c b/kernel/mmu.c index b6180614f2..200ae4295c 100644 --- a/kernel/mmu.c +++ b/kernel/mmu.c @@ -9,28 +9,144 @@ #include #include #include +#include +#include +#include +#include #include LOG_MODULE_DECLARE(os, CONFIG_KERNEL_LOG_LEVEL); +/* + * General terminology: + * - A page frame is a page-sized physical memory region in RAM. It is a + * container where a data page may be placed. It is always referred to by + * physical address. We have a convention of using uintptr_t for physical + * addresses. We instantiate a struct z_page_frame to store metadata for + * every page frame. + * + * - A data page is a page-sized region of data. It may exist in a page frame, + * or be paged out to some backing store. Its location can always be looked + * up in the CPU's page tables (or equivalent) by virtual address. + * The data type will always be void * or in some cases uint8_t * when we + * want to do pointer arithmetic. + */ + /* Spinlock to protect any globals in this file and serialize page table * updates in arch code */ -static struct k_spinlock mm_lock; +struct k_spinlock z_mm_lock; /* - * Overall virtual memory map. When the kernel starts, it is expected that all - * memory regions are mapped into one large virtual region at the beginning of - * CONFIG_KERNEL_VM_BASE. Unused virtual memory up to the limit noted by - * CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings. 
+ * General page frame management + */ + +/* Database of all RAM page frames */ +struct z_page_frame z_page_frames[Z_NUM_PAGE_FRAMES]; + +#if __ASSERT_ON +/* Indicator that z_page_frames has been initialized, many of these APIs do + * not work before POST_KERNEL + */ +static bool page_frames_initialized; +#endif + +/* Add colors to page table dumps to indicate mapping type */ +#define COLOR_PAGE_FRAMES 1 + +#if COLOR_PAGE_FRAMES +#define ANSI_DEFAULT "\x1B[0m" +#define ANSI_RED "\x1B[1;31m" +#define ANSI_GREEN "\x1B[1;32m" +#define ANSI_YELLOW "\x1B[1;33m" +#define ANSI_BLUE "\x1B[1;34m" +#define ANSI_MAGENTA "\x1B[1;35m" +#define ANSI_CYAN "\x1B[1;36m" +#define ANSI_GREY "\x1B[1;90m" + +#define COLOR(x) printk(_CONCAT(ANSI_, x)) +#else +#define COLOR(x) do { } while (0) +#endif + +static void page_frame_dump(struct z_page_frame *pf) +{ + if (z_page_frame_is_reserved(pf)) { + COLOR(CYAN); + printk("R"); + } else if (z_page_frame_is_busy(pf)) { + COLOR(MAGENTA); + printk("B"); + } else if (z_page_frame_is_pinned(pf)) { + COLOR(YELLOW); + printk("P"); + } else if (z_page_frame_is_available(pf)) { + COLOR(GREY); + printk("."); + } else if (z_page_frame_is_mapped(pf)) { + COLOR(DEFAULT); + printk("M"); + } else { + COLOR(RED); + printk("?"); + } +} + +void z_page_frames_dump(void) +{ + int column = 0; + + __ASSERT(page_frames_initialized, "%s called too early", __func__); + printk("Physical memory from 0x%lx to 0x%lx\n", + Z_PHYS_RAM_START, Z_PHYS_RAM_END); + + for (int i = 0; i < Z_NUM_PAGE_FRAMES; i++) { + struct z_page_frame *pf = &z_page_frames[i]; + + page_frame_dump(pf); + + column++; + if (column == 64) { + column = 0; + printk("\n"); + } + } + + COLOR(DEFAULT); + if (column != 0) { + printk("\n"); + } +} + +#define VIRT_FOREACH(_base, _size, _pos) \ + for (_pos = _base; \ + _pos < ((uint8_t *)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE) + +#define PHYS_FOREACH(_base, _size, _pos) \ + for (_pos = _base; \ + _pos < ((uintptr_t)_base + _size); _pos += CONFIG_MMU_PAGE_SIZE) + +/* + * Virtual address space management * - * +--------------+ <- CONFIG_KERNEL_VM_BASE + * Call all of these functions with z_mm_lock held. + * + * Overall virtual memory map: When the kernel starts, it resides in + * virtual memory in the region Z_BOOT_KERNEL_VIRT_START to + * Z_BOOT_KERNEL_VIRT_END. Unused virtual memory past this, up to the limit + * noted by CONFIG_KERNEL_VM_SIZE may be used for runtime memory mappings. + * + * +--------------+ <- Z_VIRT_ADDR_START + * | Undefined VM | <- May contain ancillary regions like x86_64's locore + * +--------------+ <- Z_BOOT_KERNEL_VIRT_START (often == Z_VIRT_ADDR_START) * | Mapping for | - * | all RAM | + * | main kernel | + * | image | + * | | + * | | + * +--------------+ <- Z_BOOT_KERNEL_VIRT_END * | | - * | | - * +--------------+ <- CONFIG_KERNEL_VM_BASE + CONFIG_KERNEL_RAM_SIZE - * | Available | also the mapping limit as mappings grown downward - * | virtual mem | + * | Unused, | + * | Available VM | * | | * |..............| <- mapping_pos (grows downward as more mappings are made) * | Mapping | @@ -40,31 +156,183 @@ static struct k_spinlock mm_lock; * | ... | * +--------------+ * | Mapping | - * +--------------+ <- CONFIG_KERNEL_VM_BASE + CONFIG_KERNEL_VM_SIZE + * +--------------+ <- mappings start here + * | Reserved | <- special purpose virtual page(s) of size Z_VM_RESERVED + * +--------------+ <- Z_VIRT_RAM_END * - * At the moment we just have one area for mappings and they are permanent. - * This is under heavy development and may change. 
+ * At the moment we just have one downward-growing area for mappings. + * There is currently no support for un-mapping memory, see #28900. + */ +static uint8_t *mapping_pos = Z_VIRT_RAM_END - Z_VM_RESERVED; + +/* Get a chunk of virtual memory and mark it as being in-use. + * + * This may be called from arch early boot code before z_cstart() is invoked. + * Data will be copied and BSS zeroed, but this must not rely on any + * initialization functions being called prior to work correctly. + */ +static void *virt_region_get(size_t size) +{ + uint8_t *dest_addr; + + if ((mapping_pos - size) < Z_KERNEL_VIRT_END) { + LOG_ERR("insufficient virtual address space (requested %zu)", + size); + return NULL; + } + + mapping_pos -= size; + dest_addr = mapping_pos; + + return dest_addr; +} + +/* + * Free page frames management + * + * Call all of these functions with z_mm_lock held. */ - /* Current position for memory mappings in kernel memory. - * At the moment, all kernel memory mappings are permanent. - * Memory mappings start at the end of the address space, and grow - * downward. - * - * All of this is under heavy development and is subject to change. - */ -static uint8_t *mapping_pos = - (uint8_t *)((uintptr_t)CONFIG_KERNEL_VM_BASE + - (uintptr_t)CONFIG_KERNEL_VM_SIZE); - -/* Lower-limit of virtual address mapping. Immediately below this is the - * permanent identity mapping for all SRAM. +/* Linked list of unused and available page frames. + * + * TODO: This is very simple and treats all free page frames as being equal. + * However, there are use-cases to consolidate free pages such that entire + * SRAM banks can be switched off to save power, and so obtaining free pages + * may require a more complex ontology which prefers page frames in RAM banks + * which are still active. + * + * This implies in the future there may be multiple slists managing physical + * pages. Each page frame will still just have one snode link. */ -static uint8_t *mapping_limit = - (uint8_t *)((uintptr_t)CONFIG_KERNEL_VM_BASE + - (size_t)CONFIG_KERNEL_RAM_SIZE); +static sys_slist_t free_page_frame_list; -size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size, +/* Number of unused and available free page frames */ +size_t z_free_page_count; + +#define PF_ASSERT(pf, expr, fmt, ...) \ + __ASSERT(expr, "page frame 0x%lx: " fmt, z_page_frame_to_phys(pf), \ + ##__VA_ARGS__) + +/* Get an unused page frame. 
don't care which one, or NULL if there are none */ +static struct z_page_frame *free_page_frame_list_get(void) +{ + sys_snode_t *node; + struct z_page_frame *pf = NULL; + + node = sys_slist_get(&free_page_frame_list); + if (node != NULL) { + z_free_page_count--; + pf = CONTAINER_OF(node, struct z_page_frame, node); + PF_ASSERT(pf, z_page_frame_is_available(pf), + "unavailable but somehow on free list"); + } + + return pf; +} + +/* Release a page frame back into the list of free pages */ +static void free_page_frame_list_put(struct z_page_frame *pf) +{ + PF_ASSERT(pf, z_page_frame_is_available(pf), + "unavailable page put on free list"); + sys_slist_append(&free_page_frame_list, &pf->node); + z_free_page_count++; +} + +static void free_page_frame_list_init(void) +{ + sys_slist_init(&free_page_frame_list); +} + +/* + * Memory Mapping + */ + +/* Called after the frame is mapped in the arch layer, to update our + * local ontology (and do some assertions while we're at it) + */ +static void frame_mapped_set(struct z_page_frame *pf, void *addr) +{ + PF_ASSERT(pf, !z_page_frame_is_reserved(pf), + "attempted to map a reserved page frame"); + + /* We do allow multiple mappings for pinned page frames + * since we will never need to reverse map them. + * This is uncommon, use-cases are for things like the + * Zephyr equivalent of VSDOs + */ + PF_ASSERT(pf, !z_page_frame_is_mapped(pf) || z_page_frame_is_pinned(pf), + "non-pinned and already mapped to %p", pf->addr); + + pf->flags |= Z_PAGE_FRAME_MAPPED; + pf->addr = addr; + pf->refcount++; +} + + +/* This may be called from arch early boot code before z_cstart() is invoked. + * Data will be copied and BSS zeroed, but this must not rely on any + * initialization functions being called prior to work correctly. + */ +void z_phys_map(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags) +{ + uintptr_t aligned_phys, addr_offset; + size_t aligned_size; + int ret; + k_spinlock_key_t key; + uint8_t *dest_addr; + + addr_offset = k_mem_region_align(&aligned_phys, &aligned_size, + phys, size, + CONFIG_MMU_PAGE_SIZE); + __ASSERT(aligned_size != 0, "0-length mapping at 0x%lx", aligned_phys); + __ASSERT(aligned_phys < (aligned_phys + (aligned_size - 1)), + "wraparound for physical address 0x%lx (size %zu)", + aligned_phys, aligned_size); + + key = k_spin_lock(&z_mm_lock); + /* Obtain an appropriately sized chunk of virtual memory */ + dest_addr = virt_region_get(aligned_size); + if (!dest_addr) { + goto fail; + } + + /* If this fails there's something amiss with virt_region_get */ + __ASSERT((uintptr_t)dest_addr < + ((uintptr_t)dest_addr + (size - 1)), + "wraparound for virtual address %p (size %zu)", + dest_addr, size); + + LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu", dest_addr, + aligned_phys, aligned_size, flags, addr_offset); + + ret = arch_mem_map(dest_addr, aligned_phys, aligned_size, flags); + if (ret != 0) { + LOG_ERR("arch_mem_map() failed with %d", ret); + goto fail; + } + k_spin_unlock(&z_mm_lock, key); + + *virt_ptr = dest_addr + addr_offset; + return; +fail: + /* May re-visit this in the future, but for now running out of + * virtual address space or failing the arch_mem_map() call is + * an unrecoverable situation. + * + * Other problems not related to resource exhaustion we leave as + * assertions since they are clearly programming mistakes. 
+ */ + LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed", + phys, size, flags); + k_panic(); +} + +/* + * Miscellaneous + */ + +size_t k_mem_region_align(uintptr_t *aligned_phys, size_t *aligned_size, uintptr_t phys_addr, size_t size, size_t align) { size_t addr_offset; @@ -72,66 +340,58 @@ size_t k_mem_region_align(uintptr_t *aligned_addr, size_t *aligned_size, /* The actual mapped region must be page-aligned. Round down the * physical address and pad the region size appropriately */ - *aligned_addr = ROUND_DOWN(phys_addr, align); - addr_offset = phys_addr - *aligned_addr; + *aligned_phys = ROUND_DOWN(phys_addr, align); + addr_offset = phys_addr - *aligned_phys; *aligned_size = ROUND_UP(size + addr_offset, align); return addr_offset; } -void z_phys_map(uint8_t **virt_ptr, uintptr_t phys, size_t size, uint32_t flags) +#define VM_OFFSET ((CONFIG_KERNEL_VM_BASE + CONFIG_KERNEL_VM_OFFSET) - \ + CONFIG_SRAM_BASE_ADDRESS) + +/* Only applies to boot RAM mappings within the Zephyr image that have never + * been remapped or paged out. Never use this unless you know exactly what you + * are doing. + */ +#define BOOT_VIRT_TO_PHYS(virt) ((uintptr_t)(((uint8_t *)virt) + VM_OFFSET)) + +void z_mem_manage_init(void) { - uintptr_t aligned_addr, addr_offset; - size_t aligned_size; - int ret; - k_spinlock_key_t key; - uint8_t *dest_virt; + uintptr_t phys; + uint8_t *addr; + struct z_page_frame *pf; + k_spinlock_key_t key = k_spin_lock(&z_mm_lock); - addr_offset = k_mem_region_align(&aligned_addr, &aligned_size, - phys, size, - CONFIG_MMU_PAGE_SIZE); + free_page_frame_list_init(); - key = k_spin_lock(&mm_lock); - - /* Carve out some unused virtual memory from the top of the - * address space +#ifdef CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES + /* If some page frames are unavailable for use as memory, arch + * code will mark Z_PAGE_FRAME_RESERVED in their flags */ - if ((mapping_pos - aligned_size) < mapping_limit) { - LOG_ERR("insufficient kernel virtual address space"); - goto fail; + arch_reserved_pages_update(); +#endif /* CONFIG_ARCH_HAS_RESERVED_PAGE_FRAMES */ + + /* All pages composing the Zephyr image are mapped at boot in a + * predictable way. This can change at runtime. 
+ */ + VIRT_FOREACH(Z_KERNEL_VIRT_START, Z_KERNEL_VIRT_SIZE, addr) + { + frame_mapped_set(z_phys_to_page_frame(BOOT_VIRT_TO_PHYS(addr)), + addr); } - mapping_pos -= aligned_size; - dest_virt = mapping_pos; - LOG_DBG("arch_mem_map(%p, 0x%lx, %zu, %x) offset %lu\n", dest_virt, - aligned_addr, aligned_size, flags, addr_offset); - __ASSERT(dest_virt != NULL, "NULL page memory mapping"); - __ASSERT(aligned_size != 0, "0-length mapping at 0x%lx", aligned_addr); - __ASSERT((uintptr_t)dest_virt < - ((uintptr_t)dest_virt + (aligned_size - 1)), - "wraparound for virtual address %p (size %zu)", - dest_virt, size); - __ASSERT(aligned_addr < (aligned_addr + (size - 1)), - "wraparound for physical address 0x%lx (size %zu)", - aligned_addr, size); - - ret = arch_mem_map(dest_virt, aligned_addr, aligned_size, flags); - k_spin_unlock(&mm_lock, key); - - if (ret == 0) { - *virt_ptr = dest_virt + addr_offset; - } else { - /* This happens if there is an insurmountable problem - * with the selected cache modes or access flags - * with no safe fallback - */ - - LOG_ERR("arch_mem_map() to %p returned %d", dest_virt, ret); - goto fail; + /* Any remaining pages that aren't mapped, reserved, or pinned get + * added to the free pages list + */ + Z_PAGE_FRAME_FOREACH(phys, pf) { + if (z_page_frame_is_available(pf)) { + free_page_frame_list_put(pf); + } } - return; -fail: - LOG_ERR("memory mapping 0x%lx (size %zu, flags 0x%x) failed", - phys, size, flags); - k_panic(); + LOG_DBG("free page frames: %zu", z_free_page_count); +#if __ASSERT_ON + page_frames_initialized = true; +#endif + k_spin_unlock(&z_mm_lock, key); }
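
For illustration only (not part of the diff): a minimal sketch of how the clarified z_phys_map() contract above is intended to be used, i.e. for mapping a memory-mapped I/O region rather than system RAM page frames. The device name, physical base address, and register block size are hypothetical placeholders, and K_MEM_PERM_RW | K_MEM_CACHE_NONE is one plausible combination of the K_MEM_CACHE_*/K_MEM_PERM_* flags the header documents; it assumes z_phys_map() is visible via <sys/mem_manage.h> in this tree.

#include <kernel.h>
#include <sys/mem_manage.h>

/* Hypothetical MMIO device; the base address and size are placeholders,
 * not taken from any real board.
 */
#define MY_DEV_PHYS_BASE 0xFE000000UL
#define MY_DEV_MMIO_SIZE KB(4)

static volatile uint32_t *my_dev_regs;

void my_dev_map(void)
{
	uint8_t *virt;

	/* Map the device registers uncached and writable. Per the documented
	 * contract, z_phys_map() panics on failure, so no error check here.
	 */
	z_phys_map(&virt, (uintptr_t)MY_DEV_PHYS_BASE, MY_DEV_MMIO_SIZE,
		   K_MEM_PERM_RW | K_MEM_CACHE_NONE);

	my_dev_regs = (volatile uint32_t *)virt;
}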
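
Also for illustration (not part of the diff): a sketch of how kernel-internal code might walk the new page frame database from kernel/include/mmu.h, in the same spirit as z_page_frames_dump() above. It assumes CONFIG_MMU=y, that it runs after z_mem_manage_init(), and that the caller takes z_mm_lock as the file-level comments require; the function name and the extern declaration of z_mm_lock are illustrative assumptions, not additions made by this patch.

#include <kernel.h>
#include <spinlock.h>
#include <mmu.h>	/* kernel/include/mmu.h, kernel-internal only */

/* Defined (non-static) in kernel/mmu.c by this patch; declared here only
 * for the sake of a self-contained example.
 */
extern struct k_spinlock z_mm_lock;

/* Hypothetical diagnostic: count page frames in each state */
void count_page_frames(size_t *mapped, size_t *reserved, size_t *avail)
{
	uintptr_t phys;
	struct z_page_frame *pf;
	k_spinlock_key_t key = k_spin_lock(&z_mm_lock);

	*mapped = 0U;
	*reserved = 0U;
	*avail = 0U;

	Z_PAGE_FRAME_FOREACH(phys, pf) {
		if (z_page_frame_is_reserved(pf)) {
			(*reserved)++;
		} else if (z_page_frame_is_mapped(pf)) {
			(*mapped)++;
		} else if (z_page_frame_is_available(pf)) {
			(*avail)++;
		}
	}

	k_spin_unlock(&z_mm_lock, key);
}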