x86: mmu: relax KERNEL_VM_OFFSET == SRAM_OFFSET

There was a restriction that KERNEL_VM_OFFSET must be equal to
SRAM_OFFSET so that page directory pointer (PDP) or page
directory (PD) entries could be reused. This is not very practical
in the real world due to various hardware designs, especially those
where SRAM is not aligned to a PDP or PD boundary. So rework those bits.

Signed-off-by: Daniel Leung <daniel.leung@intel.com>
Daniel Leung authored 2021-05-03 13:07:31 -07:00; committed by Anas Nashif
parent 52398df7b6
commit 37672958ac
7 changed files with 83 additions and 79 deletions
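
For context, a minimal sketch (hypothetical base addresses and helper, not from the tree) of why the old requirement existed: a second-level table can only be shared between the identity and virtual mappings when both base addresses sit at the same offset within a single top-level entry, which is the alignment the old gen_mmu.py enforced.

# Illustrative only: why the old double-mapping scheme forced
# CONFIG_KERNEL_VM_OFFSET == CONFIG_SRAM_OFFSET.
TOPLEVEL_SHIFT = 30          # e.g. one PAE PDPT entry covers 1 GB
SCOPE = 1 << TOPLEVEL_SHIFT

def offsets_compatible(phys_base, virt_base):
    """True if one shared second-level table can serve both mappings."""
    return (phys_base % SCOPE) == (virt_base % SCOPE)

# Same offset within the 1 GB entry: sharing works (old requirement met).
assert offsets_compatible(0x0010_0000, 0x4010_0000)

# Different offsets: sharing breaks, which the new per-level identity
# mapping no longer requires.
assert not offsets_compatible(0x0010_0000, 0x4020_0000)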


@@ -370,6 +370,7 @@ config X86_MAX_ADDITIONAL_MEM_DOMAINS
config X86_EXTRA_PAGE_TABLE_PAGES
int "Reserve extra pages in page table"
default 1 if X86_PAE && (KERNEL_VM_BASE != SRAM_BASE_ADDRESS)
default 0
depends on X86_MMU
help

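
As a rough illustration of why the extra page can be needed (assumed 32-bit PAE layout; the helper below is not part of the tree): with PAE, each PDPT entry covers 1 GB and points at its own page directory, so an identity mapping that falls under a different PDPT entry than the kernel's virtual mapping needs one more page-directory page. The Kconfig default above uses the simpler condition KERNEL_VM_BASE != SRAM_BASE_ADDRESS.

# Illustrative sketch of the underlying reason for the extra page.
PDPT_ENTRY_SCOPE = 1 << 30   # 1 GB per PDPT entry with PAE

def extra_pd_pages(sram_base, kernel_vm_base):
    same_pdpt_entry = (sram_base // PDPT_ENTRY_SCOPE) == (kernel_vm_base // PDPT_ENTRY_SCOPE)
    return 0 if same_pdpt_entry else 1

print(extra_pd_pages(0x0000_0000, 0xC000_0000))  # 1: separate page directory
print(extra_pd_pages(0xC000_0000, 0xC010_0000))  # 0: same 1 GB region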

@@ -68,6 +68,14 @@
rdmsr
orl $0x800, %eax
wrmsr
#else
/* Enable Page Size Extensions (allowing 4MB pages).
* This is ignored if PAE is enabled so no need to do
* this above in PAE code.
*/
movl %cr4, %eax
orl $CR4_PSE, %eax
movl %eax, %cr4
#endif /* CONFIG_X86_PAE */
/* Enable paging (CR0.PG, bit 31) / write protect (CR0.WP, bit 16) */


@@ -22,6 +22,10 @@ FUNC_NORETURN void z_x86_prep_c(void *arg)
_kernel.cpus[0].nested = 0;
#ifdef CONFIG_MMU
z_x86_mmu_init();
#endif
#if defined(CONFIG_LOAPIC)
z_loapic_enable(0);
#endif
@@ -40,10 +44,6 @@ FUNC_NORETURN void z_x86_prep_c(void *arg)
ARG_UNUSED(info);
#endif
#ifdef CONFIG_MMU
z_x86_mmu_init();
#endif
#if CONFIG_X86_STACK_PROTECTION
for (int i = 0; i < CONFIG_MP_NUM_CPUS; i++) {
z_x86_set_stack_guard(z_interrupt_stacks[i]);


@@ -1122,40 +1122,57 @@ void arch_mem_map(void *virt, uintptr_t phys, size_t size, uint32_t flags)
MASK_ALL, 0);
}
static void identity_map_remove(void)
{
#ifdef Z_VM_KERNEL
size_t size, scope = get_entry_scope(0);
static void identity_map_remove(uint32_t level)
{
size_t size, scope = get_entry_scope(level);
pentry_t *table;
uint32_t cur_level;
uint8_t *pos;
pentry_t entry;
pentry_t *entry_ptr;
k_mem_region_align((uintptr_t *)&pos, &size,
(uintptr_t)CONFIG_SRAM_BASE_ADDRESS,
(size_t)CONFIG_SRAM_SIZE * 1024U, scope);
/* We booted with RAM mapped both to its identity and virtual
* mapping starting at CONFIG_KERNEL_VM_BASE. This was done by
* double-linking the relevant tables in the top-level table.
* At this point we don't need the identity mapping(s) any more,
* so zero the top-level table entries corresponding to the
* physical mapping.
*/
while (size != 0U) {
pentry_t *entry = get_entry_ptr(z_x86_kernel_ptables, pos, 0);
/* Need to get to the correct table */
table = z_x86_kernel_ptables;
for (cur_level = 0; cur_level < level; cur_level++) {
entry = get_entry(table, pos, cur_level);
table = next_table(entry, level);
}
entry_ptr = get_entry_ptr(table, pos, level);
/* set_pte */
*entry = 0;
*entry_ptr = 0;
pos += scope;
size -= scope;
}
#endif
}
#endif
/* Invoked to remove the identity mappings in the page tables,
* they were only needed to transition the instruction pointer at early boot
*/
void z_x86_mmu_init(void)
{
identity_map_remove();
#ifdef Z_VM_KERNEL
/* We booted with physical address space being identity mapped.
* As we are now executing in virtual address space,
* the identity map is no longer needed. So remove them.
*
* Without PAE, only need to remove the entries at the PD level.
* With PAE, need to also remove the entry at PDP level.
*/
identity_map_remove(PDE_LEVEL);
#ifdef CONFIG_X86_PAE
identity_map_remove(0);
#endif
#endif
}
#if CONFIG_X86_STACK_PROTECTION
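
The size stepped over by identity_map_remove() is the range of address space covered by one entry at the requested level, which is what get_entry_scope() returns. A small sketch of that quantity using standard x86 paging geometry (the names below are illustrative, not taken from x86_mmu.c):

SCOPES = {
    "pae_pdpt":  1 << 30,  # PAE level 0: one PDPT entry covers 1 GB
    "pae_pd":    1 << 21,  # PAE page directory: one entry covers 2 MB
    "nonpae_pd": 1 << 22,  # non-PAE page directory: one entry covers 4 MB
}

def entries_to_clear(sram_base, sram_size, scope):
    """How many identity-mapping entries cover SRAM at this level."""
    start = sram_base - (sram_base % scope)                        # round down
    end = ((sram_base + sram_size + scope - 1) // scope) * scope   # round up
    return (end - start) // scope

# Example: 64 MB of SRAM at physical 0x0.
print(entries_to_clear(0x0, 64 * 1024 * 1024, SCOPES["pae_pd"]))     # 32
print(entries_to_clear(0x0, 64 * 1024 * 1024, SCOPES["nonpae_pd"]))  # 16
print(entries_to_clear(0x0, 64 * 1024 * 1024, SCOPES["pae_pdpt"]))   # 1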


@@ -432,7 +432,7 @@ class PtableSet():
def reserve(self, virt_base, size, to_level=PT_LEVEL):
"""Reserve page table space with already aligned virt_base and size"""
debug("Reserving paging structures 0x%x (0x%x)" %
debug("Reserving paging structures for 0x%x (0x%x)" %
(virt_base, size))
align_check(virt_base, size)
@@ -458,15 +458,11 @@
self.reserve(mem_start, mem_size, to_level)
def map(self, phys_base, virt_base, size, flags, level=PT_LEVEL, double_map=True):
def map(self, phys_base, virt_base, size, flags, level=PT_LEVEL):
"""Map an address range in the page tables provided access flags.
If virt_base is None, identity mapping using phys_base is done.
If virt_base is not the same address as phys_base, the same memory
will be double mapped to the virt_base address if double_map == True;
or normal mapping to virt_base if double_map == False.
"""
skip_vm_map = virt_base is None or virt_base == phys_base
is_identity_map = virt_base is None or virt_base == phys_base
if virt_base is None:
virt_base = phys_base
@@ -479,53 +475,23 @@
align_check(phys_base, size, scope)
align_check(virt_base, size, scope)
for paddr in range(phys_base, phys_base + size, scope):
if paddr == 0 and skip_vm_map:
# Never map the NULL page
#
# If skip_vm_map, the identity map of physical
# memory will be unmapped at boot. So the actual
# NULL page will not be mapped after that.
if is_identity_map and paddr == 0 and level == PT_LEVEL:
# Never map the NULL page at page table level.
continue
vaddr = virt_base + (paddr - phys_base)
self.map_page(vaddr, paddr, flags, False, level)
if skip_vm_map or not double_map:
return
def identity_map_unaligned(self, phys_base, size, flags, level=PT_LEVEL):
"""Identity map a region of memory"""
scope = 1 << self.levels[level].addr_shift
# Find how much VM a top-level entry covers
scope = 1 << self.toplevel.addr_shift
debug("Double map %s entries with scope 0x%x" %
(self.toplevel.__class__.__name__, scope))
phys_aligned_base = round_down(phys_base, scope)
phys_aligned_end = round_up(phys_base + size, scope)
phys_aligned_size = phys_aligned_end - phys_aligned_base
# Round bases down to the entry granularity
pd_virt_base = round_down(virt_base, scope)
pd_phys_base = round_down(phys_base, scope)
size = size + (phys_base - pd_phys_base)
# The base addresses have to line up such that they can be mapped
# by the same second-level table
if phys_base - pd_phys_base != virt_base - pd_virt_base:
error("mis-aligned virtual 0x%x and physical base addresses 0x%x" %
(virt_base, phys_base))
# Round size up to entry granularity
size = round_up(size, scope)
for offset in range(0, size, scope):
cur_virt = pd_virt_base + offset
cur_phys = pd_phys_base + offset
# Get the physical address of the second-level table that
# maps the current chunk of virtual memory
table_link_phys = self.toplevel.lookup(cur_virt)
debug("copy mappings 0x%x - 0x%x to 0x%x, using table 0x%x" %
(cur_phys, cur_phys + scope - 1, cur_virt, table_link_phys))
# Link to the entry for the physical mapping (i.e. mirroring).
self.toplevel.map(cur_phys, table_link_phys, INT_FLAGS)
self.map(phys_aligned_base, None, phys_aligned_size, flags, level)
def set_region_perms(self, name, flags, level=PT_LEVEL):
"""Set access permissions for a named region that is already mapped
@@ -723,7 +689,7 @@ def map_extra_regions(pt):
# Reserve space in page table, and map the region
pt.reserve_unaligned(virt, size, level)
pt.map(phys, virt, size, flags, level, double_map=False)
pt.map(phys, virt, size, flags, level)
def main():
@@ -786,6 +752,10 @@ def main():
debug("Zephyr image: 0x%x - 0x%x size 0x%x" %
(image_base, image_base + image_size - 1, image_size))
if virt_to_phys_offset != 0:
debug("Physical address space: 0x%x - 0x%x size 0x%x" %
(sram_base, sram_base + sram_size - 1, sram_size))
is_perm_regions = isdef("CONFIG_SRAM_REGION_PERMISSIONS")
if image_size >= vm_size:
@@ -804,6 +774,17 @@
# Map the zephyr image
pt.map(image_base_phys, image_base, image_size, map_flags | ENTRY_RW)
if virt_to_phys_offset != 0:
# Need to identity map the physical address space
# as it is needed during the early boot process.
# This will be unmapped once z_x86_mmu_init()
# is called.
# Note that this only does the identity mapping
# at the page directory level to minimize wasted space.
pt.reserve_unaligned(image_base_phys, image_size, to_level=PD_LEVEL)
pt.identity_map_unaligned(image_base_phys, image_size,
FLAG_P | FLAG_RW | FLAG_SZ, level=PD_LEVEL)
if isdef("CONFIG_X86_64"):
# 64-bit has a special region in the first 64K to bootstrap other CPUs
# from real mode
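
identity_map_unaligned() widens the requested physical region to page-directory-entry granularity so the identity map is built entirely out of large PD entries (FLAG_SZ) and needs no second-level page tables that would become wasted space once the map is removed. A small sketch of that rounding, with hypothetical addresses:

PD_SCOPE = 1 << 21  # 2 MB per PD entry with PAE (4 MB without)

def round_down(x, align):
    return x & ~(align - 1)

def round_up(x, align):
    return (x + align - 1) & ~(align - 1)

image_base_phys = 0x0010_0000   # 1 MB (hypothetical)
image_size = 0x0048_0000        # 4.5 MB (hypothetical)

aligned_base = round_down(image_base_phys, PD_SCOPE)
aligned_end = round_up(image_base_phys + image_size, PD_SCOPE)
aligned_size = aligned_end - aligned_base

print(hex(aligned_base), hex(aligned_size))  # 0x0 0x600000 -> three 2 MB PD entries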


@@ -53,6 +53,7 @@
#define CR0_PG BIT(31) /* enable paging */
#define CR0_WP BIT(16) /* honor W bit even when supervisor */
#define CR4_PSE BIT(4) /* Page size extension (4MB pages) */
#define CR4_PAE BIT(5) /* enable PAE */
#define CR4_OSFXSR BIT(9) /* enable SSE (OS FXSAVE/RSTOR) */


@@ -34,22 +34,22 @@ space before ``vm_enter`` inside :file:`arch/x86/core/ia32/crt0.S`.
After ``vm_enter``, code execution is done via virtual addresses
and data can be referred via their virtual addresses. This is
possible as the page table generation script
(:file:`arch/x86/gen_mmu.py`) copies the mappings at the top level
page table such that the same second level tables are used for both
identity and virtual memory mappings. Later in the boot process,
the entries for identity mapping at the top level page table are
(:file:`arch/x86/gen_mmu.py`) identity maps the physical addresses
at the page directory level, in addition to mapping virtual addresses
to the physical memory. Later in the boot process,
the entries for identity mapping at the page directory level are
cleared in :c:func:`z_x86_mmu_init()`, effectively removing
the identity mapping of physical memory. This unmapping must be done
for userspace isolation, or else user mode threads would be able to access
restricted memory via physical addresses. Since the identity mapping
is done at the top level, there is no need to allocate additional
space for lower level tables in the whole page table structure,
or else the extra tables become wasted space once unmapped and
no longer referred. Because of this, there are restrictions on
where virtual address space can be:
is done at the page directory level, there is no need to allocate
additional space for the lowest-level page tables. However, additional
space may still be required for an extra page directory table.
There are restrictions on where virtual address space can be:
- Physical and virtual address spaces must be disjoint. This is
required as the entries in top level will be cleared.
required as the entries in the page directory table will be cleared.
If they are not disjoint, it would clear the entries needed for
virtual addresses.
@@ -73,10 +73,6 @@ where virtual address space can be:
- Both ``CONFIG_SRAM_BASE_ADDRESS`` and ``CONFIG_KERNEL_VM_BASE``
must also align with the starting addresses of targeted regions.
- Due to re-using of second level entries, both
``CONFIG_SRAM_OFFSET`` and ``CONFIG_KERNEL_VM_OFFSET`` must be of
same value.
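
As an illustration of the restrictions above (not part of gen_mmu.py; the 2 MB alignment granularity is an assumption matching PAE page-directory entries), a layout can be sanity-checked roughly like this:

PD_SCOPE = 1 << 21  # 2 MB PD entries with PAE; 4 MB without

def check_layout(sram_base, sram_size, kernel_vm_base, kernel_vm_size):
    phys_end = sram_base + sram_size
    virt_end = kernel_vm_base + kernel_vm_size
    # Physical and virtual address spaces must be disjoint.
    disjoint = phys_end <= kernel_vm_base or virt_end <= sram_base
    # Both bases should be expressible with page-directory entries.
    aligned = (sram_base % PD_SCOPE == 0) and (kernel_vm_base % PD_SCOPE == 0)
    return disjoint and aligned

# SRAM at 0x0 (64 MB), kernel virtual space at 3 GB (128 MB): OK.
print(check_layout(0x0000_0000, 0x0400_0000, 0xC000_0000, 0x0800_0000))  # True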
Specifying Additional Memory Mappings at Build Time
***************************************************