Skip to content

Commit

Permalink
feat(fork): only copy necessary mappings
Browse files Browse the repository at this point in the history
Do not copy page table entries that can be mapped correctly on-demand
when a page fault occurs.

Signed-off-by: Anhad Singh <[email protected]>
  • Loading branch information
Andy-Python-Programmer committed Jun 27, 2024
1 parent 15ca628 commit 9095737
Show file tree
Hide file tree
Showing 8 changed files with 256 additions and 282 deletions.
8 changes: 3 additions & 5 deletions src/aero_kernel/src/arch/x86_64/task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -313,11 +313,9 @@ impl ArchTask {
})
}

pub fn fork(&self) -> Result<Self, MapToError<Size4KiB>> {
pub fn fork(&self, address_space: AddressSpace) -> Result<Self, MapToError<Size4KiB>> {
assert!(self.user, "cannot fork a kernel task");

let new_address_space = AddressSpace::this().offset_page_table().fork()?;

// Since the fork function marks all of the userspace entries in both the forked
// and the parent address spaces as read only, we will flush the page table of the
// current process to trigger COW.
Expand Down Expand Up @@ -346,14 +344,14 @@ impl ArchTask {

*context = Context::default();
context.rip = fork_init as u64;
context.cr3 = new_address_space.cr3().start_address().as_u64();
context.cr3 = address_space.cr3().start_address().as_u64();

let fpu_storage = self.fpu_storage.unwrap().clone();

Ok(Self {
context: unsafe { Unique::new_unchecked(context) },
context_switch_rsp: VirtAddr::new(switch_stack as u64),
address_space: new_address_space,
address_space,
user: true,

// The FS and GS bases are inherited from the parent process.
Expand Down
12 changes: 12 additions & 0 deletions src/aero_kernel/src/fs/file_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,18 @@ impl FileHandle {
}
}

/// Reports whether this handle was opened with a write-capable mode.
///
/// Returns `true` when the handle's current flags share at least one bit with
/// `O_WRONLY | O_RDWR`.
///
/// NOTE(review): this is a bit-overlap test rather than an access-mode comparison;
/// it is only exact if `OpenFlags` encodes the access modes as disjoint bits.
/// Confirm against the `OpenFlags` definition.
#[inline]
pub fn is_writable(&self) -> bool {
    let current = self.flags();
    let write_modes = OpenFlags::O_WRONLY | OpenFlags::O_RDWR;

    current.intersects(write_modes)
}

/// Reports whether this handle was opened with a read-capable mode.
///
/// Returns `true` when the handle's current flags share at least one bit with
/// `O_RDONLY | O_RDWR`.
///
/// NOTE(review): this is a bit-overlap test rather than an access-mode comparison.
/// If `O_RDONLY` is encoded as zero it contributes no bits to the mask, so a
/// read-only handle would not be reported as readable. Confirm against the
/// `OpenFlags` definition.
#[inline]
pub fn is_readable(&self) -> bool {
    let current = self.flags();
    let read_modes = OpenFlags::O_RDONLY | OpenFlags::O_RDWR;

    current.intersects(read_modes)
}

/// Returns a copy of the open flags currently stored on this handle.
///
/// The value is read through `self.flags.read()` and dereferenced, so the caller
/// receives a snapshot rather than a reference into the handle.
pub fn flags(&self) -> OpenFlags {
    let current = self.flags.read();
    *current
}
Expand Down
2 changes: 1 addition & 1 deletion src/aero_kernel/src/fs/procfs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ impl INodeInterface for LockedProcINode {
"start": map.start_addr.as_u64(),
"end": map.end_addr.as_u64(),
"flags": map.flags.bits(),
"protection": map.protection.bits(),
"protection": map.protection().bits(),
}));
});

Expand Down
3 changes: 2 additions & 1 deletion src/aero_kernel/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@
associated_type_defaults,
trait_upcasting,
asm_const,
sync_unsafe_cell
sync_unsafe_cell,
// effects
)]
// TODO(andypython): can we remove the dependency on "prelude_import" and "lang_items"?
// `lang_items` => is currently used for the personality function (`rust_eh_personality`).
Expand Down
220 changes: 42 additions & 178 deletions src/aero_kernel/src/mem/paging/mapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,7 @@
// Some code is borrowed from the x86_64 crate (MIT + Apache), with added support for 5-level
// paging and some kernel-specific features that cannot be implemented directly in the crate itself.

use core::ops::Range;

use crate::mem::AddressSpace;
use core::ops::{Range, RangeInclusive};

use super::addr::{PhysAddr, VirtAddr};
use super::page::{AddressNotAligned, Page, PageSize, PhysFrame, Size1GiB, Size2MiB, Size4KiB};
Expand Down Expand Up @@ -1139,187 +1137,53 @@ impl<'a> OffsetPageTable<'a> {
Ok(())
}

pub fn fork(&mut self) -> Result<AddressSpace, MapToError<Size4KiB>> {
let mut address_space = AddressSpace::new()?; // Allocate the new address space

let offset_table = address_space.offset_page_table();
let make_next_level = |table: &mut PageTable,
i: usize|
-> Result<(bool, &mut PageTable), MapToError<Size4KiB>> {
let entry = &mut table[i];
let created = if !entry.flags().contains(PageTableFlags::PRESENT) {
let frame = FRAME_ALLOCATOR
.allocate_frame()
.ok_or(MapToError::FrameAllocationFailed)?;

entry.set_frame(
frame,
PageTableFlags::PRESENT
| PageTableFlags::WRITABLE
| PageTableFlags::USER_ACCESSIBLE,
);

true
} else {
entry.set_flags(
PageTableFlags::PRESENT
| PageTableFlags::WRITABLE
| PageTableFlags::USER_ACCESSIBLE,
);

false
};

let page_table_ptr = {
let addr = entry.frame().unwrap().start_address().as_hhdm_virt();
addr.as_mut_ptr::<PageTable>()
};

let page_table: &mut PageTable = unsafe { &mut *page_table_ptr };
if created {
page_table.zero();
pub fn copy_page_range(&mut self, src: &mut OffsetPageTable, range: RangeInclusive<VirtAddr>) {
let mut map_to = |src: &mut OffsetPageTable, addr, frame, flags| match frame {
MappedFrame::Size4KiB(frame) => {
let page = Page::<Size4KiB>::containing_address(addr);

unsafe {
self.map_to_with_table_flags(
page,
frame,
flags,
PageTableFlags::PRESENT
| PageTableFlags::USER_ACCESSIBLE
| PageTableFlags::WRITABLE,
)
}
.unwrap()
// operating on an inactive page table
.ignore();

unsafe { src.update_flags(page, flags) }
.unwrap()
// caller is required to invalidate the TLB
.ignore();
}

Ok((created, page_table))
_ => todo!(),
};

let last_level_fork = |entry: &mut PageTableEntry, n1: &mut PageTable, i: usize| {
let mut flags = entry.flags();
let mut addr = *range.start();

// Check if the mapping is shared.
// if !flags.contains(PageTableFlags::BIT_10) {
// Setup copy on write page.
flags.remove(PageTableFlags::WRITABLE);
// }
while addr != *range.end() {
match src.translate(addr) {
TranslateResult::Mapped {
frame,
offset,
flags,
} => {
assert_eq!(offset, 0, "unaligned page range");
map_to(src, addr, frame, flags & !PageTableFlags::WRITABLE);
}

entry.set_flags(flags);
n1[i].set_frame(entry.frame().unwrap(), flags);
};
TranslateResult::NotMapped => {}
TranslateResult::InvalidFrameAddress(addr) => {
panic!("invalid frame address {:#x}", addr);
}
}

// We loop through each of the page table entries in the page table which are user
// accessible and we remove the writeable flag from the entry if present. This will
// make the page table entry copy on the first write. Then we clone the page table entry
// and place it in the new page table.
if self.inner.level_5_paging_enabled {
self.inner.page_table.for_entries_mut(
PageTableFlags::PRESENT | PageTableFlags::USER_ACCESSIBLE,
|i, _, table| {
let (_, n4) = make_next_level(offset_table.inner.page_table, i)?;
let mut count_4 = 0;

table.for_entries_mut(
PageTableFlags::PRESENT | PageTableFlags::USER_ACCESSIBLE,
|j, _, table| {
let (w3, n3) = make_next_level(n4, j)?;
let mut count_3 = 0;

if w3 {
count_4 += 1;
}

table.for_entries_mut(
PageTableFlags::PRESENT | PageTableFlags::USER_ACCESSIBLE,
|k, _, table| {
let (w2, n2) = make_next_level(n3, k)?;
let mut count_2 = 0;

if w2 {
count_3 += 1;
}

table.for_entries_mut(
PageTableFlags::PRESENT | PageTableFlags::USER_ACCESSIBLE,
|l, _, table| {
let (w1, n1) = make_next_level(n2, l)?;
let mut count_1 = 0;

if w1 {
count_2 += 1;
}

table.for_entries_mut(
PageTableFlags::PRESENT
| PageTableFlags::USER_ACCESSIBLE,
|i, entry, _| {
last_level_fork(entry, n1, i);

count_1 += 1;
Ok(())
},
)?;

n2[l].set_entry_count(count_1);
Ok(())
},
)?;

n3[k].set_entry_count(count_2);
Ok(())
},
)?;

n4[j].set_entry_count(count_3);
Ok(())
},
)?;

offset_table.inner.page_table[i].set_entry_count(count_4);
Ok(())
},
)?;
} else {
self.inner.page_table.for_entries_mut(
PageTableFlags::PRESENT | PageTableFlags::USER_ACCESSIBLE,
|i, _, table| {
let (_, n3) = make_next_level(offset_table.inner.page_table, i)?;
let mut count_3 = 0;

table.for_entries_mut(
PageTableFlags::PRESENT | PageTableFlags::USER_ACCESSIBLE,
|k, _, table| {
let (w2, n2) = make_next_level(n3, k)?;
let mut count_2 = 0;

if w2 {
count_3 += 1;
}

table.for_entries_mut(
PageTableFlags::PRESENT | PageTableFlags::USER_ACCESSIBLE,
|l, _, table| {
let (w1, n1) = make_next_level(n2, l)?;
let mut count_1 = 0;

if w1 {
count_2 += 1;
}

table.for_entries_mut(
PageTableFlags::PRESENT | PageTableFlags::USER_ACCESSIBLE,
|i, entry, _| {
last_level_fork(entry, n1, i);

count_1 += 1;
Ok(())
},
)?;

n2[l].set_entry_count(count_1);
Ok(())
},
)?;

n3[k].set_entry_count(count_2);
Ok(())
},
)?;

offset_table.inner.page_table[i].set_entry_count(count_3);
Ok(())
},
)?;
addr += Size4KiB::SIZE;
}

Ok(address_space)
}
}
3 changes: 1 addition & 2 deletions src/aero_kernel/src/syscall/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,8 +181,7 @@ pub fn mmap(
.current_task()
.file_table
.get_handle(fd)
.ok_or(SyscallError::EBADF)?
.dirnode(),
.ok_or(SyscallError::EBADF)?,
);
}

Expand Down
Loading

0 comments on commit 9095737

Please sign in to comment.