Skip to content

Commit

Permalink
feat: rework the FragmentedRegion trait to be more responsive
Browse files Browse the repository at this point in the history
Instead of listing every region up front and then fetching them all,
list and fetch regions one at a time, which reduces the delay between
the listing of a region and its fetch. This is useful because the regions
of a live process can change size: if a region shrinks after it has been
listed, the read can fail entirely (this happens on Windows, for example).
  • Loading branch information
vthib committed Nov 17, 2023
1 parent eca0fd4 commit a1219cc
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 111 deletions.
39 changes: 23 additions & 16 deletions boreal/src/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,11 @@ pub enum Memory<'a> {
#[derive(Debug)]
pub struct Fragmented<'a> {
pub(crate) obj: Box<dyn FragmentedMemory + 'a>,
pub(crate) regions: Vec<RegionDescription>,
}

impl<'a> Memory<'a> {
pub(crate) fn new_fragmented(obj: Box<dyn FragmentedMemory + 'a>) -> Memory {
// Cache the regions in the object. This avoids reallocating a Vec every time
// we list the regions.
let regions = obj.list_regions();

Memory::Fragmented(Fragmented { obj, regions })
Memory::Fragmented(Fragmented { obj })
}
}

Expand Down Expand Up @@ -80,7 +75,8 @@ impl Memory<'_> {
}
}
Self::Fragmented(fragmented) => {
for region in &fragmented.regions {
fragmented.obj.reset();
while let Some(region) = fragmented.obj.next() {
let Some(relative_start) = start.checked_sub(region.start) else {
break;
};
Expand All @@ -90,7 +86,7 @@ impl Memory<'_> {
let end = end.checked_sub(region.start)?;
let end = std::cmp::min(region.length, end);

let region = fragmented.obj.fetch_region(*region)?;
let region = fragmented.obj.fetch()?;
return region.mem.get(relative_start..end);
}

Expand All @@ -106,15 +102,14 @@ impl Memory<'_> {
/// single slice. The main use case is for example scanning the memory of a
/// process, which is arranged in non contiguous regions of mapped bytes.
pub trait FragmentedMemory: Send + Sync + std::fmt::Debug {
/// List non overlapping regions mapping the fragmented memory.
/// List the next region that can be scanned.
///
/// This listing should be cheap. Actually retrieving the memory behind a region
/// should only be done in the [`FragmentedMemory::fetch_region`] method.
/// This is also the reason why this function cannot fail, the regions should have been
/// precomputed already.
fn list_regions(&self) -> Vec<RegionDescription>;
/// If None is returned, the listing is considered complete.
fn next(&mut self) -> Option<RegionDescription>;

/// Fetch the data of a region.
/// Fetch the current region.
///
/// Fetch the region that was last returned by a call to [`FragmentedMemory::next`].
///
/// If unable to fetch, None must be returned. The region will be ignored,
/// but scanning will go on:
Expand All @@ -123,7 +118,19 @@ pub trait FragmentedMemory: Send + Sync + std::fmt::Debug {
/// if used).
/// - If the fetch was done during evaluation, the expression will evaluate
/// as `undefined`.
fn fetch_region(&mut self, region_desc: RegionDescription) -> Option<Region>;
fn fetch(&mut self) -> Option<Region>;

/// Reset the object.
///
/// This can be called to reset the object to its initial state. After this
/// function is called, a call to [`FragmentedMemory::next`] should list
/// the first region available.
///
/// This is used when multiple iterations over the memory regions are needed.
/// For example, a first iteration is done for string scanning, but this method can be
/// called to iterate on regions again when evaluating some conditions
/// that require access to specific regions.
fn reset(&mut self);
}

/// A description of a region of memory to scan.
Expand Down
4 changes: 2 additions & 2 deletions boreal/src/scanner/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -516,8 +516,8 @@ impl Inner {
}
Memory::Fragmented(fragmented) => {
// Scan each region for all variable occurrences.
for region_desc in &fragmented.regions {
let Some(region) = fragmented.obj.fetch_region(*region_desc) else {
while fragmented.obj.next().is_some() {
let Some(region) = fragmented.obj.fetch() else {
continue;
};
self.ac_scan
Expand Down
45 changes: 29 additions & 16 deletions boreal/src/scanner/process/sys/linux.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,13 @@ pub fn process_memory(pid: u32) -> Result<Box<dyn FragmentedMemory>, ScanError>
let mem_file = File::open(proc_pid_path.join("mem")).map_err(open_error_to_scan_error)?;

// Use /proc/pid/maps to list the memory regions to scan.
let file = File::open(proc_pid_path.join("maps")).map_err(open_error_to_scan_error)?;
let maps_file = File::open(proc_pid_path.join("maps")).map_err(open_error_to_scan_error)?;

let reader = BufReader::new(file);
let mut regions = Vec::new();
for line in reader.lines() {
let line = line.map_err(ScanError::CannotListProcessRegions)?;

if let Some(region) = parse_map_line(&line) {
regions.push(region);
}
}
Ok(Box::new(LinuxProcessMemory {
regions,
maps_file: BufReader::new(maps_file),
mem_file,
buffer: Vec::new(),
region: None,
}))
}

Expand Down Expand Up @@ -64,22 +56,43 @@ fn open_error_to_scan_error(open_error: std::io::Error) -> ScanError {

#[derive(Debug)]
struct LinuxProcessMemory {
// List of regions parsed from the /proc/pid/maps file
regions: Vec<RegionDescription>,
// Opened handle on /proc/pid/maps
maps_file: BufReader<File>,

// Opened handle on /proc/pid/mem.
mem_file: File,

// Buffer used to hold the duplicated process memory when fetched.
buffer: Vec<u8>,

// Current region.
region: Option<RegionDescription>,
}

impl FragmentedMemory for LinuxProcessMemory {
fn list_regions(&self) -> Vec<RegionDescription> {
self.regions.clone()
fn reset(&mut self) {
let _ = self.maps_file.rewind();
}

fn next(&mut self) -> Option<RegionDescription> {
let mut line = String::new();
self.region = loop {
line.clear();
if self.maps_file.read_line(&mut line).is_err() {
break None;
}
if line.is_empty() {
break None;
}
if let Some(desc) = parse_map_line(&line) {
break Some(desc);
}
};
self.region
}

fn fetch_region(&mut self, desc: RegionDescription) -> Option<Region> {
fn fetch(&mut self) -> Option<Region> {
let desc = self.region?;
let _ = self
.mem_file
.seek(SeekFrom::Start(desc.start as u64))
Expand Down
115 changes: 57 additions & 58 deletions boreal/src/scanner/process/sys/windows.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,60 +38,10 @@ pub fn process_memory(pid: u32) -> Result<Box<dyn FragmentedMemory>, ScanError>
}
};

let mut regions = Vec::new();
let mut start_addr: Option<usize> = None;
loop {
let mut info = MaybeUninit::uninit();
// Safety:
// - the handle is a valid process handle and has the PROCESS_QUERY_INFORMATION
// permission.
let res = unsafe {
VirtualQueryEx(
handle_to_windows_handle(handle.as_handle()),
start_addr.map(|v| v as *const c_void),
info.as_mut_ptr(),
std::mem::size_of::<MEMORY_BASIC_INFORMATION>(),
)
};

if res == 0 {
match std::io::Error::last_os_error() {
#[allow(clippy::cast_possible_wrap)]
err if err.raw_os_error() == Some(ERROR_INVALID_PARAMETER.0 as _) => {
// Returned when the start address is bigger than any region, so it indicates
// the end of the loop.
break;
}
err => {
return Err(ScanError::CannotListProcessRegions(err));
}
}
}

// Safety: returned value is not zero, so the function succeeded, and has filled
// the info object.
let info = unsafe { info.assume_init() };

if info.State == MEM_COMMIT && info.Protect != PAGE_NOACCESS {
regions.push(RegionDescription {
start: info.BaseAddress as usize,
length: info.RegionSize,
});
}
start_addr = match (info.BaseAddress as usize).checked_add(info.RegionSize) {
Some(v) => Some(v),
None => {
// If this happens, a region actually covers up to u64::MAX, so there cannot
// be any region past it. That's unlikely, but let's just be safe about it.
break;
}
};
}

Ok(Box::new(WindowsProcessMemory {
handle,
regions,
buffer: Vec::new(),
region: None,
}))
}

Expand All @@ -100,20 +50,69 @@ struct WindowsProcessMemory {
// Handle to the process being scanned.
handle: OwnedHandle,

// List of regions parsed from the /proc/pid/maps file
regions: Vec<RegionDescription>,

// Buffer used to hold the duplicated process memory when fetched.
buffer: Vec<u8>,

// Description of the current region.
region: Option<RegionDescription>,
}

impl FragmentedMemory for WindowsProcessMemory {
fn list_regions(&self) -> Vec<RegionDescription> {
self.regions.clone()
fn reset(&mut self) {
self.region = None;
}

fn next(&mut self) -> Option<RegionDescription> {
let mut next_addr = match self.region {
Some(region) => Some(region.start.checked_add(region.length)?),
None => None,
};
self.region = loop {
let mut info = MaybeUninit::uninit();
// Safety:
// - the handle is a valid process handle and has the PROCESS_QUERY_INFORMATION
// permission.
let res = unsafe {
VirtualQueryEx(
handle_to_windows_handle(self.handle.as_handle()),
next_addr.map(|v| v as *const c_void),
info.as_mut_ptr(),
std::mem::size_of::<MEMORY_BASIC_INFORMATION>(),
)
};

if res == 0 {
break None;
}

// Safety: returned value is not zero, so the function succeeded, and has filled
// the info object.
let info = unsafe { info.assume_init() };

next_addr = match (info.BaseAddress as usize).checked_add(info.RegionSize) {
Some(v) => Some(v),
None => {
// If this happens, a region actually covers up to u64::MAX, so there cannot
// be any region past it. That's unlikely, but let's just be safe about it.
break None;
}
};
if info.State == MEM_COMMIT && info.Protect != PAGE_NOACCESS {
break Some(RegionDescription {
start: info.BaseAddress as usize,
length: info.RegionSize,
});
}
};
self.region
}

fn fetch_region(&mut self, desc: RegionDescription) -> Option<Region> {
self.buffer.resize(desc.length, 0);
fn fetch(&mut self) -> Option<Region> {
let desc = self.region?;

// FIXME: make configurable
self.buffer
.resize(std::cmp::min(desc.length, 100 * 1024 * 1024), 0);

let mut nb_bytes_read = 0;
// Safety:
Expand Down
3 changes: 3 additions & 0 deletions boreal/tests/it/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ rule a {
}"#,
);

// Scanning ourselves can lead to a bit of a weird situation where the buffer
// used to hold each region grows and grows as it tries to contain... its own
// region. This will end when reaching the max size of the buffer.
checker.check_process(std::process::id(), false);

// This is "self0123456789scan" when xor'ed
Expand Down
54 changes: 35 additions & 19 deletions boreal/tests/it/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,10 @@ impl Checker {
pub fn check_fragmented(&mut self, regions: &[(usize, Option<&[u8]>)], expected_res: bool) {
let res = self
.scanner
.scan_fragmented(FragmentedSlices { regions })
.scan_fragmented(FragmentedSlices {
regions,
current: None,
})
.unwrap();
let res = !res.matched_rules.is_empty();
assert_eq!(res, expected_res, "test failed for boreal");
Expand Down Expand Up @@ -402,7 +405,10 @@ impl Checker {
let mut scanner = self.scanner.clone();
scanner.set_scan_params(scanner.scan_params().clone().compute_full_matches(true));
let res = scanner
.scan_fragmented(FragmentedSlices { regions })
.scan_fragmented(FragmentedSlices {
regions,
current: None,
})
.unwrap();
let res = get_boreal_full_matches(&res);
assert_eq!(res, expected, "test failed for boreal");
Expand Down Expand Up @@ -468,29 +474,39 @@ impl Checker {
/// Test implementation of `FragmentedMemory` backed by in-memory slices.
///
/// Regions are listed in slice order, one per call to `next`, and the region
/// last listed is the one returned by `fetch`.
#[derive(Debug)]
struct FragmentedSlices<'a, 'b> {
// Regions to expose, as `(start, mem)` pairs. A `None` memory makes the
// fetch of that region return `None`; such a region is still listed, with
// a length of 10.
regions: &'b [(usize, Option<&'a [u8]>)],
// Index in `regions` of the region last listed by `next`. `None` until
// `next` is called for the first time, and again after `reset`.
current: Option<usize>,
}

impl FragmentedMemory for FragmentedSlices<'_, '_> {
fn list_regions(&self) -> Vec<RegionDescription> {
self.regions
.iter()
.map(|(start, mem)| RegionDescription {
start: *start,
length: mem.map_or(10, |v| v.len()),
})
.collect()
fn reset(&mut self) {
self.current = None;
}

fn fetch_region(&mut self, region_desc: RegionDescription) -> Option<Region> {
for (start, mem) in self.regions {
if *start == region_desc.start {
return Some(Region {
start: *start,
mem: (*mem)?,
});
}
fn next(&mut self) -> Option<RegionDescription> {
let current = match self.current {
Some(v) => v + 1,
None => 0,
};
self.current = Some(current);

if current < self.regions.len() {
let region = self.regions[current];
Some(RegionDescription {
start: region.0,
length: region.1.map_or(10, |v| v.len()),
})
} else {
None
}
unreachable!()
}

fn fetch(&mut self) -> Option<Region> {
self.regions.get(self.current?).and_then(|(start, mem)| {
Some(Region {
start: *start,
mem: (*mem)?,
})
})
}
}

Expand Down

0 comments on commit a1219cc

Please sign in to comment.