Basic mmap module

This commit is contained in:
Dennis Zhuang
2022-05-28 19:12:45 +08:00
committed by Jeong Yunwon
parent cc4583ef50
commit d4f001fa21
7 changed files with 367 additions and 47 deletions

8
Cargo.lock generated
View File

@@ -890,9 +890,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.123"
version = "0.2.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb691a747a7ab48abc15c5b42066eaafde10dc427e3b6ee2a1cf43db04c763bd"
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836"
[[package]]
name = "libffi"
@@ -998,9 +998,9 @@ checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
[[package]]
name = "memmap2"
version = "0.5.3"
version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "057a3db23999c867821a7a59feb06a578fcb03685e983dff90daf9e7d24ac08f"
checksum = "d5172b50c23043ff43dd53e51392f36519d9b35a8f3a410d30ece5d1aedd58ae"
dependencies = [
"libc",
]

View File

@@ -41,7 +41,7 @@ rustpython-stdlib = {path = "stdlib", optional = true, default-features = false,
dirs = { package = "dirs-next", version = "2.0.0" }
num-traits = "0.2.14"
cfg-if = "1.0.0"
libc = "0.2.123"
libc = "0.2.126"
flame = { version = "0.2.2", optional = true }
flamescope = { version = "0.1.2", optional = true }

View File

@@ -21,7 +21,7 @@ siphasher = "0.3"
rand = "0.8"
volatile = "0.3"
radium = "0.7"
libc = "0.2.101"
libc = "0.2.126"
ascii = "1.0"
unic-ucd-category = "0.9"

View File

@@ -57,14 +57,17 @@ hex = "0.4.3"
puruspe = "0.1.5"
nix = "0.23.1"
xml-rs = "0.8.4"
libc = "0.2.123"
libc = "0.2.126"
cfg-if = "1.0.0"
ahash = "0.7.6"
libz-sys = { version = "1.1.5", optional = true }
num_enum = "0.5.7"
ascii = "1.0.0"
memmap2 = "0.5.0"
page_size = "0.4.2"
# mmap
[target.'cfg(all(unix, not(target_arch = "wasm32")))'.dependencies]
memmap2 = "0.5.4"
page_size = "0.4"
[target.'cfg(all(unix, not(target_os = "redox")))'.dependencies]
termios = "0.3.3"

View File

@@ -1,12 +1,23 @@
//! mmap module
pub(crate) use mmap::make_module;
#[pymodule]
mod mmap {
use crate::common::lock::{PyMutex, PyMutexGuard};
use crate::vm::{
builtins::PyTypeRef, convert::ToPyResult, function::OptionalArg, types::Constructor,
FromArgs, PyObject, PyPayload, PyResult, TryFromBorrowedObject, VirtualMachine,
builtins::{PyInt, PyIntRef, PyTypeRef},
function::FuncArgs,
sliceable::saturate_index,
types::Constructor,
FromArgs, PyObject, PyPayload, PyRef, PyResult, TryFromBorrowedObject, VirtualMachine,
};
use memmap2::{MmapMut, MmapOptions};
use crossbeam_utils::atomic::AtomicCell;
use memmap2::{Mmap, MmapMut, MmapOptions};
use nix::unistd;
use std::fs::File;
use std::ops::Deref;
#[cfg(all(unix, not(target_os = "redox")))]
use std::os::unix::io::{FromRawFd, IntoRawFd, RawFd};
#[repr(C)]
#[derive(PartialEq, Eq, Debug)]
@@ -31,7 +42,27 @@ mod mmap {
}
#[pyattr]
use libc::{MAP_ANON, MAP_ANONYMOUS, MAP_PRIVATE, MAP_SHARED, PROT_READ, PROT_WRITE};
use libc::{
MADV_DONTNEED, MADV_NORMAL, MADV_RANDOM, MADV_SEQUENTIAL, MADV_WILLNEED, MAP_ANON,
MAP_ANONYMOUS, MAP_PRIVATE, MAP_SHARED, PROT_READ, PROT_WRITE,
};
#[cfg(target_os = "macos")]
#[pyattr]
use libc::{MADV_FREE_REUSABLE, MADV_FREE_REUSE};
#[cfg(target_os = "linux")]
#[pyattr]
use libc::{
MADV_DODUMP, MADV_DOFORK, MADV_DONTDUMP, MADV_DONTFORK, MADV_FREE, MADV_HUGEPAGE,
MADV_HWPOISON, MADV_MERGEABLE, MADV_NOHUGEPAGE, MADV_REMOVE, MADV_SOFT_OFFLINE,
MADV_UNMERGEABLE,
};
#[cfg(all(target_os = "linux", target_arch = "x86_64", target_env = "gnu"))]
#[pyattr]
use libc::{MAP_DENYWRITE, MAP_EXECUTABLE, MAP_POPULATE};
#[pyattr]
const ACCESS_DEFAULT: u32 = AccessMode::Default as u32;
#[pyattr]
@@ -41,25 +72,47 @@ mod mmap {
#[pyattr]
const ACCESS_COPY: u32 = AccessMode::Copy as u32;
#[pyattr(name = "PAGESIZE")]
fn pagesize(vm: &VirtualMachine) -> usize {
// Module attribute `PAGESIZE`: the value reported by `page_size::get()`.
// Registered with `once`, so it is presumably evaluated a single time when
// the attribute is first materialised — confirm against the `pyattr` macro.
#[cfg(all(unix, not(target_arch = "wasm32")))]
#[pyattr(name = "PAGESIZE", once)]
fn page_size(_vm: &VirtualMachine) -> usize {
page_size::get()
}
// Module attribute `ALLOCATIONGRANULARITY`: the value reported by
// `page_size::get_granularity()`.
#[cfg(all(unix, not(target_arch = "wasm32")))]
#[pyattr(name = "ALLOCATIONGRANULARITY", once)]
fn granularity(_vm: &VirtualMachine) -> usize {
page_size::get_granularity()
}
// Module attribute `error`: an owned reference to the interpreter's
// `os_error` exception type, so `mmap.error` is an alias of that type.
#[pyattr(name = "error", once)]
fn error_type(vm: &VirtualMachine) -> PyTypeRef {
vm.ctx.exceptions.os_error.to_owned()
}
// The live mapping held by a `PyMmap`, tagged by the memmap2 handle type.
#[derive(Debug)]
enum MmapObj {
// Mutable mapping (`MmapMut`); the constructor uses this for anonymous maps
// and for the default, write and copy access modes.
Write(MmapMut),
// Immutable mapping (`Mmap`); the constructor uses this for read access.
Read(Mmap),
}
#[pyattr]
#[pyclass(name = "mmap")]
#[derive(Debug, PyPayload)]
struct PyMmap {
mmap: MmapMut,
exports: usize,
// PyObject *weakreflist;
closed: AtomicCell<bool>,
mmap: PyMutex<Option<MmapObj>>,
fd: RawFd,
offset: isize,
size: AtomicCell<isize>,
pos: AtomicCell<isize>, // relative to offset
exports: AtomicCell<usize>,
access: AccessMode,
}
#[derive(FromArgs)]
struct MmapNewArgs {
#[pyarg(any)]
fileno: std::os::unix::io::RawFd,
fileno: RawFd,
#[pyarg(any)]
length: isize,
#[pyarg(any, default = "MAP_SHARED")]
@@ -69,16 +122,46 @@ mod mmap {
#[pyarg(any, default = "AccessMode::Default")]
access: AccessMode,
#[pyarg(any, default = "0")]
offset: u64,
offset: isize,
}
// Positional arguments accepted by `flush`; both are optional.
#[derive(FromArgs)]
pub struct FlushOptions {
// First byte to flush; `flush` treats a missing value as 0.
#[pyarg(positional, default)]
offset: Option<isize>,
// Number of bytes to flush; `flush` treats a missing value as the map size.
#[pyarg(positional, default)]
size: Option<isize>,
}
// Positional arguments shared by `find` and `rfind`.
#[derive(FromArgs, Clone)]
pub struct FindOptions {
// Byte sequence to search for.
#[pyarg(positional)]
sub: Vec<u8>,
// Start of the search window; defaults to the current position
// (see `get_find_range`).
#[pyarg(positional, default)]
start: Option<isize>,
// End of the search window; defaults to the map size
// (see `get_find_range`).
#[pyarg(positional, default)]
end: Option<isize>,
}
// Positional arguments: a C int `option` plus an optional start/length pair.
// NOTE(review): no consumer of this struct is visible in this part of the
// file; presumably intended for a madvise-style method — confirm.
#[derive(FromArgs)]
pub struct AdviseOptions {
// Integer option value (a `libc::c_int`).
#[pyarg(positional)]
option: libc::c_int,
// Optional start of the affected range.
#[pyarg(positional, default)]
start: Option<isize>,
// Optional length of the affected range.
#[pyarg(positional, default)]
length: Option<isize>,
}
impl Constructor for PyMmap {
type Args = MmapNewArgs;
// TODO: Windows is not supported right now.
#[cfg(all(unix, not(target_os = "redox")))]
fn py_new(
cls: PyTypeRef,
MmapNewArgs {
fileno: fd,
fileno: mut fd,
length,
flags,
prot,
@@ -87,14 +170,19 @@ mod mmap {
}: Self::Args,
vm: &VirtualMachine,
) -> PyResult {
if length < 0 {
let mut map_size = length;
if map_size < 0 {
return Err(
vm.new_overflow_error("memory mapped length must be positive".to_owned())
);
}
// if offset < 0 {
// return Err(vm.new_overflow_error("memory mapped offset must be positive".to_owned()));
// }
if offset < 0 {
return Err(
vm.new_overflow_error("memory mapped offset must be positive".to_owned())
);
}
if (access != AccessMode::Default)
&& ((flags != MAP_SHARED) || (prot != (PROT_WRITE | PROT_READ)))
{
@@ -103,7 +191,8 @@ mod mmap {
));
}
let (flags, prot, access) = match access {
// TODO: memmap2 doesn't support mapping with prot and flags right now
let (_flags, _prot, access) = match access {
AccessMode::Read => (MAP_SHARED, PROT_READ, access),
AccessMode::Write => (MAP_SHARED, PROT_READ | PROT_WRITE, access),
AccessMode::Copy => (MAP_PRIVATE, PROT_READ | PROT_WRITE, access),
@@ -117,40 +206,263 @@ mod mmap {
};
(flags, prot, access)
}
_ => return Err(vm.new_value_error("mmap invalid access parameter.".to_owned())),
};
let mut mmap_opt = MmapOptions::new();
let mmap_opt = mmap_opt.offset(offset);
// .len(map_size)
let mmap = match access {
AccessMode::Write => unsafe { mmap_opt.map_mut(fd) },
// AccessMode::Read => mmap_opt.map(fd),
AccessMode::Copy => unsafe { mmap_opt.map_copy(fd) },
_ => unreachable!("access must be decided before here"),
if fd != -1 {
let file = unsafe { File::from_raw_fd(fd) };
let file_len = match file.metadata() {
Ok(m) => m.len().try_into().expect("file size overflow"),
Err(e) => return Err(vm.new_os_error(e.to_string())),
};
// File::from_raw_fd will consume the fd, so we
// have to get it again.
fd = file.into_raw_fd();
if map_size == 0 {
if file_len == 0 {
return Err(vm.new_value_error("cannot mmap an empty file".to_owned()));
}
if offset > file_len {
return Err(
vm.new_value_error("mmap offset is greater than file size".to_owned())
);
}
//if file_len - offset > isize::MAX {
// return Err(vm.new_value_error("mmap length is too large".to_owned()));
//}
map_size = file_len - offset;
} else if offset > file_len || file_len - offset < map_size {
return Err(
vm.new_value_error("mmap length is greater than file size".to_owned())
);
}
}
.map_err(|_| vm.new_value_error("FIXME: mmap error".to_owned()))?;
let mut mmap_opt = MmapOptions::new();
let mmap_opt = mmap_opt
.offset(offset.try_into().unwrap())
.len(map_size.try_into().unwrap());
let (fd, mmap) = if fd == -1 {
(
fd,
MmapObj::Write(
mmap_opt
.map_anon()
.map_err(|e| vm.new_os_error(e.to_string()))?,
),
)
} else {
let new_fd = unistd::dup(fd).map_err(|e| vm.new_os_error(e.to_string()))?;
let mmap = match access {
AccessMode::Default | AccessMode::Write => MmapObj::Write(
unsafe { mmap_opt.map_mut(fd) }
.map_err(|e| vm.new_os_error(e.to_string()))?,
),
AccessMode::Read => MmapObj::Read(
unsafe { mmap_opt.map(fd) }.map_err(|e| vm.new_os_error(e.to_string()))?,
),
AccessMode::Copy => MmapObj::Write(
unsafe { mmap_opt.map_copy(fd) }
.map_err(|e| vm.new_os_error(e.to_string()))?,
),
};
(new_fd, mmap)
};
let m_obj = Self {
mmap,
exports: 0,
closed: AtomicCell::new(false),
mmap: PyMutex::new(Some(mmap)),
fd,
offset,
size: AtomicCell::new(map_size),
pos: AtomicCell::new(0),
exports: AtomicCell::new(0),
access,
};
m_obj.to_pyresult(vm)
m_obj.into_ref_with_type(vm, cls).map(Into::into)
}
}
#[pyimpl]
#[pyimpl(with(Constructor), flags(BASETYPE))]
impl PyMmap {
#[pymethod]
fn close(&self) -> PyResult<()> {
if self.exports > 0 {
// PyErr_SetString(PyExc_BufferError, "cannot close "\
// "exported pointers exist");
// Length of the mapping in bytes: the stored `size` value cast to `usize`.
// Registered as a magic method (presumably Python's `__len__`).
#[pymethod(magic)]
pub(crate) fn len(&self) -> usize {
self.inner_size() as usize
}
// Load the current value of the `size` cell.
#[inline]
fn inner_size(&self) -> isize {
self.size.load()
}
// Load the current value of the `pos` cell (the read/write cursor).
#[inline]
fn inner_pos(&self) -> isize {
self.pos.load()
}
// Lock the mapping slot and hand the guard back to the caller, or raise
// ValueError when the slot is empty (the mapping has been closed).
// Callers may unwrap the `Option` inside the returned guard.
fn check_valid(&self, vm: &VirtualMachine) -> PyResult<PyMutexGuard<Option<MmapObj>>> {
    let guard = self.mmap.lock();
    if guard.is_some() {
        Ok(guard)
    } else {
        Err(vm.new_value_error("mmap closed or invalid".to_owned()))
    }
}
// Property `closed`: the current value of the `closed` flag, which `close`
// sets to true.
#[pyproperty]
fn closed(&self) -> bool {
self.closed.load()
}
// Build the textual representation of the object.
//
// A closed mapping renders as `<mmap.mmap closed=True>`; an open one lists
// its access mode, length, cursor position and offset. The mapping lock is
// held for the whole call.
#[pymethod(magic)]
fn repr(zelf: PyRef<Self>) -> PyResult<String> {
    let guard = zelf.mmap.lock();
    let text = match guard.as_ref() {
        None => "<mmap.mmap closed=True>".to_owned(),
        Some(_) => {
            let access_name = match zelf.access {
                AccessMode::Default => "ACCESS_DEFAULT",
                AccessMode::Read => "ACCESS_READ",
                AccessMode::Write => "ACCESS_WRITE",
                AccessMode::Copy => "ACCESS_COPY",
            };
            format!(
                "<mmap.mmap closed=False, access={}, length={}, pos={}, offset={}>",
                access_name,
                zelf.len(),
                zelf.inner_pos(),
                zelf.offset
            )
        }
    };
    Ok(text)
}
// Close the mapping.
//
// Closing an already-closed object is a no-op. While exports are
// outstanding the call fails with BufferError. Otherwise the `closed` flag
// is set and the mapping is dropped, both under the mapping lock.
//
// NOTE(review): the descriptor stored in `self.fd` is not closed here —
// confirm it is released elsewhere, otherwise every file-backed mmap leaks
// one descriptor.
#[pymethod]
fn close(&self, vm: &VirtualMachine) -> PyResult<()> {
    if self.closed.load() {
        return Ok(());
    }

    if self.exports.load() > 0 {
        return Err(vm.new_buffer_error("cannot close exported pointers exist.".to_owned()));
    }

    let mut slot = self.mmap.lock();
    self.closed.store(true);
    // Taking the value out leaves `None` behind and drops the mapping.
    drop(slot.take());
    Ok(())
}
// Resolve the optional `start`/`end` search bounds into `usize` indices.
//
// `start` defaults to the current position and `end` to the mapping size.
// Each bound is passed through `saturate_index` separately, so the result
// is not guaranteed to satisfy `start <= end`.
fn get_find_range(&self, options: FindOptions) -> (usize, usize) {
    let cursor = self.inner_pos();
    let total = self.inner_size();
    let limit: usize = total.try_into().unwrap();

    let lo = saturate_index(options.start.unwrap_or(cursor), limit);
    let hi = saturate_index(options.end.unwrap_or(total), limit);
    (lo, hi)
}
// Return the lowest index in `[start, end)` at which `sub` occurs, or -1
// when it does not occur.
//
// `start` defaults to the current position and `end` to the mapping size
// (see `get_find_range`). An empty `sub` returns 0 before the mapping is
// validated. Raises ValueError if the mapping has been closed.
#[pymethod]
fn find(&self, mut options: FindOptions, vm: &VirtualMachine) -> PyResult<PyInt> {
    // Move the needle out so the remaining options can be passed on by
    // value without cloning the byte vector.
    let sub = std::mem::take(&mut options.sub);
    let (start, end) = self.get_find_range(options);
    if sub.is_empty() {
        return Ok(PyInt::from(0isize));
    }
    let mmap = self.check_valid(vm)?;
    let data: &[u8] = match mmap.as_ref().unwrap() {
        MmapObj::Read(mmap) => &mmap[..],
        MmapObj::Write(mmap) => &mmap[..],
    };
    // The two bounds are resolved independently, so `start > end` can
    // happen (e.g. `find(b"x", 5, 2)`). Checked slicing reports that as
    // "not found" instead of panicking on an invalid slice range.
    let pos = data.get(start..end).and_then(|buf| {
        buf.windows(sub.len())
            .position(|window| window == sub.as_slice())
    });
    Ok(pos.map_or(PyInt::from(-1isize), |i| PyInt::from(start + i)))
}
// Return the highest index in `[start, end)` at which `sub` occurs, or -1
// when it does not occur.
//
// `start` defaults to the current position and `end` to the mapping size
// (see `get_find_range`). An empty `sub` returns 0 before the mapping is
// validated. Raises ValueError if the mapping has been closed.
#[pymethod]
fn rfind(&self, mut options: FindOptions, vm: &VirtualMachine) -> PyResult<PyInt> {
    // Move the needle out so the remaining options can be passed on by
    // value without cloning the byte vector.
    let sub = std::mem::take(&mut options.sub);
    let (start, end) = self.get_find_range(options);
    if sub.is_empty() {
        return Ok(PyInt::from(0isize));
    }
    let mmap = self.check_valid(vm)?;
    let data: &[u8] = match mmap.as_ref().unwrap() {
        MmapObj::Read(mmap) => &mmap[..],
        MmapObj::Write(mmap) => &mmap[..],
    };
    // The two bounds are resolved independently, so `start > end` can
    // happen (e.g. `rfind(b"x", 5, 2)`). Checked slicing reports that as
    // "not found" instead of panicking on an invalid slice range.
    let pos = data.get(start..end).and_then(|buf| {
        buf.windows(sub.len())
            .rposition(|window| window == sub.as_slice())
    });
    Ok(pos.map_or(PyInt::from(-1isize), |i| PyInt::from(start + i)))
}
// Flush a byte range of the mapping.
//
// `offset` defaults to 0 and `size` to the mapping size. A negative value
// or a range extending past the mapping raises ValueError. Read and copy
// access modes return successfully without flushing. Otherwise the mapping
// must still be open (ValueError if not), and a failure reported by
// `flush_range` is raised as OSError.
#[pymethod]
fn flush(&self, options: FlushOptions, vm: &VirtualMachine) -> PyResult<()> {
    let start = options.offset.unwrap_or(0);
    let len = options.size.unwrap_or_else(|| self.inner_size());

    let out_of_range = len < 0 || start < 0 || self.inner_size() - start < len;
    if out_of_range {
        return Err(vm.new_value_error("flush values out of range".to_owned()));
    }
    // Both values were just checked to be non-negative.
    let (start, len) = (start as usize, len as usize);

    if matches!(self.access, AccessMode::Read | AccessMode::Copy) {
        return Ok(());
    }

    let guard = self.check_valid(vm)?;
    // Only a writable mapping has anything to flush.
    if let Some(MmapObj::Write(map)) = guard.deref() {
        map.flush_range(start, len)
            .map_err(|e| vm.new_os_error(e.to_string()))?;
    }
    Ok(())
}
// Return the length reported by the metadata of the file behind `self.fd`.
//
// The descriptor is duplicated first because the temporary `File` closes
// whatever descriptor it wraps when dropped; `self.fd` itself is left
// untouched. Failures from `dup` or `metadata` are raised as OSError.
#[pymethod]
fn size(&self, vm: &VirtualMachine) -> PyResult<PyIntRef> {
    let dup_fd = unistd::dup(self.fd).map_err(|e| vm.new_os_error(e.to_string()))?;
    // SAFETY: `dup_fd` was just returned by `dup`, so it is an open
    // descriptor owned exclusively by this `File`.
    let file = unsafe { File::from_raw_fd(dup_fd) };
    let file_len = file
        .metadata()
        .map_err(|e| vm.new_os_error(e.to_string()))?
        .len();
    Ok(PyInt::from(file_len).into_ref(vm))
}
// Return the current cursor position (the stored `pos` value). Always Ok.
#[pymethod]
fn tell(&self) -> PyResult<isize> {
Ok(self.inner_pos())
}
// Magic `enter` method: returns the object itself, or raises ValueError
// when the mapping has already been closed.
#[pymethod(magic)]
fn enter(zelf: PyRef<Self>, vm: &VirtualMachine) -> PyResult<PyRef<Self>> {
    // Only the validity check matters; release the guard right away so the
    // reference can be returned by value.
    drop(zelf.check_valid(vm)?);
    Ok(zelf)
}
// Magic `exit` method: ignores the arguments it receives and closes the
// mapping, propagating any error raised by `close`.
#[pymethod(magic)]
fn exit(zelf: PyRef<Self>, _args: FuncArgs, vm: &VirtualMachine) -> PyResult<()> {
zelf.close(vm)
}
}
}

View File

@@ -48,7 +48,7 @@ hexf-parse = "0.2.1"
indexmap = "1.8.1"
ahash = "0.7.6"
bitflags = "1.3.2"
libc = "0.2.123"
libc = "0.2.126"
nix = "0.23.1"
paste = "1.0.7"
is-macro = "0.2.0"

View File

@@ -151,6 +151,11 @@ impl VirtualMachine {
self.new_exception_msg(os_error, msg)
}
/// Create a new exception of the context's `system_error` type carrying `msg`.
pub fn new_system_error(&self, msg: String) -> PyBaseExceptionRef {
    self.new_exception_msg(self.ctx.exceptions.system_error.to_owned(), msg)
}
pub fn new_unicode_decode_error(&self, msg: String) -> PyBaseExceptionRef {
let unicode_decode_error = self.ctx.exceptions.unicode_decode_error.to_owned();
self.new_exception_msg(unicode_decode_error, msg)