mirror of
https://github.com/RustPython/RustPython.git
synced 2026-06-02 19:39:49 +09:00
Implement fsencode/fsdecode for FsPath
This commit is contained in:
3
Lib/test/test_socket.py
vendored
3
Lib/test/test_socket.py
vendored
@@ -1578,7 +1578,6 @@ class GeneralModuleTests(unittest.TestCase):
|
||||
# only IP addresses are allowed
|
||||
self.assertRaises(OSError, socket.getnameinfo, ('mail.python.org',0), 0)
|
||||
|
||||
@unittest.expectedFailureIf(sys.platform != "darwin", "TODO: RUSTPYTHON; socket.gethostbyname_ex")
|
||||
@unittest.skipUnless(support.is_resource_enabled('network'),
|
||||
'network is not enabled')
|
||||
def test_idna(self):
|
||||
@@ -5519,8 +5518,6 @@ class TestUnixDomain(unittest.TestCase):
|
||||
self.addCleanup(os_helper.unlink, path)
|
||||
self.assertEqual(self.sock.getsockname(), path)
|
||||
|
||||
# TODO: RUSTPYTHON, surrogateescape
|
||||
@unittest.expectedFailure
|
||||
def testSurrogateescapeBind(self):
|
||||
# Test binding to a valid non-ASCII pathname, with the
|
||||
# non-ASCII bytes supplied using surrogateescape encoding.
|
||||
|
||||
@@ -930,10 +930,15 @@ mod _socket {
|
||||
match family {
|
||||
#[cfg(unix)]
|
||||
c::AF_UNIX => {
|
||||
use crate::vm::function::ArgStrOrBytesLike;
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
let buf = crate::vm::function::ArgStrOrBytesLike::try_from_object(vm, addr)?;
|
||||
let path = &*buf.borrow_bytes();
|
||||
socket2::SockAddr::unix(ffi::OsStr::from_bytes(path))
|
||||
let buf = ArgStrOrBytesLike::try_from_object(vm, addr)?;
|
||||
let bytes = &*buf.borrow_bytes();
|
||||
let path = match &buf {
|
||||
ArgStrOrBytesLike::Buf(_) => ffi::OsStr::from_bytes(bytes).into(),
|
||||
ArgStrOrBytesLike::Str(s) => vm.fsencode(s)?,
|
||||
};
|
||||
socket2::SockAddr::unix(path)
|
||||
.map_err(|_| vm.new_os_error("AF_UNIX path too long".to_owned()).into())
|
||||
}
|
||||
c::AF_INET => {
|
||||
@@ -1704,7 +1709,7 @@ mod _socket {
|
||||
let path = ffi::OsStr::as_bytes(addr.as_pathname().unwrap_or("".as_ref()).as_ref());
|
||||
let nul_pos = memchr::memchr(b'\0', path).unwrap_or(path.len());
|
||||
let path = ffi::OsStr::from_bytes(&path[..nul_pos]);
|
||||
return vm.ctx.new_str(path.to_string_lossy()).into();
|
||||
return vm.fsdecode(path).into();
|
||||
}
|
||||
// TODO: support more address families
|
||||
(String::new(), 0).to_pyobject(vm)
|
||||
|
||||
@@ -5,7 +5,7 @@ use crate::{
|
||||
function::PyStr,
|
||||
protocol::PyBuffer,
|
||||
};
|
||||
use std::{ffi::OsStr, path::PathBuf};
|
||||
use std::{borrow::Cow, ffi::OsStr, path::PathBuf};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum FsPath {
|
||||
@@ -58,15 +58,11 @@ impl FsPath {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn as_os_str(&self, vm: &VirtualMachine) -> PyResult<&OsStr> {
|
||||
pub fn as_os_str(&self, vm: &VirtualMachine) -> PyResult<Cow<'_, OsStr>> {
|
||||
// TODO: FS encodings
|
||||
match self {
|
||||
FsPath::Str(s) => {
|
||||
// XXX RUSTPYTHON: this is sketchy on windows; it's not guaranteed that its
|
||||
// OsStr encoding will always be compatible with WTF-8.
|
||||
Ok(unsafe { OsStr::from_encoded_bytes_unchecked(s.as_wtf8().as_bytes()) })
|
||||
}
|
||||
FsPath::Bytes(b) => Self::bytes_as_osstr(b.as_bytes(), vm),
|
||||
FsPath::Str(s) => vm.fsencode(s),
|
||||
FsPath::Bytes(b) => Self::bytes_as_osstr(b.as_bytes(), vm).map(Cow::Borrowed),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -21,28 +21,14 @@ pub(super) enum OutputMode {
|
||||
}
|
||||
|
||||
impl OutputMode {
|
||||
pub(super) fn process_path(self, path: impl Into<PathBuf>, vm: &VirtualMachine) -> PyResult {
|
||||
fn inner(mode: OutputMode, path: PathBuf, vm: &VirtualMachine) -> PyResult {
|
||||
let path_as_string = |p: PathBuf| {
|
||||
p.into_os_string().into_string().map_err(|_| {
|
||||
vm.new_unicode_decode_error(
|
||||
"Can't convert OS path to valid UTF-8 string".into(),
|
||||
)
|
||||
})
|
||||
};
|
||||
pub(super) fn process_path(self, path: impl Into<PathBuf>, vm: &VirtualMachine) -> PyObjectRef {
|
||||
fn inner(mode: OutputMode, path: PathBuf, vm: &VirtualMachine) -> PyObjectRef {
|
||||
match mode {
|
||||
OutputMode::String => path_as_string(path).map(|s| vm.ctx.new_str(s).into()),
|
||||
OutputMode::Bytes => {
|
||||
#[cfg(any(unix, target_os = "wasi"))]
|
||||
{
|
||||
use rustpython_common::os::ffi::OsStringExt;
|
||||
Ok(vm.ctx.new_bytes(path.into_os_string().into_vec()).into())
|
||||
}
|
||||
#[cfg(windows)]
|
||||
{
|
||||
path_as_string(path).map(|s| vm.ctx.new_bytes(s.into_bytes()).into())
|
||||
}
|
||||
}
|
||||
OutputMode::String => vm.fsdecode(path).into(),
|
||||
OutputMode::Bytes => vm
|
||||
.ctx
|
||||
.new_bytes(path.into_os_string().into_encoded_bytes())
|
||||
.into(),
|
||||
}
|
||||
}
|
||||
inner(self, path.into(), vm)
|
||||
@@ -59,7 +45,7 @@ impl OsPath {
|
||||
}
|
||||
|
||||
pub(crate) fn from_fspath(fspath: FsPath, vm: &VirtualMachine) -> PyResult<OsPath> {
|
||||
let path = fspath.as_os_str(vm)?.to_owned();
|
||||
let path = fspath.as_os_str(vm)?.into_owned();
|
||||
let mode = match fspath {
|
||||
FsPath::Str(_) => OutputMode::String,
|
||||
FsPath::Bytes(_) => OutputMode::Bytes,
|
||||
@@ -88,7 +74,7 @@ impl OsPath {
|
||||
widestring::WideCString::from_os_str(&self.path).map_err(|err| err.to_pyexception(vm))
|
||||
}
|
||||
|
||||
pub fn filename(&self, vm: &VirtualMachine) -> PyResult {
|
||||
pub fn filename(&self, vm: &VirtualMachine) -> PyObjectRef {
|
||||
self.mode.process_path(self.path.clone(), vm)
|
||||
}
|
||||
}
|
||||
@@ -133,7 +119,7 @@ impl From<OsPath> for OsPathOrFd {
|
||||
impl OsPathOrFd {
|
||||
pub fn filename(&self, vm: &VirtualMachine) -> PyObjectRef {
|
||||
match self {
|
||||
OsPathOrFd::Path(path) => path.filename(vm).unwrap_or_else(|_| vm.ctx.none()),
|
||||
OsPathOrFd::Path(path) => path.filename(vm),
|
||||
OsPathOrFd::Fd(fd) => vm.ctx.new_int(*fd).into(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -312,7 +312,12 @@ mod _codecs {
|
||||
|
||||
#[pyfunction]
|
||||
fn utf_8_encode(args: EncodeArgs, vm: &VirtualMachine) -> EncodeResult {
|
||||
if args.s.is_utf8() {
|
||||
if args.s.is_utf8()
|
||||
|| args
|
||||
.errors
|
||||
.as_ref()
|
||||
.is_some_and(|s| s.is(identifier!(vm, surrogatepass)))
|
||||
{
|
||||
return Ok((args.s.as_bytes().to_vec(), args.s.byte_len()));
|
||||
}
|
||||
do_codec!(utf8::encode, args, vm)
|
||||
|
||||
@@ -2225,7 +2225,7 @@ mod _io {
|
||||
*data = None;
|
||||
|
||||
let encoding = match args.encoding {
|
||||
None if vm.state.settings.utf8_mode > 0 => PyStr::from("utf-8").into_ref(&vm.ctx),
|
||||
None if vm.state.settings.utf8_mode > 0 => identifier!(vm, utf_8).to_owned(),
|
||||
Some(enc) if enc.as_wtf8() != "locale" => enc,
|
||||
_ => {
|
||||
// None without utf8_mode or "locale" encoding
|
||||
@@ -2238,7 +2238,7 @@ mod _io {
|
||||
|
||||
let errors = args
|
||||
.errors
|
||||
.unwrap_or_else(|| PyStr::from("strict").into_ref(&vm.ctx));
|
||||
.unwrap_or_else(|| identifier!(vm, strict).to_owned());
|
||||
|
||||
let has_read1 = vm.get_attribute_opt(buffer.clone(), "read1")?.is_some();
|
||||
let seekable = vm.call_method(&buffer, "seekable", ())?.try_to_bool(vm)?;
|
||||
|
||||
@@ -249,7 +249,7 @@ pub(crate) mod module {
|
||||
.as_ref()
|
||||
.canonicalize()
|
||||
.map_err(|e| e.to_pyexception(vm))?;
|
||||
path.mode.process_path(real, vm)
|
||||
Ok(path.mode.process_path(real, vm))
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
@@ -282,7 +282,7 @@ pub(crate) mod module {
|
||||
}
|
||||
}
|
||||
let buffer = widestring::WideCString::from_vec_truncate(buffer);
|
||||
path.mode.process_path(buffer.to_os_string(), vm)
|
||||
Ok(path.mode.process_path(buffer.to_os_string(), vm))
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
@@ -297,7 +297,7 @@ pub(crate) mod module {
|
||||
return Err(errno_err(vm));
|
||||
}
|
||||
let buffer = widestring::WideCString::from_vec_truncate(buffer);
|
||||
path.mode.process_path(buffer.to_os_string(), vm)
|
||||
Ok(path.mode.process_path(buffer.to_os_string(), vm))
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
|
||||
@@ -332,7 +332,7 @@ pub(super) mod _os {
|
||||
};
|
||||
dir_iter
|
||||
.map(|entry| match entry {
|
||||
Ok(entry_path) => path.mode.process_path(entry_path.file_name(), vm),
|
||||
Ok(entry_path) => Ok(path.mode.process_path(entry_path.file_name(), vm)),
|
||||
Err(err) => Err(IOErrorBuilder::with_filename(&err, path.clone(), vm)),
|
||||
})
|
||||
.collect::<PyResult<_>>()?
|
||||
@@ -352,22 +352,18 @@ pub(super) mod _os {
|
||||
let mut dir =
|
||||
nix::dir::Dir::from_fd(new_fd).map_err(|e| e.into_pyexception(vm))?;
|
||||
dir.iter()
|
||||
.filter_map(|entry| {
|
||||
entry
|
||||
.map_err(|e| e.into_pyexception(vm))
|
||||
.and_then(|entry| {
|
||||
let fname = entry.file_name().to_bytes();
|
||||
Ok(match fname {
|
||||
b"." | b".." => None,
|
||||
_ => Some(
|
||||
OutputMode::String
|
||||
.process_path(ffi::OsStr::from_bytes(fname), vm)?,
|
||||
),
|
||||
})
|
||||
})
|
||||
.transpose()
|
||||
.filter_map_ok(|entry| {
|
||||
let fname = entry.file_name().to_bytes();
|
||||
match fname {
|
||||
b"." | b".." => None,
|
||||
_ => Some(
|
||||
OutputMode::String
|
||||
.process_path(ffi::OsStr::from_bytes(fname), vm),
|
||||
),
|
||||
}
|
||||
})
|
||||
.collect::<PyResult<_>>()?
|
||||
.collect::<Result<_, _>>()
|
||||
.map_err(|e| e.into_pyexception(vm))?
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -429,7 +425,7 @@ pub(super) mod _os {
|
||||
let [] = dir_fd.0;
|
||||
let path =
|
||||
fs::read_link(&path).map_err(|err| IOErrorBuilder::with_filename(&err, path, vm))?;
|
||||
mode.process_path(path, vm)
|
||||
Ok(mode.process_path(path, vm))
|
||||
}
|
||||
|
||||
#[pyattr]
|
||||
@@ -452,12 +448,12 @@ pub(super) mod _os {
|
||||
impl DirEntry {
|
||||
#[pygetset]
|
||||
fn name(&self, vm: &VirtualMachine) -> PyResult {
|
||||
self.mode.process_path(&self.file_name, vm)
|
||||
Ok(self.mode.process_path(&self.file_name, vm))
|
||||
}
|
||||
|
||||
#[pygetset]
|
||||
fn path(&self, vm: &VirtualMachine) -> PyResult {
|
||||
self.mode.process_path(&self.pathval, vm)
|
||||
Ok(self.mode.process_path(&self.pathval, vm))
|
||||
}
|
||||
|
||||
fn perform_on_metadata(
|
||||
@@ -908,12 +904,12 @@ pub(super) mod _os {
|
||||
|
||||
#[pyfunction]
|
||||
fn getcwd(vm: &VirtualMachine) -> PyResult {
|
||||
OutputMode::String.process_path(curdir_inner(vm)?, vm)
|
||||
Ok(OutputMode::String.process_path(curdir_inner(vm)?, vm))
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
fn getcwdb(vm: &VirtualMachine) -> PyResult {
|
||||
OutputMode::Bytes.process_path(curdir_inner(vm)?, vm)
|
||||
Ok(OutputMode::Bytes.process_path(curdir_inner(vm)?, vm))
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
|
||||
@@ -458,21 +458,13 @@ mod sys {
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
fn getfilesystemencoding(_vm: &VirtualMachine) -> String {
|
||||
// TODO: implement non-utf-8 mode.
|
||||
"utf-8".to_owned()
|
||||
fn getfilesystemencoding(vm: &VirtualMachine) -> PyStrRef {
|
||||
vm.fs_encoding().to_owned()
|
||||
}
|
||||
|
||||
#[cfg(not(windows))]
|
||||
#[pyfunction]
|
||||
fn getfilesystemencodeerrors(_vm: &VirtualMachine) -> String {
|
||||
"surrogateescape".to_owned()
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
#[pyfunction]
|
||||
fn getfilesystemencodeerrors(_vm: &VirtualMachine) -> String {
|
||||
"surrogatepass".to_owned()
|
||||
fn getfilesystemencodeerrors(vm: &VirtualMachine) -> PyStrRef {
|
||||
vm.fs_encode_errors().to_owned()
|
||||
}
|
||||
|
||||
#[pyfunction]
|
||||
|
||||
@@ -51,7 +51,7 @@ pub struct Context {
|
||||
}
|
||||
|
||||
macro_rules! declare_const_name {
|
||||
($($name:ident,)*) => {
|
||||
($($name:ident$(: $s:literal)?,)*) => {
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
#[allow(non_snake_case)]
|
||||
pub struct ConstName {
|
||||
@@ -61,11 +61,13 @@ macro_rules! declare_const_name {
|
||||
impl ConstName {
|
||||
unsafe fn new(pool: &StringPool, typ: &PyTypeRef) -> Self {
|
||||
Self {
|
||||
$($name: unsafe { pool.intern(stringify!($name), typ.clone()) },)*
|
||||
$($name: unsafe { pool.intern(declare_const_name!(@string $name $($s)?), typ.clone()) },)*
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
(@string $name:ident) => { stringify!($name) };
|
||||
(@string $name:ident $string:literal) => { $string };
|
||||
}
|
||||
|
||||
declare_const_name! {
|
||||
@@ -236,6 +238,15 @@ declare_const_name! {
|
||||
flush,
|
||||
close,
|
||||
WarningMessage,
|
||||
strict,
|
||||
ignore,
|
||||
replace,
|
||||
xmlcharrefreplace,
|
||||
backslashreplace,
|
||||
namereplace,
|
||||
surrogatepass,
|
||||
surrogateescape,
|
||||
utf_8: "utf-8",
|
||||
}
|
||||
|
||||
// Basic objects:
|
||||
|
||||
@@ -41,11 +41,12 @@ use nix::{
|
||||
sys::signal::{SaFlags, SigAction, SigSet, Signal::SIGINT, kill, sigaction},
|
||||
unistd::getpid,
|
||||
};
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::{
|
||||
borrow::Cow,
|
||||
cell::{Cell, Ref, RefCell},
|
||||
collections::{HashMap, HashSet},
|
||||
ffi::{OsStr, OsString},
|
||||
sync::atomic::AtomicBool,
|
||||
};
|
||||
|
||||
pub use context::Context;
|
||||
@@ -901,6 +902,46 @@ impl VirtualMachine {
|
||||
run_module_as_main.call((module,), self)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn fs_encoding(&self) -> &'static PyStrInterned {
|
||||
identifier!(self, utf_8)
|
||||
}
|
||||
|
||||
pub fn fs_encode_errors(&self) -> &'static PyStrInterned {
|
||||
if cfg!(windows) {
|
||||
identifier!(self, surrogatepass)
|
||||
} else {
|
||||
identifier!(self, surrogateescape)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fsdecode(&self, s: impl Into<OsString>) -> PyStrRef {
|
||||
let bytes = self.ctx.new_bytes(s.into().into_encoded_bytes());
|
||||
let errors = self.fs_encode_errors().to_owned();
|
||||
self.state
|
||||
.codec_registry
|
||||
.decode_text(bytes.into(), "utf-8", Some(errors), self)
|
||||
.unwrap() // this should never fail, since fsdecode should be lossless from the fs encoding
|
||||
}
|
||||
|
||||
pub fn fsencode<'a>(&self, s: &'a Py<PyStr>) -> PyResult<Cow<'a, OsStr>> {
|
||||
if cfg!(windows) || s.is_utf8() {
|
||||
// XXX: this is sketchy on windows; it's not guaranteed that the
|
||||
// OsStr encoding will always be compatible with WTF-8.
|
||||
let s = unsafe { OsStr::from_encoded_bytes_unchecked(s.as_bytes()) };
|
||||
return Ok(Cow::Borrowed(s));
|
||||
}
|
||||
let errors = self.fs_encode_errors().to_owned();
|
||||
let bytes = self
|
||||
.state
|
||||
.codec_registry
|
||||
.encode_text(s.to_owned(), "utf-8", Some(errors), self)?
|
||||
.to_vec();
|
||||
// XXX: this is sketchy on windows; it's not guaranteed that the
|
||||
// OsStr encoding will always be compatible with WTF-8.
|
||||
let s = unsafe { OsString::from_encoded_bytes_unchecked(bytes) };
|
||||
Ok(Cow::Owned(s))
|
||||
}
|
||||
}
|
||||
|
||||
impl AsRef<Context> for VirtualMachine {
|
||||
|
||||
Reference in New Issue
Block a user