Skip to content

Commit

Permalink
Implement normalize lexically
Browse files Browse the repository at this point in the history
  • Loading branch information
ChrisDenton committed Dec 23, 2024
1 parent 904d8f6 commit c08079c
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 0 deletions.
73 changes: 73 additions & 0 deletions library/std/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2148,6 +2148,13 @@ pub struct Path {
#[stable(since = "1.7.0", feature = "strip_prefix")]
pub struct StripPrefixError(());

/// An error returned from [`Path::normalize_lexically`] if a `..` parent reference
/// would escape the path.
///
/// "Escaping" covers both a relative path whose `..` components have nothing
/// left to cancel (for example `..` or `a/../..`) and a `..` that would climb
/// above a leading root, prefix, or `.` component.
#[unstable(feature = "normalize_lexically", issue = "134694")]
#[derive(Debug, PartialEq)]
#[non_exhaustive]
pub struct NormalizeError;

impl Path {
// The following (private!) function allows construction of a path from a u8
// slice, which is only safe when it is known to follow the OsStr encoding.
Expand Down Expand Up @@ -2962,6 +2969,63 @@ impl Path {
fs::canonicalize(self)
}

/// Lexically normalizes this path: `.` components are dropped and each `..`
/// cancels the component before it, all without touching the filesystem.
///
/// A leading root, prefix, or `.` is kept as-is. An error is returned when a
/// `..` has nothing left to cancel, i.e. when it would end up as a leading
/// `..` or climb above the root or prefix.
///
/// <div class="warning">
///
/// `..` is always resolved to its "lexical" parent, so `a/b/../c` becomes `a/c`.
/// This can change the meaning of the path: on many systems `b` may be a symbolic
/// link whose real parent is not `a`, making `a/c` and `a/b/../c` distinct.
///
/// </div>
///
/// Use [`path::absolute`](absolute) if `..` must be preserved, or
/// [`Path::canonicalize`] to resolve `..` by consulting the filesystem.
#[unstable(feature = "normalize_lexically", issue = "134694")]
pub fn normalize_lexically(&self) -> Result<PathBuf, NormalizeError> {
    let mut components = self.components().peekable();
    let mut normalized = PathBuf::new();

    // Handle the leading component and record `floor`: the byte length of the
    // part of `normalized` that a later `..` is not allowed to remove.
    // `Component` is `Copy`, so copying the peeked value releases the borrow
    // on the iterator before it is advanced.
    let floor = match components.peek().copied() {
        None => return Ok(normalized),
        Some(Component::ParentDir) => return Err(NormalizeError),
        // Left in the iterator on purpose; the loop below pushes it.
        Some(Component::Normal(_)) => 0,
        Some(first @ (Component::RootDir | Component::CurDir)) => {
            components.next();
            normalized.push(first);
            normalized.as_os_str().len()
        }
        Some(Component::Prefix(prefix)) => {
            components.next();
            normalized.push(prefix.as_os_str());
            // A prefix may be followed by a root (e.g. `C:\`); the pair
            // together forms the part that cannot be popped.
            if let Some(sep) = components.next_if(|c| matches!(c, Component::RootDir)) {
                normalized.push(sep);
            }
            normalized.as_os_str().len()
        }
    };

    for part in components {
        match part {
            Component::Normal(name) => normalized.push(name),
            Component::CurDir => {}
            // Nothing above the floor is left for this `..` to cancel.
            Component::ParentDir if normalized.as_os_str().len() == floor => {
                return Err(NormalizeError);
            }
            Component::ParentDir => {
                normalized.pop();
            }
            // A root or prefix is only expected at the very front of a path;
            // meeting one here is reported as an error.
            Component::Prefix(_) | Component::RootDir => return Err(NormalizeError),
        }
    }

    Ok(normalized)
}

/// Reads a symbolic link, returning the file that the link points to.
///
/// This is an alias to [`fs::read_link`].
Expand Down Expand Up @@ -3503,6 +3567,15 @@ impl Error for StripPrefixError {
}
}

// Gated under the same feature and tracking issue as `NormalizeError` itself.
#[unstable(feature = "normalize_lexically", issue = "134694")]
impl fmt::Display for NormalizeError {
    /// Writes a fixed, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("parent reference `..` points outside of base directory")
    }
}
// Gated under the same feature and tracking issue as `NormalizeError` itself.
// The default `Error` methods suffice: the error wraps no underlying source.
#[unstable(feature = "normalize_lexically", issue = "134694")]
impl Error for NormalizeError {}

/// Makes the path absolute without accessing the filesystem.
///
/// If the path is relative, the current directory is used as the base directory.
Expand Down
48 changes: 48 additions & 0 deletions library/std/src/path/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2079,3 +2079,51 @@ fn clone_to_uninit() {
unsafe { a.clone_to_uninit(ptr::from_mut::<Path>(&mut b).cast()) };
assert_eq!(a, &*b);
}

#[test]
fn normalize_lexically() {
    // Asserts that lexically normalizing `a` produces `b`.
    // Note that path equality is component-based, so separator style and
    // trailing separators in the expected value are not significant.
    #[track_caller]
    fn check(a: &str, b: Result<&str, NormalizeError>) {
        assert_eq!(Path::new(a).normalize_lexically(), b.map(PathBuf::from));
    }

    // The empty path normalizes to itself.
    check("", Ok(""));

    // Relative paths
    check("a", Ok("a"));
    check("./a", Ok("./a"));
    check("a/b/c", Ok("a/b/c"));
    check("a/././b/./c/.", Ok("a/b/c"));
    check("a/../c", Ok("c"));
    check("./a/b", Ok("./a/b"));
    check("a/../b/c/..", Ok("b"));

    // A `..` may cancel everything that precedes it, but no more than that.
    check("a/..", Ok(""));
    check(".", Ok("."));
    check("./a/..", Ok("."));

    check("..", Err(NormalizeError));
    check("../..", Err(NormalizeError));
    check("a/../..", Err(NormalizeError));
    check("a/../../b", Err(NormalizeError));
    check("a/../../b/c", Err(NormalizeError));
    check("a/../b/../..", Err(NormalizeError));

    // Rooted and prefixed paths: `..` may cancel components below the root
    // or prefix, but must never escape it.
    #[cfg(unix)]
    {
        check("/", Ok("/"));
        check("/a/../b", Ok("/b"));
        check("/a/./b/..", Ok("/a"));

        check("/..", Err(NormalizeError));
        check("/a/../..", Err(NormalizeError));
    }
    #[cfg(windows)]
    {
        check(r"C:\a\..\b", Ok(r"C:\b"));
        check(r"\a\..", Ok(r"\"));

        check(r"C:\..", Err(NormalizeError));
        check(r"C:\a\..\..", Err(NormalizeError));

        check(r"C:..", Err(NormalizeError));
        check(r"C:a\..\..", Err(NormalizeError));

        check(r"\\server\share\..", Err(NormalizeError));
        check(r"\\server\share\a\..\..", Err(NormalizeError));

        check(r"\..", Err(NormalizeError));
        check(r"\a\..\..", Err(NormalizeError));

        check(r"\\?\UNC\server\share\..", Err(NormalizeError));
        check(r"\\?\UNC\server\share\a\..\..", Err(NormalizeError));
    }
}

0 comments on commit c08079c

Please sign in to comment.