Reimplement canonical in terms of GetFinalPathNameByHandleW on Windows.

This moves the common part of v3 and v4 canonical() to a separate
function and changes the Windows implementation to use
GetFinalPathNameByHandleW system call. As a side effect, this converts
drive names to upper case, which makes paths more interoperable.

Additionally, on POSIX systems, avoid adding a trailing directory
separator if the input path has one (which may be the case in v4). This
is consistent with libstdc++ and MSVC implementations of std::filesystem.

Fixes https://github.com/boostorg/filesystem/issues/325.
This commit is contained in:
Andrey Semashev 2024-09-16 21:12:32 +03:00
parent 01cadd08e4
commit 11beaba974
3 changed files with 156 additions and 143 deletions

View File

@ -40,6 +40,13 @@
</tr> </tr>
</table> </table>
<h2>1.87.0</h2>
<ul>
<li>As was announced in 1.84.0, Windows versions prior to 10 are no longer supported.</li>
<li>On Windows, <code>canonical</code> is now based on the <code>GetFinalPathNameByHandleW</code> WinAPI function. As a side effect, drive letters are converted to upper case, which makes the resulting paths more interoperable. (<a href="https://github.com/boostorg/filesystem/issues/325">#325</a>)</li>
<li><b>v4:</b> <code>canonical</code> no longer produces a trailing directory separator in the resulting path, if the input path has one.</li>
</ul>
<h2>1.86.0</h2> <h2>1.86.0</h2>
<ul> <ul>
<li><code>is_empty</code> operation is now better protected against concurrent filesystem modifications.</li> <li><code>is_empty</code> operation is now better protected against concurrent filesystem modifications.</li>

View File

@ -263,11 +263,11 @@ namespace {
// The number of retries remove_all should make if it detects that the directory it is about to enter has been replaced with a symlink or a regular file // The number of retries remove_all should make if it detects that the directory it is about to enter has been replaced with a symlink or a regular file
BOOST_CONSTEXPR_OR_CONST unsigned int remove_all_directory_replaced_retry_count = 5u; BOOST_CONSTEXPR_OR_CONST unsigned int remove_all_directory_replaced_retry_count = 5u;
#if defined(BOOST_POSIX_API)
// Size of a small buffer for a path that can be placed on stack, in character code units // Size of a small buffer for a path that can be placed on stack, in character code units
BOOST_CONSTEXPR_OR_CONST std::size_t small_path_size = 1024u; BOOST_CONSTEXPR_OR_CONST std::size_t small_path_size = 1024u;
#if defined(BOOST_POSIX_API)
// Absolute maximum path length, in character code units, that we're willing to accept from various system calls. // Absolute maximum path length, in character code units, that we're willing to accept from various system calls.
// This value is arbitrary, it is supposed to be a hard limit to avoid memory exhaustion // This value is arbitrary, it is supposed to be a hard limit to avoid memory exhaustion
// in some of the algorithms below in case of some corrupted or maliciously broken filesystem. // in some of the algorithms below in case of some corrupted or maliciously broken filesystem.
@ -2383,7 +2383,7 @@ inline path convert_nt_path_to_win32_path(const wchar_t* nt_path, std::size_t si
( (
// Check if the following is a drive letter // Check if the following is a drive letter
( (
detail::is_letter(nt_path[pos]) && nt_path[pos + 1u] == colon && nt_path[pos + 1u] == colon && detail::is_letter(nt_path[pos]) &&
((size - pos) == 2u || detail::is_directory_separator(nt_path[pos + 2u])) ((size - pos) == 2u || detail::is_directory_separator(nt_path[pos + 2u]))
) || ) ||
// Check for an "incorrect" syntax for UNC path junction points // Check for an "incorrect" syntax for UNC path junction points
@ -2565,15 +2565,11 @@ path absolute_v4(path const& p, path const& base, system::error_code* ec)
return res; return res;
} }
BOOST_FILESYSTEM_DECL namespace {
path canonical_v3(path const& p, path const& base, system::error_code* ec)
inline path canonical_common(path& source, system::error_code* ec)
{ {
path source(detail::absolute_v3(p, base, ec)); #if defined(BOOST_POSIX_API)
if (ec && *ec)
{
return_empty_path:
return path();
}
system::error_code local_ec; system::error_code local_ec;
file_status st(detail::status_impl(source, &local_ec)); file_status st(detail::status_impl(source, &local_ec));
@ -2590,7 +2586,9 @@ path canonical_v3(path const& p, path const& base, system::error_code* ec)
BOOST_FILESYSTEM_THROW(filesystem_error("boost::filesystem::canonical", source, local_ec)); BOOST_FILESYSTEM_THROW(filesystem_error("boost::filesystem::canonical", source, local_ec));
*ec = local_ec; *ec = local_ec;
goto return_empty_path;
return_empty_path:
return path();
} }
path root(source.root_path()); path root(source.root_path());
@ -2602,6 +2600,8 @@ path canonical_v3(path const& p, path const& base, system::error_code* ec)
{ {
for (path::iterator itr(source.begin()), end(source.end()); itr != end; path_algorithms::increment_v4(itr)) for (path::iterator itr(source.begin()), end(source.end()); itr != end; path_algorithms::increment_v4(itr))
{ {
if (itr->empty())
continue;
if (path_algorithms::compare_v4(*itr, dot_p) == 0) if (path_algorithms::compare_v4(*itr, dot_p) == 0)
continue; continue;
if (path_algorithms::compare_v4(*itr, dot_dot_p) == 0) if (path_algorithms::compare_v4(*itr, dot_dot_p) == 0)
@ -2624,12 +2624,6 @@ path canonical_v3(path const& p, path const& base, system::error_code* ec)
path_algorithms::append_v4(result, *itr); path_algorithms::append_v4(result, *itr);
// If we don't have an absolute path yet then don't check symlink status.
// This avoids checking "C:" which is "the current directory on drive C"
// and hence not what we want to check/resolve here.
if (!result.is_absolute())
continue;
st = detail::symlink_status_impl(result, ec); st = detail::symlink_status_impl(result, ec);
if (ec && *ec) if (ec && *ec)
goto return_empty_path; goto return_empty_path;
@ -2656,7 +2650,7 @@ path canonical_v3(path const& p, path const& base, system::error_code* ec)
if (path_algorithms::compare_v4(*itr, dot_p) != 0) if (path_algorithms::compare_v4(*itr, dot_p) != 0)
path_algorithms::append_v4(link, *itr); path_algorithms::append_v4(link, *itr);
} }
source = link; source = std::move(link);
root = source.root_path(); root = source.root_path();
} }
else // link is relative else // link is relative
@ -2672,7 +2666,7 @@ path canonical_v3(path const& p, path const& base, system::error_code* ec)
if (path_algorithms::compare_v4(*itr, dot_p) != 0) if (path_algorithms::compare_v4(*itr, dot_p) != 0)
path_algorithms::append_v4(new_source, *itr); path_algorithms::append_v4(new_source, *itr);
} }
source = new_source; source = std::move(new_source);
} }
// symlink causes scan to be restarted // symlink causes scan to be restarted
@ -2688,6 +2682,122 @@ path canonical_v3(path const& p, path const& base, system::error_code* ec)
BOOST_ASSERT_MSG(result.is_absolute(), "canonical() implementation error; please report"); BOOST_ASSERT_MSG(result.is_absolute(), "canonical() implementation error; please report");
return result; return result;
#else // defined(BOOST_POSIX_API)
unique_handle h(create_file_handle(
source.c_str(),
FILE_READ_ATTRIBUTES | FILE_READ_EA,
FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE,
nullptr, // lpSecurityAttributes
OPEN_EXISTING,
FILE_FLAG_BACKUP_SEMANTICS));
DWORD err;
if (!h)
{
err = ::GetLastError();
fail_err:
emit_error(err, source, ec, "boost::filesystem::canonical");
return path();
}
WCHAR path_small_buf[small_path_size];
std::unique_ptr< WCHAR[] > path_large_buf;
WCHAR* path_buf = path_small_buf;
DWORD path_buf_size = sizeof(path_small_buf) / sizeof(*path_small_buf);
DWORD flags = FILE_NAME_NORMALIZED;
DWORD path_len;
while (true)
{
path_len = ::GetFinalPathNameByHandleW(h.get(), path_buf, path_buf_size, flags);
if (path_len > 0)
{
if (path_len < path_buf_size)
break;
// The buffer is not large enough, path_len is the required buffer size, including the terminating zero
path_large_buf.reset(new WCHAR[path_len]);
path_buf = path_large_buf.get();
path_buf_size = path_len;
}
else
{
err = ::GetLastError();
if (BOOST_UNLIKELY(err == ERROR_PATH_NOT_FOUND && (flags & VOLUME_NAME_NT) == 0u))
{
// Drive letter does not exist for the file, obtain an NT path for it
flags |= VOLUME_NAME_NT;
continue;
}
goto fail_err;
}
}
path result;
if ((flags & VOLUME_NAME_NT) == 0u)
{
// If the input path did not contain a long path prefix, convert
// "\\?\X:" to "X:" and "\\?\UNC\" to "\\". Otherwise, preserve the prefix.
const path::value_type* source_str = source.c_str();
if (source.size() < 4u ||
source_str[0] != path::preferred_separator ||
source_str[1] != path::preferred_separator ||
source_str[2] != questionmark ||
source_str[3] != path::preferred_separator)
{
if (path_len >= 6u &&
path_buf[0] == path::preferred_separator &&
path_buf[1] == path::preferred_separator &&
path_buf[2] == questionmark &&
path_buf[3] == path::preferred_separator)
{
if (path_buf[5] == colon && detail::is_letter(path_buf[4]))
{
path_buf += 4;
path_len -= 4u;
}
else if (path_len >= 8u &&
(path_buf[4] == L'U' || path_buf[4] == L'u') &&
(path_buf[5] == L'N' || path_buf[5] == L'n') &&
(path_buf[6] == L'C' || path_buf[6] == L'c') &&
path_buf[7] == path::preferred_separator)
{
path_buf += 6;
path_len -= 6u;
path_buf[0] = path::preferred_separator;
}
}
}
result.assign(path_buf, path_buf + path_len);
}
else
{
// Convert NT path to a Win32 path
result.assign(L"\\\\?\\GLOBALROOT");
result.concat(path_buf, path_buf + path_len);
}
BOOST_ASSERT_MSG(result.is_absolute(), "canonical() implementation error; please report");
return result;
#endif // defined(BOOST_POSIX_API)
}
} // unnamed namespace
BOOST_FILESYSTEM_DECL
path canonical_v3(path const& p, path const& base, system::error_code* ec)
{
path source(detail::absolute_v3(p, base, ec));
if (ec && *ec)
return path();
return detail::canonical_common(source, ec);
} }
BOOST_FILESYSTEM_DECL BOOST_FILESYSTEM_DECL
@ -2695,124 +2805,9 @@ path canonical_v4(path const& p, path const& base, system::error_code* ec)
{ {
path source(detail::absolute_v4(p, base, ec)); path source(detail::absolute_v4(p, base, ec));
if (ec && *ec) if (ec && *ec)
{
return_empty_path:
return path(); return path();
}
system::error_code local_ec; return detail::canonical_common(source, ec);
file_status st(detail::status_impl(source, &local_ec));
if (st.type() == fs::file_not_found)
{
local_ec = system::errc::make_error_code(system::errc::no_such_file_or_directory);
goto fail_local_ec;
}
else if (local_ec)
{
fail_local_ec:
if (!ec)
BOOST_FILESYSTEM_THROW(filesystem_error("boost::filesystem::canonical", source, local_ec));
*ec = local_ec;
goto return_empty_path;
}
path root(source.root_path());
path const& dot_p = dot_path();
path const& dot_dot_p = dot_dot_path();
unsigned int symlinks_allowed = symloop_max;
path result;
while (true)
{
for (path::iterator itr(source.begin()), end(source.end()); itr != end; path_algorithms::increment_v4(itr))
{
if (path_algorithms::compare_v4(*itr, dot_p) == 0)
continue;
if (path_algorithms::compare_v4(*itr, dot_dot_p) == 0)
{
if (path_algorithms::compare_v4(result, root) != 0)
result.remove_filename_and_trailing_separators();
continue;
}
if (itr->size() == 1u && detail::is_directory_separator(itr->native()[0]))
{
// Convert generic separator returned by the iterator for the root directory to
// the preferred separator. This is important on Windows, as in some cases,
// like paths for network shares and cloud storage mount points GetFileAttributesW
// will return "file not found" if the path contains forward slashes.
result += path::preferred_separator;
// We don't need to check for a symlink after adding a separator.
continue;
}
path_algorithms::append_v4(result, *itr);
// If we don't have an absolute path yet then don't check symlink status.
// This avoids checking "C:" which is "the current directory on drive C"
// and hence not what we want to check/resolve here.
if (!result.is_absolute())
continue;
st = detail::symlink_status_impl(result, ec);
if (ec && *ec)
goto return_empty_path;
if (is_symlink(st))
{
if (symlinks_allowed == 0)
{
local_ec = system::errc::make_error_code(system::errc::too_many_symbolic_link_levels);
goto fail_local_ec;
}
--symlinks_allowed;
path link(detail::read_symlink(result, ec));
if (ec && *ec)
goto return_empty_path;
result.remove_filename_and_trailing_separators();
if (link.is_absolute())
{
for (path_algorithms::increment_v4(itr); itr != end; path_algorithms::increment_v4(itr))
{
if (path_algorithms::compare_v4(*itr, dot_p) != 0)
path_algorithms::append_v4(link, *itr);
}
source = link;
root = source.root_path();
}
else // link is relative
{
link.remove_trailing_separator();
if (path_algorithms::compare_v4(link, dot_p) == 0)
continue;
path new_source(result);
path_algorithms::append_v4(new_source, link);
for (path_algorithms::increment_v4(itr); itr != end; path_algorithms::increment_v4(itr))
{
if (path_algorithms::compare_v4(*itr, dot_p) != 0)
path_algorithms::append_v4(new_source, *itr);
}
source = new_source;
}
// symlink causes scan to be restarted
goto restart_scan;
}
}
break;
restart_scan:
result.clear();
}
BOOST_ASSERT_MSG(result.is_absolute(), "canonical() implementation error; please report");
return result;
} }
BOOST_FILESYSTEM_DECL BOOST_FILESYSTEM_DECL

View File

@ -1781,15 +1781,9 @@ void canonical_basic_tests()
BOOST_TEST(ok); BOOST_TEST(ok);
// non-symlink tests; also see canonical_symlink_tests() // non-symlink tests; also see canonical_symlink_tests()
#if BOOST_FILESYSTEM_VERSION == 3
BOOST_TEST_EQ(fs::canonical(""), fs::current_path()); BOOST_TEST_EQ(fs::canonical(""), fs::current_path());
BOOST_TEST_EQ(fs::canonical("", fs::current_path()), fs::current_path()); BOOST_TEST_EQ(fs::canonical("", fs::current_path()), fs::current_path());
BOOST_TEST_EQ(fs::canonical("", ""), fs::current_path()); BOOST_TEST_EQ(fs::canonical("", ""), fs::current_path());
#else
BOOST_TEST_EQ(fs::canonical(""), fs::current_path() / fs::path());
BOOST_TEST_EQ(fs::canonical("", fs::current_path()), fs::current_path() / fs::path());
BOOST_TEST_EQ(fs::canonical("", ""), fs::current_path() / fs::path());
#endif
BOOST_TEST_EQ(fs::canonical(fs::current_path()), fs::current_path()); BOOST_TEST_EQ(fs::canonical(fs::current_path()), fs::current_path());
BOOST_TEST_EQ(fs::canonical(fs::current_path(), ""), fs::current_path()); BOOST_TEST_EQ(fs::canonical(fs::current_path(), ""), fs::current_path());
BOOST_TEST_EQ(fs::canonical(fs::current_path(), "no-such-file"), fs::current_path()); BOOST_TEST_EQ(fs::canonical(fs::current_path(), "no-such-file"), fs::current_path());
@ -1836,6 +1830,23 @@ void canonical_basic_tests()
// Test Windows long paths // Test Windows long paths
fs::path long_path = make_long_path(dir / L"f0"); fs::path long_path = make_long_path(dir / L"f0");
BOOST_TEST_EQ(fs::canonical(long_path), long_path); BOOST_TEST_EQ(fs::canonical(long_path), long_path);
// Test that canonical() consistently converts drive letters to upper case.
// https://github.com/boostorg/filesystem/issues/325
{
fs::path::string_type root_path = dir.root_path().native(), lc_root_path, uc_root_path;
for (fs::path::string_type::value_type c : root_path)
{
fs::path::string_type::value_type lc = c, uc = c;
if (lc >= L'A' && lc <= L'Z')
lc += L'a' - L'A';
if (uc >= L'a' && uc <= L'z')
uc -= L'a' - L'A';
lc_root_path.push_back(lc);
uc_root_path.push_back(uc);
}
BOOST_TEST_EQ(fs::canonical(fs::path(lc_root_path) / dir.relative_path()), fs::canonical(fs::path(uc_root_path) / dir.relative_path()));
}
#endif #endif
} }
@ -2829,7 +2840,7 @@ void weakly_canonical_basic_tests()
BOOST_TEST_EQ(fs::weakly_canonical("..//foo", d1), dir / "foo"); BOOST_TEST_EQ(fs::weakly_canonical("..//foo", d1), dir / "foo");
#ifdef BOOST_WINDOWS_API #ifdef BOOST_WINDOWS_API
BOOST_TEST_EQ(fs::weakly_canonical("c:/no-such/foo/bar"), fs::path("c:/no-such/foo/bar")); BOOST_TEST_EQ(fs::weakly_canonical("c:/no-such/foo/bar"), fs::path("C:/no-such/foo/bar"));
// Test Windows long paths // Test Windows long paths
fs::path long_path = make_long_path(dir / L"f0"); fs::path long_path = make_long_path(dir / L"f0");