Skip to content

Commit de698ae

Browse files
committed
[libcxx] Convert paths to/from the right narrow code page for narrow strings on windows
On Windows, narrow (char-based) paths normally are not UTF-8; they can be in any of many native code pages, and that is how the system functions that operate on files interpret the narrow paths/file names passed to them. Differential Revision: https://reviews.llvm.org/D91137
1 parent 48c6500 commit de698ae

File tree

3 files changed

+148
-4
lines changed

3 files changed

+148
-4
lines changed

libcxx/include/filesystem

Lines changed: 103 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,13 @@ typedef string __path_string;
690690
typedef char __path_value;
691691
#endif
692692

693+
// Windows-only conversion helpers between the wide native path string and
// narrow `char` strings. Both are defined in src/filesystem/operations.cpp
// and convert using CP_ACP or CP_OEMCP, selected by AreFileApisANSI().
// Each takes the source string, an output buffer and the buffer length, and
// returns the result of the underlying Win32 conversion call; callers in
// this header first pass (nullptr, 0) and use the result as the required
// output size, then call again with a buffer of that size.
#if defined(_LIBCPP_WIN32API)
694+
_LIBCPP_FUNC_VIS
695+
size_t __wide_to_char(const wstring&, char*, size_t);
696+
_LIBCPP_FUNC_VIS
697+
size_t __char_to_wide(const string&, wchar_t*, size_t);
698+
#endif
699+
693700
template <class _ECharT>
694701
struct _PathCVT;
695702

@@ -793,6 +800,48 @@ struct _PathCVT<__path_value> {
793800
};
794801

795802
#if defined(_LIBCPP_WIN32API)
803+
template <>
804+
struct _PathCVT<char> {
805+
806+
static void
807+
__append_string(__path_string& __dest, const basic_string<char> &__str) {
808+
size_t __size = __char_to_wide(__str, nullptr, 0);
809+
size_t __pos = __dest.size();
810+
__dest.resize(__pos + __size);
811+
__char_to_wide(__str, const_cast<__path_value*>(__dest.data()) + __pos, __size);
812+
}
813+
814+
template <class _Iter>
815+
static typename enable_if<__is_exactly_cpp17_input_iterator<_Iter>::value>::type
816+
__append_range(__path_string& __dest, _Iter __b, _Iter __e) {
817+
basic_string<char> __tmp(__b, __e);
818+
__append_string(__dest, __tmp);
819+
}
820+
821+
template <class _Iter>
822+
static typename enable_if<__is_cpp17_forward_iterator<_Iter>::value>::type
823+
__append_range(__path_string& __dest, _Iter __b, _Iter __e) {
824+
basic_string<char> __tmp(__b, __e);
825+
__append_string(__dest, __tmp);
826+
}
827+
828+
template <class _Iter>
829+
static void __append_range(__path_string& __dest, _Iter __b, _NullSentinel) {
830+
const char __sentinel = char{};
831+
basic_string<char> __tmp;
832+
for (; *__b != __sentinel; ++__b)
833+
__tmp.push_back(*__b);
834+
__append_string(__dest, __tmp);
835+
}
836+
837+
template <class _Source>
838+
static void __append_source(__path_string& __dest, _Source const& __s) {
839+
using _Traits = __is_pathable<_Source>;
840+
__append_range(__dest, _Traits::__range_begin(__s),
841+
_Traits::__range_end(__s));
842+
}
843+
};
844+
796845
template <class _ECharT>
797846
struct _PathExport {
798847
typedef __narrow_to_utf8<sizeof(wchar_t) * __CHAR_BIT__> _Narrower;
@@ -806,6 +855,17 @@ struct _PathExport {
806855
}
807856
};
808857

858+
template <>
859+
struct _PathExport<char> {
860+
template <class _Str>
861+
static void __append(_Str& __dest, const __path_string& __src) {
862+
size_t __size = __wide_to_char(__src, nullptr, 0);
863+
size_t __pos = __dest.size();
864+
__dest.resize(__size);
865+
__wide_to_char(__src, const_cast<char*>(__dest.data()) + __pos, __size);
866+
}
867+
};
868+
809869
template <>
810870
struct _PathExport<wchar_t> {
811871
template <class _Str>
@@ -1110,7 +1170,11 @@ public:
11101170
return string<char>();
11111171
}
11121172
// Returns the pathname converted to a __u8_string. In this diff view the
// line following the "1113-" marker is the previous implementation being
// removed; the lines following the "+" markers are its replacement, which
// runs __pn_ through __narrow_to_utf8 (sized for wchar_t) into __s.
_LIBCPP_INLINE_VISIBILITY __u8_string u8string() const {
1113-
return string<__u8_string::value_type>();
1173+
using _CVT = __narrow_to_utf8<sizeof(wchar_t) * __CHAR_BIT__>;
1174+
__u8_string __s;
1175+
// Initial capacity only; back_inserter grows __s as output is appended.
__s.reserve(__pn_.size());
1176+
_CVT()(back_inserter(__s), __pn_.data(), __pn_.data() + __pn_.size());
1177+
return __s;
11141178
}
11151179

11161180
_LIBCPP_INLINE_VISIBILITY _VSTD::u16string u16string() const {
@@ -1373,9 +1437,42 @@ _LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T
13731437
is_same<typename __is_pathable<_InputIt>::__char_type, char>::value,
13741438
"u8path(Iter, Iter) requires Iter have a value_type of type 'char'"
13751439
" or 'char8_t'");
1440+
#if defined(_LIBCPP_WIN32API)
1441+
string __tmp(__f, __l);
1442+
using _CVT = __widen_from_utf8<sizeof(wchar_t) * __CHAR_BIT__>;
1443+
_VSTD::wstring __w;
1444+
__w.reserve(__tmp.size());
1445+
_CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size());
1446+
return path(__w);
1447+
#else
13761448
return path(__f, __l);
1449+
#endif /* !_LIBCPP_WIN32API */
13771450
}
13781451

1452+
#if defined(_LIBCPP_WIN32API)
1453+
template <class _InputIt>
1454+
_LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T
1455+
typename enable_if<__is_pathable<_InputIt>::value, path>::type
1456+
u8path(_InputIt __f, _NullSentinel) {
1457+
static_assert(
1458+
#ifndef _LIBCPP_NO_HAS_CHAR8_T
1459+
is_same<typename __is_pathable<_InputIt>::__char_type, char8_t>::value ||
1460+
#endif
1461+
is_same<typename __is_pathable<_InputIt>::__char_type, char>::value,
1462+
"u8path(Iter, Iter) requires Iter have a value_type of type 'char'"
1463+
" or 'char8_t'");
1464+
string __tmp;
1465+
const char __sentinel = char{};
1466+
for (; *__f != __sentinel; ++__f)
1467+
__tmp.push_back(*__f);
1468+
using _CVT = __widen_from_utf8<sizeof(wchar_t) * __CHAR_BIT__>;
1469+
_VSTD::wstring __w;
1470+
__w.reserve(__tmp.size());
1471+
_CVT()(back_inserter(__w), __tmp.data(), __tmp.data() + __tmp.size());
1472+
return path(__w);
1473+
}
1474+
#endif /* _LIBCPP_WIN32API */
1475+
13791476
template <class _Source>
13801477
_LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T
13811478
typename enable_if<__is_pathable<_Source>::value, path>::type
@@ -1387,7 +1484,12 @@ _LIBCPP_INLINE_VISIBILITY _LIBCPP_DEPRECATED_WITH_CHAR8_T
13871484
is_same<typename __is_pathable<_Source>::__char_type, char>::value,
13881485
"u8path(Source const&) requires Source have a character type of type "
13891486
"'char' or 'char8_t'");
1487+
#if defined(_LIBCPP_WIN32API)
1488+
using _Traits = __is_pathable<_Source>;
1489+
return u8path(__unwrap_iter(_Traits::__range_begin(__s)), __unwrap_iter(_Traits::__range_end(__s)));
1490+
#else
13901491
return path(__s);
1492+
#endif
13911493
}
13921494

13931495
class _LIBCPP_TYPE_VIS path::iterator {

libcxx/src/filesystem/filesystem_common.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,12 @@ template <>
126126
bool error_value<bool>() {
127127
return false;
128128
}
129+
#if __SIZEOF_SIZE_T__ != __SIZEOF_LONG_LONG__
130+
template <>
131+
size_t error_value<size_t>() {
132+
return size_t(-1);
133+
}
134+
#endif
129135
template <>
130136
uintmax_t error_value<uintmax_t>() {
131137
return uintmax_t(-1);

libcxx/src/filesystem/operations.cpp

Lines changed: 39 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,15 @@
1717

1818
#include "filesystem_common.h"
1919

20-
#include <unistd.h>
21-
#include <sys/stat.h>
22-
#include <sys/statvfs.h>
20+
#if defined(_LIBCPP_WIN32API)
21+
# define WIN32_LEAN_AND_MEAN
22+
# define NOMINMAX
23+
# include <windows.h>
24+
#else
25+
# include <unistd.h>
26+
# include <sys/stat.h>
27+
# include <sys/statvfs.h>
28+
#endif
2329
#include <time.h>
2430
#include <fcntl.h> /* values for fchmodat */
2531

@@ -1680,6 +1686,36 @@ path::iterator& path::iterator::__decrement() {
16801686
return *this;
16811687
}
16821688

1689+
#if defined(_LIBCPP_WIN32API)
1690+
////////////////////////////////////////////////////////////////////////////
1691+
// Windows path conversions
1692+
size_t __wide_to_char(const wstring &str, char *out, size_t outlen) {
1693+
if (str.empty())
1694+
return 0;
1695+
ErrorHandler<size_t> err("__wide_to_char", nullptr);
1696+
UINT codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
1697+
BOOL used_default = FALSE;
1698+
int ret = WideCharToMultiByte(codepage, 0, str.data(), str.size(), out,
1699+
outlen, nullptr, &used_default);
1700+
if (ret <= 0 || used_default)
1701+
return err.report(errc::illegal_byte_sequence);
1702+
return ret;
1703+
}
1704+
1705+
size_t __char_to_wide(const string &str, wchar_t *out, size_t outlen) {
1706+
if (str.empty())
1707+
return 0;
1708+
ErrorHandler<size_t> err("__char_to_wide", nullptr);
1709+
UINT codepage = AreFileApisANSI() ? CP_ACP : CP_OEMCP;
1710+
int ret = MultiByteToWideChar(codepage, MB_ERR_INVALID_CHARS, str.data(),
1711+
str.size(), out, outlen);
1712+
if (ret <= 0)
1713+
return err.report(errc::illegal_byte_sequence);
1714+
return ret;
1715+
}
1716+
#endif
1717+
1718+
16831719
///////////////////////////////////////////////////////////////////////////////
16841720
// directory entry definitions
16851721
///////////////////////////////////////////////////////////////////////////////

0 commit comments

Comments
 (0)