From 3e53c929a2298e59d9abd3973094db3d34d59e98 Mon Sep 17 00:00:00 2001 From: LEE Wondong Date: Sun, 20 Oct 2013 15:02:03 +0900 Subject: [PATCH] Fix unicode errors on Windows in path_is_dir, path_exists, getcwd and rust_localtime. This makes these functions use the wchar_t versions of the APIs instead of the char versions. --- src/libstd/libc.rs | 14 +++++-- src/libstd/os.rs | 87 ++++++++++++++++++++++++++++++++++++++++- src/rt/rust_builtin.cpp | 46 ++++++++++++++++++++-- src/rt/rust_globals.h | 8 ++++ src/rt/rustrt.def.in | 2 + 5 files changed, 148 insertions(+), 9 deletions(-) diff --git a/src/libstd/libc.rs b/src/libstd/libc.rs index 3313823fc5f..d4df0e826f6 100644 --- a/src/libstd/libc.rs +++ b/src/libstd/libc.rs @@ -1208,7 +1208,7 @@ pub mod consts { pub static ERROR_ALREADY_EXISTS : c_int = 183; pub static ERROR_INVALID_ADDRESS : c_int = 487; pub static ERROR_FILE_INVALID : c_int = 1006; - pub static INVALID_HANDLE_VALUE: c_int = -1; + pub static INVALID_HANDLE_VALUE : c_int = -1; pub static DELETE : DWORD = 0x00010000; pub static READ_CONTROL : DWORD = 0x00020000; @@ -3352,11 +3352,14 @@ pub mod funcs { LPSECURITY_ATTRIBUTES) -> BOOL; pub fn CopyFileW(lpExistingFileName: LPCWSTR, - lpNewFileName: LPCWSTR, - bFailIfExists: BOOL) - -> BOOL; + lpNewFileName: LPCWSTR, + bFailIfExists: BOOL) + -> BOOL; pub fn DeleteFileW(lpPathName: LPCWSTR) -> BOOL; pub fn RemoveDirectoryW(lpPathName: LPCWSTR) -> BOOL; + pub fn GetCurrentDirectoryW(nBufferLength: DWORD, + lpBuffer: LPWSTR) + -> DWORD; pub fn SetCurrentDirectoryW(lpPathName: LPCWSTR) -> BOOL; pub fn GetLastError() -> DWORD; pub fn FindFirstFileW(fileName: *u16, findFileData: HANDLE) @@ -3462,6 +3465,9 @@ pub mod funcs { -> BOOL; pub fn DeleteFileW(lpPathName: LPCWSTR) -> BOOL; pub fn RemoveDirectoryW(lpPathName: LPCWSTR) -> BOOL; + pub fn GetCurrentDirectoryW(nBufferLength: DWORD, + lpBuffer: LPWSTR) + -> DWORD; pub fn SetCurrentDirectoryW(lpPathName: LPCWSTR) -> BOOL; pub fn GetLastError() -> DWORD; pub fn 
FindFirstFileW(fileName: *u16, findFileData: HANDLE) diff --git a/src/libstd/os.rs b/src/libstd/os.rs index 348bfd5c61a..a7d4bc9140b 100644 --- a/src/libstd/os.rs +++ b/src/libstd/os.rs @@ -28,7 +28,8 @@ #[allow(missing_doc)]; -use c_str::{CString, ToCStr}; +#[cfg(unix)] +use c_str::CString; use clone::Clone; use container::Container; use io; @@ -56,6 +57,11 @@ pub fn close(fd: c_int) -> c_int { } } +// On Windows, wide character version of function must be used to support +// unicode, so functions should be split into at least two versions, +// which are for Windows and for non-Windows, if necessary. +// See https://github.com/mozilla/rust/issues/9822 for more information. + pub mod rustrt { use libc::{c_char, c_int}; use libc; @@ -64,11 +70,19 @@ pub mod rustrt { pub fn rust_path_is_dir(path: *libc::c_char) -> c_int; pub fn rust_path_exists(path: *libc::c_char) -> c_int; } + + // Uses _wstat instead of stat. + #[cfg(windows)] + extern { + pub fn rust_path_is_dir_u16(path: *u16) -> c_int; + pub fn rust_path_exists_u16(path: *u16) -> c_int; + } } pub static TMPBUF_SZ : uint = 1000u; static BUF_BYTES : uint = 2048u; +#[cfg(unix)] pub fn getcwd() -> Path { #[fixed_stack_segment]; #[inline(never)]; let mut buf = [0 as libc::c_char, ..BUF_BYTES]; @@ -83,6 +97,22 @@ pub fn getcwd() -> Path { } } +#[cfg(windows)] +pub fn getcwd() -> Path { + #[fixed_stack_segment]; #[inline(never)]; + use libc::DWORD; + use libc::GetCurrentDirectoryW; + let mut buf = [0 as u16, ..BUF_BYTES]; + do buf.as_mut_buf |buf, len| { + unsafe { + if libc::GetCurrentDirectoryW(len as DWORD, buf) == 0 as DWORD { + fail2!(); + } + } + } + Path::new(str::from_utf16(buf)) +} + #[cfg(windows)] pub mod win32 { use libc; @@ -613,6 +643,7 @@ pub fn walk_dir(p: &Path, f: &fn(&Path) -> bool) -> bool { }) } +#[cfg(unix)] /// Indicates whether a path represents a directory pub fn path_is_dir(p: &Path) -> bool { #[fixed_stack_segment]; #[inline(never)]; @@ -623,6 +654,18 @@ pub fn path_is_dir(p: &Path) -> bool 
{ } } + +#[cfg(windows)] +pub fn path_is_dir(p: &Path) -> bool { + #[fixed_stack_segment]; #[inline(never)]; + unsafe { + do os::win32::as_utf16_p(p.as_str().unwrap()) |buf| { + rustrt::rust_path_is_dir_u16(buf) != 0 as c_int + } + } +} + +#[cfg(unix)] /// Indicates whether a path exists pub fn path_exists(p: &Path) -> bool { #[fixed_stack_segment]; #[inline(never)]; @@ -633,6 +676,16 @@ pub fn path_exists(p: &Path) -> bool { } } +#[cfg(windows)] +pub fn path_exists(p: &Path) -> bool { + #[fixed_stack_segment]; #[inline(never)]; + unsafe { + do os::win32::as_utf16_p(p.as_str().unwrap()) |buf| { + rustrt::rust_path_exists_u16(buf) != 0 as c_int + } + } +} + /** * Convert a relative path to an absolute path * @@ -1922,8 +1975,31 @@ mod tests { #[test] fn path_is_dir() { + use rt::io::file::open; + use rt::io::{OpenOrCreate, Read}; + assert!((os::path_is_dir(&Path::new(".")))); assert!((!os::path_is_dir(&Path::new("test/stdtest/fs.rs")))); + + let mut dirpath = os::tmpdir(); + dirpath.push(format!("rust-test-{}/test-\uac00\u4e00\u30fc\u4f60\u597d", + rand::random::())); // 가一ー你好 + debug2!("path_is_dir dirpath: {}", dirpath.display()); + + let mkdir_result = os::mkdir_recursive(&dirpath, (S_IRUSR | S_IWUSR | S_IXUSR) as i32); + debug2!("path_is_dir mkdir_result: {}", mkdir_result); + + assert!((os::path_is_dir(&dirpath))); + + let mut filepath = dirpath; + filepath.push("unicode-file-\uac00\u4e00\u30fc\u4f60\u597d.rs"); + debug2!("path_is_dir filepath: {}", filepath.display()); + + open(&filepath, OpenOrCreate, Read); // ignore return; touch only + assert!((!os::path_is_dir(&filepath))); + + assert!((!os::path_is_dir(&Path::new( + "test/unicode-bogus-dir-\uac00\u4e00\u30fc\u4f60\u597d")))); } #[test] @@ -1931,6 +2007,15 @@ mod tests { assert!((os::path_exists(&Path::new(".")))); assert!((!os::path_exists(&Path::new( "test/nonexistent-bogus-path")))); + + let mut dirpath = os::tmpdir(); + dirpath.push(format!("rust-test-{}/test-\uac01\u4e01\u30fc\u518d\u89c1", + 
rand::random::())); // 각丁ー再见 + + os::mkdir_recursive(&dirpath, (S_IRUSR | S_IWUSR | S_IXUSR) as i32); + assert!((os::path_exists(&dirpath))); + assert!((!os::path_exists(&Path::new( + "test/unicode-bogus-path-\uac01\u4e01\u30fc\u518d\u89c1")))); } #[test] diff --git a/src/rt/rust_builtin.cpp b/src/rt/rust_builtin.cpp index 9750e22e945..d20e91f3917 100644 --- a/src/rt/rust_builtin.cpp +++ b/src/rt/rust_builtin.cpp @@ -107,7 +107,7 @@ rust_list_dir_wfd_fp_buf(void* wfd) { #endif extern "C" CDECL int -rust_path_is_dir(char *path) { +rust_path_is_dir(const char *path) { struct stat buf; if (stat(path, &buf)) { return 0; @@ -116,7 +116,25 @@ rust_path_is_dir(char *path) { } extern "C" CDECL int -rust_path_exists(char *path) { +#if defined(__WIN32__) +rust_path_is_dir_u16(const wchar_t *path) { + struct _stat buf; + // Don't use GetFileAttributesW, it cannot get attributes of + // some system files (e.g. pagefile.sys). + if (_wstat(path, &buf)) { + return 0; + } + return S_ISDIR(buf.st_mode); +} +#else +rust_path_is_dir_u16(const void *path) { + // Wide version of function is only used on Windows. + return 0; +} +#endif + +extern "C" CDECL int +rust_path_exists(const char *path) { struct stat buf; if (stat(path, &buf)) { return 0; @@ -124,6 +142,22 @@ rust_path_exists(char *path) { return 1; } +extern "C" CDECL int +#if defined(__WIN32__) +rust_path_exists_u16(const wchar_t *path) { + struct _stat buf; + if (_wstat(path, &buf)) { + return 0; + } + return 1; +} +#else +rust_path_exists_u16(const void *path) { + // Wide version of function is only used on Windows. 
+ return 0; +} +#endif + extern "C" CDECL FILE* rust_get_stdin() {return stdin;} extern "C" CDECL FILE* rust_get_stdout() {return stdout;} extern "C" CDECL FILE* rust_get_stderr() {return stderr;} @@ -294,8 +328,12 @@ rust_localtime(int64_t sec, int32_t nsec, rust_tm *timeptr) { const char* zone = NULL; #if defined(__WIN32__) int32_t gmtoff = -timezone; - char buffer[64]; - if (strftime(buffer, sizeof(buffer), "%Z", &tm) > 0) { + wchar_t wbuffer[64]; + char buffer[256]; + // strftime("%Z") can contain non-UTF-8 characters on non-English locale (issue #9418), + // so time zone should be converted from UTF-16 string set by wcsftime. + if (wcsftime(wbuffer, sizeof(wbuffer) / sizeof(wchar_t), L"%Z", &tm) > 0) { + WideCharToMultiByte(CP_UTF8, 0, wbuffer, -1, buffer, sizeof(buffer), NULL, NULL); zone = buffer; } #else diff --git a/src/rt/rust_globals.h b/src/rt/rust_globals.h index 9dc790b43f2..b6191fb4bd1 100644 --- a/src/rt/rust_globals.h +++ b/src/rt/rust_globals.h @@ -54,6 +54,14 @@ #include #if defined(__WIN32__) +// Prevent unnecessary #include's from +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif +// Prevent defining min and max macro +#ifndef NOMINMAX +#define NOMINMAX +#endif extern "C" { #include #include diff --git a/src/rt/rustrt.def.in b/src/rt/rustrt.def.in index fb9934c7601..66cbb5e85d6 100644 --- a/src/rt/rustrt.def.in +++ b/src/rt/rustrt.def.in @@ -10,7 +10,9 @@ rust_timegm rust_mktime precise_time_ns rust_path_is_dir +rust_path_is_dir_u16 rust_path_exists +rust_path_exists_u16 rust_get_stdin rust_get_stdout rust_get_stderr