From 6e0ff4cc1f261def00f9f9dd581ba6ef72703f0c Mon Sep 17 00:00:00 2001 From: Eli Zaretskii Date: Fri, 18 May 2018 16:34:19 +0300 Subject: [PATCH] Fix decoding of directories when "~" includes non-ASCII chars * src/fileio.c (Fexpand_file_name): Don't build multibyte strings from unibyte non-ASCII strings when NAME and DEFAULT_DIRECTORY have different multibyteness, as this adds bytes to the byte sequence, and in some situations, e.g., when the home directory includes non-ASCII characters, can fail file APIs. (Bug#30755) * lisp/startup.el (normal-top-level): Make sure default-directory is set to a multibyte string when decoded on MS-Windows. (cherry picked from commit 3aab8626ba5080bb04d0fdae52d99c850a842a52) --- lisp/startup.el | 14 +++++++-- src/fileio.c | 77 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 72 insertions(+), 19 deletions(-) diff --git a/lisp/startup.el b/lisp/startup.el index 9d16b59defd..33f8ca63f8d 100644 --- a/lisp/startup.el +++ b/lisp/startup.el @@ -552,9 +552,17 @@ It is the default value of the variable `top-level'." (if default-directory (setq default-directory (if (eq system-type 'windows-nt) - ;; Convert backslashes to forward slashes. - (expand-file-name - (decode-coding-string default-directory coding t)) + ;; We pass the decoded default-directory as + ;; the 2nd arg to expand-file-name to make + ;; sure it sees a multibyte string as the + ;; default directory; this avoids the side + ;; effect of returning a unibyte string from + ;; expand-file-name because it still sees + ;; the undecoded value of default-directory. + (let ((defdir (decode-coding-string default-directory + coding t))) + ;; Convert backslashes to forward slashes. + (expand-file-name defdir defdir)) (decode-coding-string default-directory coding t)))))) ;; Decode all the important variables and directory lists, now diff --git a/src/fileio.c b/src/fileio.c index c4a10000bc3..9dbe3ad788e 100644 --- a/src/fileio.c +++ b/src/fileio.c @@ -864,9 +864,61 @@ the root directory. */) } } multibyte = STRING_MULTIBYTE (name); - if (multibyte != STRING_MULTIBYTE (default_directory)) + bool defdir_multibyte = STRING_MULTIBYTE (default_directory); + if (multibyte != defdir_multibyte) { + /* We want to make both NAME and DEFAULT_DIRECTORY have the same + multibyteness. Strategy: + . If either NAME or DEFAULT_DIRECTORY is pure-ASCII, they + can be converted to the multibyteness of the other one + while keeping the same byte sequence. + . If both are non-ASCII, the only safe conversion is to + convert the multibyte one to be unibyte, because the + reverse conversion potentially adds bytes while raw bytes + are converted to their multibyte forms, which we will be + unable to account for, since the information about the + original multibyteness is lost. If those additional bytes + later leak to system APIs because they are not encoded or + because they are converted to unibyte strings by keeping + the data, file APIs will fail. + + Note: One could argue that if we see a multibyte string, it + is evidence that file-name decoding was already set up, and + we could convert unibyte strings to multibyte using + DECODE_FILE. However, this is risky, because the likes of + string_to_multibyte are able of creating multibyte strings + without any decoding. */ if (multibyte) + { + bool name_ascii_p = SCHARS (name) == SBYTES (name); + unsigned char *p = SDATA (default_directory); + + if (!name_ascii_p) + while (*p && ASCII_CHAR_P (*p)) + p++; + if (name_ascii_p || *p != '\0') + { + /* DEFAULT_DIRECTORY is unibyte and possibly non-ASCII. + Make a unibyte string out of NAME, and arrange for + the result of this function to be a unibyte string. + This is needed during bootstrapping and dumping, when + Emacs cannot decode file names, because the locale + environment is not set up. */ + name = make_unibyte_string (SSDATA (name), SBYTES (name)); + multibyte = 0; + } + else + { + /* NAME is non-ASCII and multibyte, and + DEFAULT_DIRECTORY is unibyte and pure-ASCII: make a + multibyte string out of DEFAULT_DIRECTORY's data. */ + default_directory = + make_multibyte_string (SSDATA (default_directory), + SCHARS (default_directory), + SCHARS (default_directory)); + } + } + else { unsigned char *p = SDATA (name); @@ -874,23 +926,16 @@ the root directory. */) p++; if (*p == '\0') { - /* NAME is a pure ASCII string, and DEFAULT_DIRECTORY is - unibyte. Do not convert DEFAULT_DIRECTORY to - multibyte; instead, convert NAME to a unibyte string, - so that the result of this function is also a unibyte - string. This is needed during bootstrapping and - dumping, when Emacs cannot decode file names, because - the locale environment is not set up. */ - name = make_unibyte_string (SSDATA (name), SBYTES (name)); - multibyte = 0; + /* DEFAULT_DIRECTORY is multibyte and NAME is unibyte + and pure-ASCII. Make a multibyte string out of + NAME's data. */ + name = make_multibyte_string (SSDATA (name), + SCHARS (name), SCHARS (name)); + multibyte = 1; } else - default_directory = string_to_multibyte (default_directory); - } - else - { - name = string_to_multibyte (name); - multibyte = 1; + default_directory = make_unibyte_string (SSDATA (default_directory), + SBYTES (default_directory)); } }