1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2025-01-24 19:03:29 +00:00

Fix decoding of directories when "~" includes non-ASCII chars

* src/fileio.c (Fexpand_file_name): Don't build multibyte strings
from unibyte non-ASCII strings when NAME and DEFAULT_DIRECTORY
have different multibyteness, as this adds bytes to the byte
sequence, and in some situations, e.g., when the home directory
includes non-ASCII characters, can cause file APIs to fail.  (Bug#30755)

* lisp/startup.el (normal-top-level): Make sure default-directory
is set to a multibyte string when decoded on MS-Windows.

(cherry picked from commit 3aab8626ba)
This commit is contained in:
Eli Zaretskii 2018-05-18 16:34:19 +03:00
parent 35c1ab1419
commit 6e0ff4cc1f
2 changed files with 72 additions and 19 deletions

View File

@ -552,9 +552,17 @@ It is the default value of the variable `top-level'."
(if default-directory
(setq default-directory
(if (eq system-type 'windows-nt)
;; Convert backslashes to forward slashes.
(expand-file-name
(decode-coding-string default-directory coding t))
;; We pass the decoded default-directory as
;; the 2nd arg to expand-file-name to make
;; sure it sees a multibyte string as the
;; default directory; this avoids the side
;; effect of returning a unibyte string from
;; expand-file-name because it still sees
;; the undecoded value of default-directory.
(let ((defdir (decode-coding-string default-directory
coding t)))
;; Convert backslashes to forward slashes.
(expand-file-name defdir defdir))
(decode-coding-string default-directory coding t))))))
;; Decode all the important variables and directory lists, now

View File

@ -864,9 +864,61 @@ the root directory. */)
}
}
multibyte = STRING_MULTIBYTE (name);
if (multibyte != STRING_MULTIBYTE (default_directory))
bool defdir_multibyte = STRING_MULTIBYTE (default_directory);
if (multibyte != defdir_multibyte)
{
/* We want to make both NAME and DEFAULT_DIRECTORY have the same
multibyteness. Strategy:
. If either NAME or DEFAULT_DIRECTORY is pure-ASCII, they
can be converted to the multibyteness of the other one
while keeping the same byte sequence.
. If both are non-ASCII, the only safe conversion is to
convert the multibyte one to be unibyte, because the
reverse conversion potentially adds bytes while raw bytes
are converted to their multibyte forms, which we will be
unable to account for, since the information about the
original multibyteness is lost. If those additional bytes
later leak to system APIs because they are not encoded or
because they are converted to unibyte strings by keeping
the data, file APIs will fail.
Note: One could argue that if we see a multibyte string, it
is evidence that file-name decoding was already set up, and
we could convert unibyte strings to multibyte using
DECODE_FILE. However, this is risky, because the likes of
string_to_multibyte are capable of creating multibyte strings
without any decoding. */
if (multibyte)
{
bool name_ascii_p = SCHARS (name) == SBYTES (name);
unsigned char *p = SDATA (default_directory);
if (!name_ascii_p)
while (*p && ASCII_CHAR_P (*p))
p++;
if (name_ascii_p || *p != '\0')
{
/* DEFAULT_DIRECTORY is unibyte and possibly non-ASCII.
Make a unibyte string out of NAME, and arrange for
the result of this function to be a unibyte string.
This is needed during bootstrapping and dumping, when
Emacs cannot decode file names, because the locale
environment is not set up. */
name = make_unibyte_string (SSDATA (name), SBYTES (name));
multibyte = 0;
}
else
{
/* NAME is non-ASCII and multibyte, and
DEFAULT_DIRECTORY is unibyte and pure-ASCII: make a
multibyte string out of DEFAULT_DIRECTORY's data. */
default_directory =
make_multibyte_string (SSDATA (default_directory),
SCHARS (default_directory),
SCHARS (default_directory));
}
}
else
{
unsigned char *p = SDATA (name);
@ -874,23 +926,16 @@ the root directory. */)
p++;
if (*p == '\0')
{
/* NAME is a pure ASCII string, and DEFAULT_DIRECTORY is
unibyte. Do not convert DEFAULT_DIRECTORY to
multibyte; instead, convert NAME to a unibyte string,
so that the result of this function is also a unibyte
string. This is needed during bootstrapping and
dumping, when Emacs cannot decode file names, because
the locale environment is not set up. */
name = make_unibyte_string (SSDATA (name), SBYTES (name));
multibyte = 0;
/* DEFAULT_DIRECTORY is multibyte and NAME is unibyte
and pure-ASCII. Make a multibyte string out of
NAME's data. */
name = make_multibyte_string (SSDATA (name),
SCHARS (name), SCHARS (name));
multibyte = 1;
}
else
default_directory = string_to_multibyte (default_directory);
}
else
{
name = string_to_multibyte (name);
multibyte = 1;
default_directory = make_unibyte_string (SSDATA (default_directory),
SBYTES (default_directory));
}
}