mirror of
https://git.savannah.gnu.org/git/emacs.git
synced 2025-01-24 19:03:29 +00:00
Fix decoding of directories when "~" includes non-ASCII chars
* src/fileio.c (Fexpand_file_name): Don't build multibyte strings
from unibyte non-ASCII strings when NAME and DEFAULT_DIRECTORY
have different multibyteness, as this adds bytes to the byte
sequence, and in some situations, e.g., when the home directory
includes non-ASCII characters, can cause file APIs to fail. (Bug#30755)
* lisp/startup.el (normal-top-level): Make sure default-directory
is set to a multibyte string when decoded on MS-Windows.
(cherry picked from commit 3aab8626ba)
This commit is contained in:
parent
35c1ab1419
commit
6e0ff4cc1f
@ -552,9 +552,17 @@ It is the default value of the variable `top-level'."
|
||||
(if default-directory
|
||||
(setq default-directory
|
||||
(if (eq system-type 'windows-nt)
|
||||
;; Convert backslashes to forward slashes.
|
||||
(expand-file-name
|
||||
(decode-coding-string default-directory coding t))
|
||||
;; We pass the decoded default-directory as
|
||||
;; the 2nd arg to expand-file-name to make
|
||||
;; sure it sees a multibyte string as the
|
||||
;; default directory; this avoids the side
|
||||
;; effect of returning a unibyte string from
|
||||
;; expand-file-name because it still sees
|
||||
;; the undecoded value of default-directory.
|
||||
(let ((defdir (decode-coding-string default-directory
|
||||
coding t)))
|
||||
;; Convert backslashes to forward slashes.
|
||||
(expand-file-name defdir defdir))
|
||||
(decode-coding-string default-directory coding t))))))
|
||||
|
||||
;; Decode all the important variables and directory lists, now
|
||||
|
77
src/fileio.c
77
src/fileio.c
@ -864,9 +864,61 @@ the root directory. */)
|
||||
}
|
||||
}
|
||||
multibyte = STRING_MULTIBYTE (name);
|
||||
if (multibyte != STRING_MULTIBYTE (default_directory))
|
||||
bool defdir_multibyte = STRING_MULTIBYTE (default_directory);
|
||||
if (multibyte != defdir_multibyte)
|
||||
{
|
||||
/* We want to make both NAME and DEFAULT_DIRECTORY have the same
|
||||
multibyteness. Strategy:
|
||||
. If either NAME or DEFAULT_DIRECTORY is pure-ASCII, they
|
||||
can be converted to the multibyteness of the other one
|
||||
while keeping the same byte sequence.
|
||||
. If both are non-ASCII, the only safe conversion is to
|
||||
convert the multibyte one to be unibyte, because the
|
||||
reverse conversion potentially adds bytes while raw bytes
|
||||
are converted to their multibyte forms, which we will be
|
||||
unable to account for, since the information about the
|
||||
original multibyteness is lost. If those additional bytes
|
||||
later leak to system APIs because they are not encoded or
|
||||
because they are converted to unibyte strings by keeping
|
||||
the data, file APIs will fail.
|
||||
|
||||
Note: One could argue that if we see a multibyte string, it
|
||||
is evidence that file-name decoding was already set up, and
|
||||
we could convert unibyte strings to multibyte using
|
||||
DECODE_FILE. However, this is risky, because the likes of
|
||||
string_to_multibyte are capable of creating multibyte strings
|
||||
without any decoding. */
|
||||
if (multibyte)
|
||||
{
|
||||
bool name_ascii_p = SCHARS (name) == SBYTES (name);
|
||||
unsigned char *p = SDATA (default_directory);
|
||||
|
||||
if (!name_ascii_p)
|
||||
while (*p && ASCII_CHAR_P (*p))
|
||||
p++;
|
||||
if (name_ascii_p || *p != '\0')
|
||||
{
|
||||
/* DEFAULT_DIRECTORY is unibyte and possibly non-ASCII.
|
||||
Make a unibyte string out of NAME, and arrange for
|
||||
the result of this function to be a unibyte string.
|
||||
This is needed during bootstrapping and dumping, when
|
||||
Emacs cannot decode file names, because the locale
|
||||
environment is not set up. */
|
||||
name = make_unibyte_string (SSDATA (name), SBYTES (name));
|
||||
multibyte = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* NAME is non-ASCII and multibyte, and
|
||||
DEFAULT_DIRECTORY is unibyte and pure-ASCII: make a
|
||||
multibyte string out of DEFAULT_DIRECTORY's data. */
|
||||
default_directory =
|
||||
make_multibyte_string (SSDATA (default_directory),
|
||||
SCHARS (default_directory),
|
||||
SCHARS (default_directory));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned char *p = SDATA (name);
|
||||
|
||||
@ -874,23 +926,16 @@ the root directory. */)
|
||||
p++;
|
||||
if (*p == '\0')
|
||||
{
|
||||
/* NAME is a pure ASCII string, and DEFAULT_DIRECTORY is
|
||||
unibyte. Do not convert DEFAULT_DIRECTORY to
|
||||
multibyte; instead, convert NAME to a unibyte string,
|
||||
so that the result of this function is also a unibyte
|
||||
string. This is needed during bootstrapping and
|
||||
dumping, when Emacs cannot decode file names, because
|
||||
the locale environment is not set up. */
|
||||
name = make_unibyte_string (SSDATA (name), SBYTES (name));
|
||||
multibyte = 0;
|
||||
/* DEFAULT_DIRECTORY is multibyte and NAME is unibyte
|
||||
and pure-ASCII. Make a multibyte string out of
|
||||
NAME's data. */
|
||||
name = make_multibyte_string (SSDATA (name),
|
||||
SCHARS (name), SCHARS (name));
|
||||
multibyte = 1;
|
||||
}
|
||||
else
|
||||
default_directory = string_to_multibyte (default_directory);
|
||||
}
|
||||
else
|
||||
{
|
||||
name = string_to_multibyte (name);
|
||||
multibyte = 1;
|
||||
default_directory = make_unibyte_string (SSDATA (default_directory),
|
||||
SBYTES (default_directory));
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user