1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2025-01-23 18:47:57 +00:00

* international/mule.el (sgml-xml-auto-coding-function): Detect

and warn if file encoding is not utf-8 and encoding not specified.
(xml-find-file-coding-system): New function.
* international/mule-conf.el (file-coding-system-alist): Use it.
This commit is contained in:
Jason Rumney 2008-02-18 01:45:54 +00:00
parent a70f5385f7
commit c657861758
3 changed files with 48 additions and 6 deletions

View File

@ -1,3 +1,10 @@
2008-02-18 Jason Rumney <jasonr@gnu.org>
* international/mule.el (sgml-xml-auto-coding-function): Detect
and warn if file encoding is not utf-8 and encoding not specified.
(xml-find-file-coding-system): New function.
* international/mule-conf.el (file-coding-system-alist): Use it.
2008-02-17 Glenn Morris <rgm@gnu.org>
* international/mule-cmds.el (set-locale-environment): Pass

View File

@ -1470,11 +1470,7 @@ for decoding and encoding files, process I/O, etc."
(setq file-coding-system-alist
'(("\\.elc\\'" . utf-8-emacs)
("\\.utf\\(-8\\)?\\'" . utf-8)
;; This is the defined default for XML documents. It may be
;; overridden by a charset specification in the header. That
;; should be grokked by the auto-coding mechanism, but rms
;; vetoed that. -- fx
("\\.xml\\'" . utf-8)
("\\.xml\\'" . xml-find-file-coding-system)
;; We use raw-text for reading loaddefs.el so that if it
;; happens to have DOS or Mac EOLs, they are converted to
;; newlines. This is required to make the special treatment

View File

@ -2288,7 +2288,22 @@ This function is intended to be added to `auto-coding-functions'."
sym
(message "Warning: unknown coding system \"%s\"" match)
nil))
'utf-8)))))
;; Files without an encoding tag should be UTF-8. But users
;; may be naive about encodings, and have saved the file from
;; another editor that does not help them get the encoding right.
;; Detect the encoding and warn the user if it is detected as
;; something other than UTF-8.
(let ((detected
(with-coding-priority '(utf-8)
(coding-system-base
(detect-coding-region (point-min) size t)))))
;; Pure ASCII always comes back as undecided.
(if (memq detected '(utf-8 undecided))
'utf-8
(warn "File contents detected as %s.
Consider adding an encoding attribute to the xml declaration,
or saving as utf-8, as mandated by the xml specification." detected)
detected)))))))
(defun sgml-html-meta-auto-coding-function (size)
"If the buffer has an HTML meta tag, use it to determine encoding.
@ -2314,6 +2329,30 @@ This function is intended to be added to `auto-coding-functions'."
(message "Warning: unknown coding system \"%s\"" match)
nil)))))
(defun xml-find-file-coding-system (args)
  "Determine the coding system of an XML file without a declaration.
Strictly speaking, the file should be utf-8, but mistakes are
made, and there are genuine cases where XML fragments are saved,
with the encoding properly specified in a master document, or
added by processing software.

ARGS is the argument list this function receives when it is used
as the value of a `file-coding-system-alist' entry; its first
element names the I/O operation being performed.

For `insert-file-contents', the accessible portion of the current
buffer is examined with utf-8 given the highest detection
priority.  The value is `utf-8' if the text is detected as utf-8
or is pure ASCII.  If the text is detected as UTF-16 with a byte
order mark, which the XML specification also permits without a
declaration, the detected coding system is returned silently.
Otherwise a warning is displayed and the detected coding system
is returned.

For any other operation the value is `undecided'."
  (if (eq (car args) 'insert-file-contents)
      (let ((detected
             (with-coding-priority '(utf-8)
               (coding-system-base
                (detect-coding-region (point-min) (point-max) t)))))
        (cond
         ;; Pure ASCII always comes back as undecided.
         ((memq detected '(utf-8 undecided))
          'utf-8)
         ;; UTF-16 with a byte order mark is conforming XML even
         ;; without an encoding declaration, so do not warn about it.
         ((memq detected '(utf-16le-with-signature
                           utf-16be-with-signature))
          detected)
         (t
          (warn "File contents detected as %s.
Consider adding an xml declaration with the encoding specified,
or saving as utf-8, as mandated by the xml specification." detected)
          detected)))
    ;; Don't interfere with the user's wishes for saving the buffer.
    ;; We did what we could when the buffer was created to ensure the
    ;; correct encoding was used, or the user was warned, so any
    ;; non-conformity here is deliberate on the part of the user.
    'undecided))
;;;
(provide 'mule)