diff --git a/lisp/mail/rmail.el b/lisp/mail/rmail.el index e29ac965947..a1207e6d18e 100644 --- a/lisp/mail/rmail.el +++ b/lisp/mail/rmail.el @@ -2619,6 +2619,100 @@ buffer to the end of the headers." (goto-char lim)))) (t (error "No headers selected for display!")))))))) +(defun rmail-redecode-body (coding &optional raw) + "Decode the body of the current message using coding system CODING. +This is useful with mail messages that have malformed or missing +charset= headers. + +This function assumes that the current message is already decoded +and displayed in the RMAIL buffer, but the coding system used to +decode it was incorrect. It then encodes the message back to its +original form, and decodes it again, using the coding system CODING. + +Optional argument RAW, if non-nil, means don't encode the message +before decoding it with the new CODING. This is useful if the current +message text was produced by some function which invokes `insert', +since `insert' leaves unibyte character codes 128 through 255 unconverted +to multibyte. One example of such a situation is when the text was +produced by `base64-decode-region'. + +Interactively, invoke the function with a prefix argument to set RAW +non-nil. + +Note that if Emacs erroneously auto-detected one of the iso-2022 +encodings in the message, this function might fail because the escape +sequences that switch between character sets and also single-shift and +locking-shift codes are impossible to recover. This function is meant +to be used to fix messages encoded with 8-bit encodings, such as +iso-8859, koi8-r, etc." + (interactive "zCoding system for re-decoding this message: ") + (when (not rmail-enable-mime) + (save-excursion + (set-buffer rmail-buffer) + (rmail-swap-buffers-maybe) + (save-restriction + (widen) + (let ((raw (or raw current-prefix-arg)) + (msgbeg (rmail-msgbeg rmail-current-message)) + (msgend (rmail-msgend rmail-current-message)) + (buffer-read-only nil) + body-start x-coding-header old-coding) + (narrow-to-region msgbeg msgend) + (goto-char (point-min)) + (unless (setq body-start (search-forward "\n\n" (point-max) 1)) + (error "No message body")) + + (save-restriction + ;; Narrow to headers + (narrow-to-region (point-min) body-start) + (goto-char (point-min)) + (unless (re-search-forward "^X-Coding-System: *\\(.*\\)$" nil t) + (error "No X-Coding-System header found")) + (setq old-coding (intern (match-string 1))) + (check-coding-system old-coding) + ;; Make sure the new coding system uses the same EOL + ;; conversion, to prevent ^M characters from popping up + ;; all over the place. + (setq coding + (coding-system-change-eol-conversion + coding (coding-system-eol-type old-coding))) + ;; If old-coding is `undecided', encode-coding-region + ;; will not encode the text at all. Find a proper + ;; non-trivial encoding to use. + (when (memq (coding-system-base old-coding) '(nil undecided)) + (setq old-coding + (car (find-coding-systems-region msgbeg msgend)))) + (setq x-coding-header (point))) + + (save-restriction + ;; Narrow to message body + (narrow-to-region body-start (point-max)) + (and (null raw) + ;; If old and new encoding are the same, it + ;; clearly doesn't make sense to encode. + (not (coding-system-equal + (coding-system-base old-coding) + (coding-system-base coding))) + ;; If the body includes only eight-bit-* + ;; characters, encoding might fail, e.g. with + ;; UTF-8, and isn't needed anyway. + (> (length (delq 'ascii + (delq 'eight-bit-graphic + (delq 'eight-bit-control + (find-charset-region + (point-min) (point-max)))))) + 0) + (encode-coding-region (point-min) (point-max) old-coding)) + (decode-coding-region (point-min) (point-max) coding) + (setq last-coding-system-used coding)) + + ;; Rewrite the coding-system header. + (goto-char x-coding-header) + (delete-region (line-beginning-position) (point)) + (insert "X-Coding-System: " + (symbol-name last-coding-system-used)) + (rmail-show-message-maybe)))))) + ;; Find all occurrences of certain fields, and highlight them. (defun rmail-highlight-headers () ;; Do this only if the system supports faces.