1
0
mirror of https://git.savannah.gnu.org/git/emacs.git synced 2025-01-05 11:45:45 +00:00

* lisp/vc/smerge-mode.el: Avoid N² blow up in degenerate cases

(smerge--refine-long-words): New var.
(smerge--refine-chopup-region): Use it.
This commit is contained in:
Stefan Monnier 2017-07-27 00:21:35 -04:00
parent ea87506088
commit 2d1d54d333

View File

@ -938,15 +938,15 @@ It has the following disadvantages:
- cannot use `diff -w' because the weighting causes added spaces in a line
to be represented as added copies of some line, so `diff -w' can't do the
right thing any more.
- may in degenerate cases take a 1KB input region and turn it into a 1MB
file to pass to diff.")
- Is a bit more costly (may in degenerate cases use temp files that are 10x
larger than the refined regions).")
(defun smerge--refine-forward (n)
(let ((case-fold-search nil)
(re "[[:upper:]]?[[:lower:]]+\\|[[:upper:]]+\\|[[:digit:]]+\\|.\\|\n"))
(when (and smerge-refine-ignore-whitespace
;; smerge-refine-weight-hack causes additional spaces to
;; appear as additional lines as well, so even if diff ignore
;; appear as additional lines as well, so even if diff ignores
;; whitespace changes, it'll report added/removed lines :-(
(not smerge-refine-weight-hack))
(setq re (concat "[ \t]*\\(?:" re "\\)")))
@ -954,6 +954,8 @@ It has the following disadvantages:
(unless (looking-at re) (error "Smerge refine internal error"))
(goto-char (match-end 0)))))
(defvar smerge--refine-long-words)
(defun smerge--refine-chopup-region (beg end file &optional preproc)
"Chopup the region into small elements, one per line.
Save the result into FILE.
@ -976,18 +978,46 @@ chars to try and eliminate some spurious differences."
(subst-char-in-region (point-min) (point-max) ?\n ?\s))
(goto-char (point-min))
(while (not (eobp))
(funcall smerge-refine-forward-function 1)
(let ((s (if (prog2 (forward-char -1) (bolp) (forward-char 1))
nil
(buffer-substring (line-beginning-position) (point)))))
;; We add \n after each char except after \n, so we get
;; one line per text char, where each line contains
;; just one char, except for \n chars which are
;; represented by the empty line.
(unless (eq (char-before) ?\n) (insert ?\n))
;; HACK ALERT!!
(if smerge-refine-weight-hack
(dotimes (_i (1- (length s))) (insert s "\n")))))
(cl-assert (bolp))
(let ((start (point)))
(funcall smerge-refine-forward-function 1)
(let ((len (- (point) start)))
(cl-assert (>= len 1))
;; We add \n after each chunk except after \n, so we get
;; one line per text chunk, where each line contains
;; just one chunk, except for \n chars which are
;; represented by the empty line.
(unless (bolp) (insert ?\n))
(when (and smerge-refine-weight-hack (> len 1))
(let ((s (buffer-substring-no-properties start (point))))
;; The weight-hack inserts N copies of words of size N,
;; so it naturally suffers from an O(N²) blow up.
;; To circumvent this, we map each long word
;; to a shorter (but still unique) replacement.
;; Another option would be to change smerge--refine-forward
;; so it chops up long words into smaller ones.
(when (> len 8)
(let ((short (gethash s smerge--refine-long-words)))
(unless short
;; To avoid accidental conflicts with ≤8 words,
;; we make sure the replacement is >8 chars. Overall,
;; this should bound the blowup factor to ~10x,
;; though if those chars end up encoded as multiple bytes
;; each, it could probably still reach ~30x in
;; pathological cases.
(setq short
(concat (substring s 0 7)
" "
(string
(+ ?0
(hash-table-count
smerge--refine-long-words)))
"\n"))
(puthash s short smerge--refine-long-words))
(delete-region start (point))
(insert short)
(setq s short)))
(dotimes (_i (1- len)) (insert s)))))))
(unless (bolp) (error "Smerge refine internal error"))
(let ((coding-system-for-write 'emacs-internal))
(write-region (point-min) (point-max) file nil 'nomessage))))
@ -1042,7 +1072,9 @@ used to replace chars to try and eliminate some spurious differences."
(let* ((pos (point))
deactivate-mark ; The code does not modify any visible buffer.
(file1 (make-temp-file "diff1"))
(file2 (make-temp-file "diff2")))
(file2 (make-temp-file "diff2"))
(smerge--refine-long-words
(if smerge-refine-weight-hack (make-hash-table :test #'equal))))
(unless (markerp beg1) (setq beg1 (copy-marker beg1)))
(unless (markerp beg2) (setq beg2 (copy-marker beg2)))
;; Chop up regions into smaller elements and save into files.
@ -1062,7 +1094,7 @@ used to replace chars to try and eliminate some spurious differences."
;; also and more importantly because otherwise it
;; may happen that diff doesn't behave like
;; smerge-refine-weight-hack expects it to.
;; See http://thread.gmane.org/gmane.emacs.devel/82685.
;; See http://thread.gmane.org/gmane.emacs.devel/82685, aka https://lists.gnu.org/archive/html/emacs-devel/2007-11/msg00401.html
"-awd" "-ad")
file1 file2))
;; Process diff's output.