[Date Prev][Date Next][Thread Prev][][Date Index][Thread Index]

Re: Bug#728501: w3m-el: mew-w3m-region & multiple blockquote



In [emacs-w3m : No.12209] Kinoshita-san wrote:
> Forwarding from Debian Bug:
> http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=728501

> On November 2, 2013 at 12:00AM +0100, Christophe.Troestler (at
> umons.ac.be) wrote:
>> Package: w3m-el
>> Version: 1.4.483+0.20120614-3
>> Severity: normal
>>
>> Dear Maintainer,
>>
>> The function `mew-w3m-region` in mew-w3m.el does not work with a
>> blockquote inside another blockquote (this may happen when citations
>> are inside other citations in emails).  I guess one must use
>> `w3m-end-of-tag` instead of `w3m-search-tag` to locate the closing
>> tag.
>>
>> Also, <div> tags start new lines ― which must be quoted ― but one has
>> to be careful that <br><div>, <br></div><div><div>, ... only produce a
>> single new line.
>>
>> It would be appreciated if these problems were fixed.
>>
>> Best,
>> C.

I tried improving `mew-w3m-region' (I'm not a Mew user though).
Does the attached patch make it do what you think is right?
If not, could you send me an HTML example?
--- mew-w3m.el~	2010-01-13 00:59:40.000000000 +0000
+++ mew-w3m.el	2013-11-05 03:31:53.844275200 +0000
@@ -161,56 +161,102 @@
 	  (mew-w3m-add-text-properties `(w3m-images ,(not image)))
 	  (set-buffer-modified-p nil)))))))
 
+(defun mew-w3m-cite-blockquote (&optional inside-blockquote)
+  "Quote paragraphs in <blockquote>...</blockquote> with the citation mark.
+The variable `mew-w3m-region-cite-mark' specifies the citation mark."
+  (let ((case-fold-search t))
+    (while (and (re-search-forward "\
+\[\t\n ]*<[\t\n ]*blockquote\\(?:[\t\n ]*>\\|[\t\n ]+[^>]+>\\)" nil t)
+		(w3m-end-of-tag "blockquote" t))
+      (save-restriction
+	(narrow-to-region (match-beginning 0) (match-end 0))
+	(delete-region (goto-char (match-end 3)) (match-end 0))
+	(insert "\n")
+	(delete-region (goto-char (point-min)) (match-beginning 3))
+	(insert "\n")
+	(while (and (re-search-forward
+		     "<[\t\n ]*pre\\(?:[\t\n ]*>\\|[\t\n ]+[^>]+>\\)" nil t)
+		    (w3m-end-of-tag "pre" t))
+	  (delete-region (goto-char (match-end 2)) (match-end 0))
+	  (if (bolp)
+	      (when (looking-at "\n+") (replace-match ""))
+	    (insert "\n"))
+	  (delete-region (goto-char (match-beginning 0)) (match-beginning 2))
+	  (if (bolp)
+	      (when (looking-at "\n+") (replace-match ""))
+	    (insert "\n")))
+	(goto-char (point-min))
+	(mew-w3m-cite-blockquote 'inside-blockquote)
+	(goto-char (point-min))
+	(while (re-search-forward
+		"[\t\n ]*<br\\(?:[\t\n ]*>\\|[\t\n ]+[^>]+>\\)" nil t)
+	  (replace-match "\n"))
+	(goto-char (point-min))
+	(while (and (re-search-forward
+		     "[\t\n ]*<div\\(?:[\t\n ]*>\\|[\t\n ]+[^>]+>\\)" nil t)
+		    (w3m-end-of-tag "div"))
+	  (goto-char (match-end 0))
+	  (insert "\n")
+	  (goto-char (match-beginning 0))
+	  (insert "\n"))
+	(goto-char (point-min))
+	(while (re-search-forward "^[\t <>]+$" nil t)
+	  (replace-match ""))
+	(goto-char (point-min))
+	(while (re-search-forward "\n\n\n+" nil t)
+	  (replace-match "\n\n"))
+	(goto-char (point-min))
+	(when mew-w3m-region-cite-mark
+	  (goto-char (point-min))
+	  (while (re-search-forward "[^\t\n ]" nil t)
+	    (beginning-of-line)
+	    (if (looking-at "[\t ]+")
+		(replace-match mew-w3m-region-cite-mark)
+	      (insert mew-w3m-region-cite-mark))
+	    (end-of-line)))
+	(unless inside-blockquote
+	  ; "> > > " --> ">>> "
+	  (when (and mew-w3m-region-cite-mark
+		     (string-match "&nbsp;\\'" mew-w3m-region-cite-mark))
+	    (let ((base (substring mew-w3m-region-cite-mark
+				   0 (match-beginning 0)))
+		  (regexp (regexp-quote mew-w3m-region-cite-mark)))
+	      (setq regexp (concat "^" regexp "\\(?:" regexp "\\)+"))
+	      (goto-char (point-min))
+	      (while (re-search-forward regexp nil t)
+		(dotimes (i (prog1
+				(/ (- (match-end 0) (match-beginning 0))
+				   (length mew-w3m-region-cite-mark))
+			      (delete-region (match-beginning 0)
+					     (match-end 0))))
+		  (insert base))
+		(insert "&nbsp;"))))
+	  (goto-char (point-min))
+	  (insert "<pre>")
+	  (goto-char (point-max))
+	  (insert "</pre>\n"))))))
+
 (defun mew-w3m-region (start end &optional url charset)
   "w3m-region with inserting the cite mark."
   (if (null mew-w3m-region-cite-mark)
       (w3m-region start end url charset)
     (save-restriction
       (narrow-to-region start end)
-      (let ((case-fold-search t)
-	    pos lines tagbeg0 tagend0 tagbeg1 tagend1)
-	(goto-char (point-min))
-	(while (w3m-search-tag "blockquote")
-	  (setq tagbeg0 (match-beginning 0))
-	  (setq tagend0 (match-end 0))
-	  (when (w3m-search-tag "/blockquote")
-	    (setq tagbeg1 (match-beginning 0))
-	    (setq tagend1 (match-end 0))
-	    (setq lines (buffer-substring tagend0 tagbeg1))
-	    (delete-region tagbeg0 tagend1)
-	    (insert (with-temp-buffer
-		      (insert lines)
-		      (goto-char (point-min))
-		      (if (and (w3m-search-tag "pre")
-			       (setq tagbeg0 (match-beginning 0))
-			       (setq tagend0 (match-end 0))
-			       (w3m-search-tag "/pre")
-			       (setq tagbeg1 (match-beginning 0))
-			       (setq tagend1 (match-end 0)))
-			  (progn
-			    (delete-region tagbeg1 tagend1)
-			    (delete-region tagbeg0 tagend0))
-			;; delete <br>
-			(goto-char (point-min))
-			(while (w3m-search-tag "br")
-			  (delete-region (match-beginning 0) (match-end 0))
-			  (unless (looking-at "[\n\r]") (insert "\n"))))
-		      (goto-char (point-max))
-		      (skip-chars-backward " \t\n\f\r")
-		      (delete-region (point) (point-max))
-		      (goto-char (point-min))
-		      (skip-chars-forward " \t\n\f\r")
-		      (delete-region (point-min) (point))
-		      (goto-char (point-min))
-		      (while (not (eobp))
-			(insert mew-w3m-region-cite-mark)
-			(forward-line 1))
-		      (goto-char (point-min))
-		      (insert "<pre>\n")
-		      (goto-char (point-max))
-		      (insert "\n</pre>\n")
-		      (buffer-substring (point-min) (point-max)))))))
-      (w3m-region (point-min) (point-max) url charset))))
+      (mew-w3m-cite-blockquote)
+      (w3m-region (point-min) (point-max) url charset)
+      (goto-char (point-min))
+      (while (re-search-forward "^[\t ]+$" nil t)
+	(replace-match ""))
+      (goto-char (point-min))
+      (while (re-search-forward "\n\n\n+" nil t)
+	(replace-match "\n\n"))
+      (goto-char (point-min))
+      (skip-chars-forward "\n")
+      (delete-region (point-min) (point))
+      (goto-char (point-max))
+      (skip-chars-backward "\n")
+      (delete-region (point) (point-max))
+      (insert "\n"))))
 
 ;; processing Text/Html contents with w3m.
 (defun mew-mime-text/html-w3m (&rest args)