[Date Prev][Date Next][Thread Prev][][Date Index][Thread Index]

Re: charset attribute of anchor tag



>> On Wed, 18 Jun 2003 09:53:16 +0300
>> pgas@intracom.gr (Pierre Gaston) said as follows:

>gosh, if only i was not so lazy and had not drop these Japanese
>lectures....

I am sorry for my laziness in English composition.

>anyway, I've downloaded the w3m.el w3m-proc.el w3m-form.el from cvs
>and put these in the rc4 release.

Because the files you took from the CVS head are not compatible with
the corresponding files of emacs-w3m-1.3.4, you will run into
unexpected side effects.

You should use the patch attached at the end of this message instead of
replacing files.  It provides the same change described in
[emacs-w3m:05228], but it has not been tested yet.  So, I am sure that it
still has side effects (this is the reason why I have not installed it in
the stable branch).

-- 
TSUCHIYA Masatoshi

diff -u -r1.797 w3m.el
--- w3m.el	17 Jun 2003 01:18:41 -0000	1.797
+++ w3m.el	18 Jun 2003 01:53:41 -0000
@@ -2272,6 +2272,30 @@
 	 w3m-coding-system
 	 'iso-2022-7bit))))
 
+(defsubst w3m-url-transfer-encode-string (url &optional coding)
+  "Percent-encode every character of URL outside the range 0x21-0x7e.
+Each such run (spaces and control characters included) is encoded with
+CODING, or `w3m-current-coding-system' when CODING is nil, and every
+resulting octet is written as %xx.  This function is designed for safe
+transmission of URL, so characters inside that range are left as they
+are.  In general, you should use `w3m-url-encode-string' instead of this."
+  (let ((start 0)
+	(buf))  ; result pieces, accumulated in reverse order
+    (while (string-match "[^\x21-\x7e]+" url start)
+      (setq buf
+	    (cons (apply 'concat
+			 (mapcar
+			  (lambda (c) (format "%%%02x" c))  ; one lowercase-hex escape per octet
+			  (append (encode-coding-string
+				   (match-string 0 url)
+				   (or coding
+				       w3m-current-coding-system)))))
+		  (cons (substring url start (match-beginning 0))  ; untouched text before the run
+			buf))
+	    start (match-end 0)))  ; resume scanning after the run
+    (apply 'concat
+	   (nreverse (cons (substring url start) buf)))))  ; add the tail, restore order
+
 
 ;;; HTML character entity handling:
 (defun w3m-entity-db-setup ()
@@ -2376,7 +2400,8 @@
     (while (re-search-forward "<a[ \t\r\f\n]+" nil t)
       (setq start (match-beginning 0))
       (setq prenames (get-text-property start 'w3m-name-anchor))
-      (w3m-parse-attributes (href name (rel :case-ignore) (hseq :integer))
+      (w3m-parse-attributes (href name charset
+				  (rel :case-ignore) (hseq :integer))
 	(when rel
 	  (setq rel (split-string rel))
 	  (cond
@@ -2391,9 +2416,13 @@
 	  (when (re-search-forward "[ \t\r\f\n]*\\(</a>\\)" nil t)
 	    (setq end (match-beginning 0))
 	    (delete-region (match-beginning 1) (match-end 1))
-	    (setq href (w3m-expand-url (w3m-decode-anchor-string href)))
-	    (setq hseq (or (and (null hseq) 0) (abs hseq)))
-	    (setq w3m-max-anchor-sequence (max hseq w3m-max-anchor-sequence))
+	    (setq href (w3m-url-transfer-encode-string
+			(w3m-expand-url (w3m-decode-anchor-string href))
+			(if charset
+			    (w3m-charset-to-coding-system charset)
+			  w3m-current-coding-system))
+		  hseq (or (and (null hseq) 0) (abs hseq))
+		  w3m-max-anchor-sequence (max hseq w3m-max-anchor-sequence))
 	    (w3m-add-text-properties start end
 				     (list 'face (if (w3m-arrived-p href)
 						     'w3m-arrived-anchor-face
@@ -2919,7 +2948,7 @@
 	(setq w3m-input-url-history
 	      (cons url (delete url w3m-input-url-history))))
       ;; return value
-      url)))
+      (w3m-url-transfer-encode-string url w3m-default-coding-system))))
 
 
 ;;; Cache:
diff -u -r1.105 w3m-form.el
--- w3m-form.el	8 Jun 2003 06:15:51 -0000	1.105
+++ w3m-form.el	18 Jun 2003 01:53:41 -0000
@@ -448,7 +448,11 @@
 					(enctype :case-ignore)
 					(charset :case-ignore))
 	    (when action
-	      (setq action (w3m-decode-anchor-string action)))
+	      (setq action (w3m-url-transfer-encode-string
+			    (w3m-decode-anchor-string action)
+			    (if charset
+				(w3m-charset-to-coding-system charset)
+			      w3m-current-coding-system))))
 	    (setq forms
 		  (cons
 		   (cons