[Date Prev][Date Next][Thread Prev][][Date Index][Thread Index]

Re: windows-1252

>>>>> In [emacs-w3m : No.08013] Katsumi Yamaoka wrote:

> I'm now working to merge nnrss.el of the Gnus trunk into the Emacs
> trunk (i.e., Gnus v5.11 which supports Emacs 20 too), and I found it
> sometimes fails because of windows-1252.  Sigh.

Today I changed nnrss.el in the Gnus CVS repository so that it uses
the windows-1252 coding system, when it is available, as a substitute
for iso-8859-1 [1].  I then tried the same approach in emacs-w3m, and
it seems to work well so far.  It does require that windows-1252 be
available, but that coding system can be provided even in Emacs 21 by
ARISAWA Akihiro's codepage-ex.el.

[1] http://article.gmane.org/gmane.emacs.gnus.general/60092

WDYT?  The patch is below.
--- shimbun/sb-rss.el~	2005-02-22 01:59:05 +0000
+++ shimbun/sb-rss.el	2005-04-06 12:17:23 +0000
@@ -208,20 +208,30 @@
 ;;; XML functions
+(defvar shimbun-rss-compatible-encoding-alist '((iso-8859-1 . windows-1252))
+  "Alist of encodings and those supersets.
+The cdr of each element is used to decode data if it is available when
+the car is what the data specify as the encoding.  Or, the car is used
+for decoding when the cdr that the data specify is not available.")
 (defun shimbun-rss-get-encoding ()
-  (let (end encoding)
-    (cond
-     ((search-forward "<?" nil t nil)
-      (let ((pos (point)))
-	(setq end (search-forward "?>"))
-	(goto-char pos))
-      (setq encoding
-	    (if (re-search-forward "encoding=\"\\([^ ]+\\)\"" end t)
-		(downcase (match-string-no-properties 1))
-	      "utf-8")))
-     (t ;; XML Default encoding.
-      (setq encoding "utf-8")))
-    (intern-soft (concat encoding "-dos"))))
+  "Return an encoding attribute specified in the current xml contents.
+If `shimbun-rss-compatible-encoding-alist' specifies the compatible
+encoding, it is used instead.  If the xml contents doesn't specify the
+encoding, return `utf-8' which is the default encoding for xml if it
+is available, otherwise return nil."
+  (if (re-search-forward
+       "<\\?[^>]*encoding=\\(\"\\([^\">]+\\)\"\\|'\\([^'>]+\\)'\\)"
+       nil t)
+      (let ((encoding (intern (downcase (or (match-string 2)
+					    (match-string 3))))))
+	(or
+	 (shimbun-find-coding-system
+	  (cdr (assq encoding shimbun-rss-compatible-encoding-alist)))
+	 (shimbun-find-coding-system encoding)
+	 (shimbun-find-coding-system
+	  (car (rassq encoding shimbun-rss-compatible-encoding-alist)))))
+    (shimbun-find-coding-system 'utf-8)))
 (defun shimbun-rss-node-text (namespace local-name element)
   (let* ((node (assq (intern (concat namespace (symbol-name local-name)))
--- w3m-xmas.el~	2005-03-23 23:27:22 +0000
+++ w3m-xmas.el	2005-04-06 12:17:23 +0000
@@ -96,9 +96,15 @@
 (require 'pccl)
 ;;; Handle coding system:
-(defalias 'w3m-find-coding-system (if (fboundp 'find-coding-system)
-				      'find-coding-system
-				    'ignore))
+  (unless (fboundp 'find-coding-system)
+    (defalias 'find-coding-system 'ignore)))
+(defalias 'w3m-find-coding-system
+  (if (fboundp 'find-coding-system)
+      (lambda (obj)
+	(and obj (find-coding-system obj)))
+    'ignore))
 ;; Under XEmacs 21.5-b6 and later, `make-ccl-coding-system' will
 ;; signal an error if the coding-system has already been defined.
--- w3m.el~	2005-03-23 03:56:11 +0000
+++ w3m.el	2005-04-06 12:17:23 +0000
@@ -4004,11 +4004,17 @@
 (defun w3m-detect-xml-charset ()
   (let ((case-fold-search t))
     (goto-char (point-min))
-    (or (when (looking-at "[ \t\r\f\n]*<\\?xml[ \t\r\f\n]+")
-	  (goto-char (match-end 0))
-	  (w3m-parse-attributes ((encoding :case-ignore))
-	    encoding))
-	"utf-8")))
+    (when (looking-at "[ \t\r\f\n]*<\\?xml[ \t\r\f\n]+")
+      (goto-char (match-end 0))
+      (or (w3m-parse-attributes ((encoding :case-ignore))
+	    encoding)
+	  "utf-8"))))
+(defvar w3m-compatible-encoding-alist '((iso-8859-1 . windows-1252))
+  "Alist of encodings and those supersets.
+The cdr of each element is used to decode data if it is available when
+the car is what the data specify as the encoding.  Or, the car is used
+for decoding when the cdr that the data specify is not available.")
 (defun w3m-decode-buffer (url &optional content-charset content-type)
   (let (cs)
@@ -4019,10 +4025,7 @@
 	    (or (w3m-content-charset url)
 		(when (string= "text/html" content-type)
-		(when (string-match
-		       "\\`\\(application\\|text\\)/\\([a-z]+\\+\\)?xml\\'"
-		       content-type)
-		  (w3m-detect-xml-charset)))))
+		(w3m-detect-xml-charset))))
      ((and (eq w3m-type 'w3mmee)
 	   (or (and (stringp content-charset)
@@ -4034,13 +4037,17 @@
       (setq content-charset (w3m-correct-charset content-charset))
       (setq cs (w3m-charset-to-coding-system content-charset))))
     (setq w3m-current-content-charset content-charset)
+    (unless cs
+      (setq cs (w3m-detect-coding-region (point-min) (point-max)
+					 (if (w3m-url-local-p url)
+					     nil
+					   w3m-coding-system-priority-list))))
     (setq w3m-current-coding-system
-	  (or cs
-	      (w3m-detect-coding-region
-	       (point-min) (point-max)
-	       (if (w3m-url-local-p url)
-		   nil
-		 w3m-coding-system-priority-list))))
+	  (or (w3m-find-coding-system
+	       (cdr (assq cs w3m-compatible-encoding-alist)))
+	      (w3m-find-coding-system cs)
+	      (w3m-find-coding-system
+	       (car (rassq cs w3m-compatible-encoding-alist)))))
     (set-buffer-multibyte t)
     (decode-coding-region (point-min) (point-max) w3m-current-coding-system)))