[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: windows-1252
>>>>> In [emacs-w3m : No.08013] Katsumi Yamaoka wrote:
> I'm now working to merge nnrss.el of the Gnus trunk into the Emacs
> trunk (i.e., Gnus v5.11 which supports Emacs 20 too), and I found it
> sometimes fails because of windows-1252. Sigh.
Today I changed nnrss.el in the Gnus CVS repository so that it
uses the windows-1252 coding system, when it is available, as a
substitute for iso-8859-1 [1]. I then tried the same approach
in emacs-w3m, and it seems to work well so far. Although this
requires that windows-1252 be available, it can be provided even
in Emacs 21 by using ARISAWA Akihiro's codepage-ex.el.
[1] http://article.gmane.org/gmane.emacs.gnus.general/60092
http://news.gmane.org/group/gmane.emacs.gnus.general/thread=59851
WDYT? The patch is below.
--- shimbun/sb-rss.el~ 2005-02-22 01:59:05 +0000
+++ shimbun/sb-rss.el 2005-04-06 12:17:23 +0000
@@ -208,20 +208,30 @@
;;; XML functions
+(defvar shimbun-rss-compatible-encoding-alist '((iso-8859-1 . windows-1252))
+ "Alist of encodings and their supersets.
+The cdr of each element is used to decode data if it is available when
+the car is what the data specify as the encoding. Or, the car is used
+for decoding when the cdr that the data specify is not available.")
+
(defun shimbun-rss-get-encoding ()
- (let (end encoding)
- (cond
- ((search-forward "<?" nil t nil)
- (let ((pos (point)))
- (setq end (search-forward "?>"))
- (goto-char pos))
- (setq encoding
- (if (re-search-forward "encoding=\"\\([^ ]+\\)\"" end t)
- (downcase (match-string-no-properties 1))
- "utf-8")))
- (t ;; XML Default encoding.
- (setq encoding "utf-8")))
- (intern-soft (concat encoding "-dos"))))
+ "Return an encoding attribute specified in the current xml contents.
+If `shimbun-rss-compatible-encoding-alist' specifies the compatible
+encoding, it is used instead. If the xml contents don't specify the
+encoding, return `utf-8' which is the default encoding for xml if it
+is available, otherwise return nil."
+ (if (re-search-forward
+ "<\\?[^>]*encoding=\\(\"\\([^\">]+\\)\"\\|'\\([^'>]+\\)'\\)"
+ nil t)
+ (let ((encoding (intern (downcase (or (match-string 2)
+ (match-string 3))))))
+ (or
+ (shimbun-find-coding-system
+ (cdr (assq encoding shimbun-rss-compatible-encoding-alist)))
+ (shimbun-find-coding-system encoding)
+ (shimbun-find-coding-system
+ (car (rassq encoding shimbun-rss-compatible-encoding-alist)))))
+ (shimbun-find-coding-system 'utf-8)))
(defun shimbun-rss-node-text (namespace local-name element)
(let* ((node (assq (intern (concat namespace (symbol-name local-name)))
--- w3m-xmas.el~ 2005-03-23 23:27:22 +0000
+++ w3m-xmas.el 2005-04-06 12:17:23 +0000
@@ -96,9 +96,15 @@
(require 'pccl)
;;; Handle coding system:
-(defalias 'w3m-find-coding-system (if (fboundp 'find-coding-system)
- 'find-coding-system
- 'ignore))
+(eval-when-compile
+ (unless (fboundp 'find-coding-system)
+ (defalias 'find-coding-system 'ignore)))
+
+(defalias 'w3m-find-coding-system
+ (if (fboundp 'find-coding-system)
+ (lambda (obj)
+ (and obj (find-coding-system obj)))
+ 'ignore))
;; Under XEmacs 21.5-b6 and later, `make-ccl-coding-system' will
;; signal an error if the coding-system has already been defined.
--- w3m.el~ 2005-03-23 03:56:11 +0000
+++ w3m.el 2005-04-06 12:17:23 +0000
@@ -4004,11 +4004,17 @@
(defun w3m-detect-xml-charset ()
(let ((case-fold-search t))
(goto-char (point-min))
- (or (when (looking-at "[ \t\r\f\n]*<\\?xml[ \t\r\f\n]+")
- (goto-char (match-end 0))
- (w3m-parse-attributes ((encoding :case-ignore))
- encoding))
- "utf-8")))
+ (when (looking-at "[ \t\r\f\n]*<\\?xml[ \t\r\f\n]+")
+ (goto-char (match-end 0))
+ (or (w3m-parse-attributes ((encoding :case-ignore))
+ encoding)
+ "utf-8"))))
+
+(defvar w3m-compatible-encoding-alist '((iso-8859-1 . windows-1252))
+ "Alist of encodings and their supersets.
+The cdr of each element is used to decode data if it is available when
+the car is what the data specify as the encoding. Or, the car is used
+for decoding when the cdr that the data specify is not available.")
(defun w3m-decode-buffer (url &optional content-charset content-type)
(let (cs)
@@ -4019,10 +4025,7 @@
(or (w3m-content-charset url)
(when (string= "text/html" content-type)
(w3m-detect-meta-charset))
- (when (string-match
- "\\`\\(application\\|text\\)/\\([a-z]+\\+\\)?xml\\'"
- content-type)
- (w3m-detect-xml-charset)))))
+ (w3m-detect-xml-charset))))
(cond
((and (eq w3m-type 'w3mmee)
(or (and (stringp content-charset)
@@ -4034,13 +4037,17 @@
(setq content-charset (w3m-correct-charset content-charset))
(setq cs (w3m-charset-to-coding-system content-charset))))
(setq w3m-current-content-charset content-charset)
+ (unless cs
+ (setq cs (w3m-detect-coding-region (point-min) (point-max)
+ (if (w3m-url-local-p url)
+ nil
+ w3m-coding-system-priority-list))))
(setq w3m-current-coding-system
- (or cs
- (w3m-detect-coding-region
- (point-min) (point-max)
- (if (w3m-url-local-p url)
- nil
- w3m-coding-system-priority-list))))
+ (or (w3m-find-coding-system
+ (cdr (assq cs w3m-compatible-encoding-alist)))
+ (w3m-find-coding-system cs)
+ (w3m-find-coding-system
+ (car (rassq cs w3m-compatible-encoding-alist)))))
(set-buffer-multibyte t)
(decode-coding-region (point-min) (point-max) w3m-current-coding-system)))