[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
似非多言語化
emacs-w3m
>> On Thu, 07 Jun 2001 16:26:02 +0900 (JST)
>> 「白井」== shirai@rdmg.mgcs.mei.co.jp (Hideyuki SHIRAI (白井秀行)) said as follows:
白井> こちらは、Encoding が UTF-8 でも使っている文字が日本製なら OK。
白井> euc-japan に mapping 出来れば表示出来る、ということです。
白井> # 例えば、上野さんの日記は UTF-8 だけど、"@" 以外は全部表示でき
白井> # るでしょ。
白井> euc-japan に mapping 出来ない文字に関しては、そういうことです。
白井> http://www.is.titech.ac.jp/~yagi/emacs/pringles.html は典型例で
白井> すよね。
という件に、対処できるように工夫してみました。ただし、Mule-UCS が必要
です。
この実装では、euc-japan で表現できない文字を探し出し、それらの文字を一
つずつ数値文字参照に置換してから、w3m -halfdump を呼び出し、その結果に
含まれている数値文字参照をもう一度置換する、という手続きを踏んでいます。
この中で euc-japan で表現できない文字を探す、という部分の実装がどうに
もうまくないような気がするので、commit を躊躇っているのですが、もっと
簡単な方法はないでしょうか?
Index: w3m.el
===================================================================
RCS file: /home/tsuchiya/cvsroot/emacs-w3m/w3m.el,v
retrieving revision 1.359
diff -u -u -r1.359 w3m.el
--- w3m.el 10 Aug 2001 08:43:41 -0000 1.359
+++ w3m.el 13 Aug 2001 02:29:55 -0000
@@ -81,6 +81,9 @@
(require 'thingatpt)
(require 'timezone)
+(eval-when-compile
+ (ignore-errors (require 'unicode)))
+
;; Add-on programs:
(eval-and-compile
(autoload 'w3m-bookmark-view "w3m-bookmark" nil t)
@@ -138,6 +141,14 @@
(const :tag "w3m-m17n" 'w3m-m17n)
(symbol :tag "other" nil)))
+(defcustom w3m-use-mule-ucs
+ (and (locate-library "unicode.el")
+ (eq w3m-type 'w3m))
+ "*Non nil means internationalization with mule-ucs."
+ :group 'w3m
+ :type 'boolean
+ :require 'unicode)
+
(defcustom w3m-command
(cond ((eq w3m-type 'w3mmee) "w3mmee")
((eq w3m-type 'w3m-m17n) "w3m-m17n")
@@ -660,7 +673,7 @@
latin1-entity))))))
(defconst w3m-entity-regexp
(eval-when-compile
- (format "&\\(%s\\|#[0-9]+\\);?"
+ (format "&\\(%s\\|#[0-9]+\\|#x[0-9a-f]+\\);?"
(if (fboundp 'regexp-opt)
(let ((fn (function regexp-opt)))
;; Don't funcall directly for avoiding compile warning.
@@ -1367,13 +1380,16 @@
(or (symbol-value (intern-soft name w3m-entity-db))
(if (not (char-equal (string-to-char name) ?#))
(concat "&" name) ; unknown entity
- ;; case of immediate character (accept only 0x20 .. 0x7e)
- (let ((char (string-to-int (substring name 1))))
- ;; make character's representation with learning
- (set (intern name w3m-entity-db)
- (if (or (< char 32) (< 127 char))
- "~" ; un-supported character
- (char-to-string char)))))))
+ (setq name (substring name 1))
+ (let ((char (if (char-equal (string-to-char name) ?x)
+ (string-to-number (substring name 1) 16)
+ (string-to-number name))))
+ (if w3m-use-mule-ucs
+ (char-to-string (ucs-to-char char))
+ ;; case of immediate character (accept only 0x20 .. 0x7e)
+ (if (or (< char 32) (< 127 char))
+ "~" ; un-supported character
+ (char-to-string char)))))))
(defun w3m-fontify-bold ()
"Fontify bold characters in this buffer which contains half-dumped data."
@@ -2504,7 +2520,26 @@
(write-repeat r0)))))
(define-ccl-program w3m-euc-japan-encoder
- `(1 (loop (read r0) (write-repeat r0))))
+ `(4
+ (loop
+ (read-multibyte-character r0 r1)
+ (if (r0 != 0)
+ (if (r0 != ,(w3m-static-if (boundp 'MULE)
+ lc-jp
+ (charset-id 'japanese-jisx0208)))
+ (if (r0 != ,(w3m-static-if (boundp 'MULE)
+ lc-kana
+ (charset-id 'katakana-jisx0201)))
+ (if (r0 != ,(w3m-static-if (boundp 'MULE)
+ lc-jp2
+ (charset-id 'japanese-jisx0212)))
+ ((write ?&)
+ (write ?#)
+ (write-multibyte-character r0 r1)
+ (write ?\;)
+ (repeat))))))
+ (write-multibyte-character r0 r1)
+ (repeat))))
(unless (w3m-static-if (featurep 'xemacs)
(find-coding-system 'w3m-euc-japan)
@@ -2552,6 +2587,16 @@
((member "next" rel) (setq w3m-next-url href))
((member "prev" rel) (setq w3m-previous-url href))))))))))
+(defun w3m-replace-non-euc-japan-characters ()
+ "Replace all characters, which are not members of euc-japan, into entities."
+ (encode-coding-region (point-min) (point-max) 'w3m-euc-japan)
+ (decode-coding-region (point-min) (point-max) 'emacs-mule)
+ (goto-char (point-min))
+ (while (search-forward "&#" nil t)
+ (when (looking-at "[^0-9];")
+ (insert (format "%d" (char-to-ucs (char-after))))
+ (delete-char 1))))
+
(defun w3m-rendering-region (start end &optional charset)
"Do rendering of contents in this buffer as HTML and return title."
(save-restriction
@@ -2561,6 +2606,8 @@
(w3m-check-link-tags)
(when w3m-use-form
(w3m-form-parse-region (point-min) (point-max) charset))
+ (when w3m-use-mule-ucs
+ (w3m-replace-non-euc-japan-characters))
(w3m-message "Rendering...")
(let ((coding-system-for-read w3m-output-coding-system)
(coding-system-for-write w3m-input-coding-system)
--
土屋 雅稔 ( TSUCHIYA Masatoshi )
http://www-nagao.kuee.kyoto-u.ac.jp/member/tsuchiya/