[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
w3m-ucs-e21
- From: ari@xxxxxxxxxxxxxxxx (ARISAWA Akihiro)
- Date: Sat, 12 Jul 2003 01:41:57 +0900
- X-ml-name: emacs-w3m
- X-mail-count: 05410
Emacs21 内蔵の unicode に対応した w3m-euc-japan-encoder を作成してみました。
添付のファイルを load してから、w3m を load すると利用できます。
Emacs21 w/o Mule-UCS と素の w3m で utf-8 のページを見れたりするはずです。
ただし、Emacs21.3 以前の unicode 対応は日本語等が含まれていないので、
あまり嬉しくありません。21.3.50 であれば CJK に対応しているので、
使いでがあるかと思います。
;; 個人的には 21.3.50 はまだ日常的に使う気になれないので、要望等が
;; 無ければ emacs-w3m への組込みは、pretest が始まるくらいまでは
;; 待とうかと思っています。
--
有沢 明宏
;(utf-translate-cjk-mode 1)
;; The constant is wrapped in `eval-and-compile' because it is spliced
;; into a `define-ccl-program' form later in this file, so its value
;; must also be available while the file is being byte-compiled.
(eval-and-compile
  (defconst w3m-ucs-generate-ncr-program
    `((;; (1) Convert a set of r1 (charset-id) and r0 (codepoint) to a
       ;;     UCS codepoint.  The result is left in r1.
       (if (r1 == ,(charset-id 'latin-iso8859-1))
           ;; r0 holds the 7-bit position code; add 128 to get the
           ;; codepoint.
           ((r1 = (r0 + 128)))
         ;; For the three mule-unicode-* charsets r0 packs two 7-bit
         ;; position codes (high one in bits 7..13).  Each is biased by
         ;; 32 and a row holds 96 cells, hence (HIGH - 32) * 96 + LOW
         ;; - 32 + BASE.
         (if (r1 == ,(charset-id 'mule-unicode-0100-24ff))
             ((r1 = ((((r0 & #x3f80) >> 7) - 32) * 96))
              (r0 &= #x7f)
              (r1 += (r0 + 224)))       ; 224 == -32 + #x0100
           (if (r1 == ,(charset-id 'mule-unicode-2500-33ff))
               ((r1 = ((((r0 & #x3f80) >> 7) - 32) * 96))
                (r0 &= #x7f)
                (r1 += (r0 + 9440)))    ; 9440 == -32 + #x2500
             (if (r1 == ,(charset-id 'mule-unicode-e000-ffff))
                 ((r1 = ((((r0 & #x3f80) >> 7) - 32) * 96))
                  (r0 &= #x7f)
                  (r1 += (r0 + 57312))) ; 57312 == -32 + #xe000
               ;; Any other charset: consult the translation table when
               ;; this Emacs provides the `lookup-character' CCL
               ;; command, otherwise fall back to U+FFFD right away.
               ,(if (fboundp 'ccl-compile-lookup-character)
                    '((lookup-character utf-subst-table-for-encode r1 r0)
                      (if (r7 == 0)     ; lookup failed
                          ;; The target must be the register r1; the
                          ;; CCL registers are r0..r7 only.
                          (r1 = #xfffd)))
                  '((r1 = #xfffd)))))))
       ;; (2) Generate a string which represents a UCS codepoint in NCR.
       ;; First pass: move the hex digits of r1 into r0 in reversed
       ;; order, so that the most significant digit ends up in the low
       ;; nibble of r0.  r2 counts the digits beyond the first one.
       (r0 = 0)
       (r2 = 0)
       (loop
        (r0 = (r0 << 4))
        (r0 |= (r1 & 15))
        (r1 = (r1 >> 4))
        (if (r1 == 0)
            (break)
          ((r2 += 1)
           (repeat))))
       (write "&#x")
       ;; Second pass: emit the digits from the low nibble of r0
       ;; upwards (i.e. most significant first), then the closing ";".
       (loop
        (branch (r0 & 15)
                ;; One `(write CHAR)' statement per nibble value 0..15,
                ;; CHAR being the lower-case hex digit.
                ,@(mapcar
                   (lambda (i)
                     (list 'write (string-to-char (format "%x" i))))
                   '(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15)))
        (r0 = (r0 >> 4))
        (if (r2 == 0)
            ((write ?\;)
             (break))
          ((r2 -= 1)
           (repeat)))))
      ;; This list is spliced with ,@ into the body of an enclosing CCL
      ;; `loop'; the trailing statement restarts that loop.
      (repeat))
    "CCL program to represent other characters in NCR
(Numeric Character References)."))
;; CCL encoder that reads one multibyte character at a time (charset id
;; in r1, codepoint in r0) and writes it out as follows:
;;   - ASCII and the Latin part of JIS X 0201: the codepoint byte as is;
;;   - JIS X 0208 (1978 and current): two bytes, each with bit 7 set;
;;   - the Katakana part of JIS X 0201: #x8e followed by the byte with
;;     bit 7 set;
;;   - everything else: handed to `w3m-ucs-generate-ncr-program'.
;; Each `write-repeat' writes its argument and restarts the loop, so
;; the tests below behave like an if/else-if chain.
(define-ccl-program w3m-euc-japan-encoder
  (let ((id-ascii         (charset-id 'ascii))
        (id-latin-jis     (charset-id 'latin-jisx0201))
        (id-kana-jis      (charset-id 'katakana-jisx0201))
        (id-jisx0208      (charset-id 'japanese-jisx0208))
        (id-jisx0208-1978 (charset-id 'japanese-jisx0208-1978)))
    `(4
      (loop
       (read-multibyte-character r1 r0)
       ;; (1) ASCII characters
       (if (r1 == ,id-ascii)
           (write-repeat r0))
       ;; (2) Latin Part of Japanese JISX0201.1976: convert to ASCII
       (if (r1 == ,id-latin-jis)
           (write-repeat r0))
       ;; (3) Characters of Japanese JISX0208 (either revision).
       ;; r2 is only a scratch flag for the 1978 revision here.
       (r2 = (r1 == ,id-jisx0208-1978))
       (if ((r1 == ,id-jisx0208) | r2)
           (;; Second byte: low 7 bits of r0, bit 7 set.
            (r1 = (r0 & 127))
            (r1 |= 128)
            ;; First byte: the bits above them, bit 7 set.
            (r0 = (r0 >> 7))
            (r0 |= 128)
            (write r0)
            (write-repeat r1)))
       ;; (4) Katakana Part of Japanese JISX0201.1976
       (if (r1 == ,id-kana-jis)
           ((write #x8e)
            (r0 |= 128)
            (write-repeat r0)))
       ;; (5) Anything else is written as a numeric character reference.
       ,@w3m-ucs-generate-ncr-program))))
;; Point `w3m-input-coding-system' at the symbol `w3m-euc-japan'.
;; NOTE(review): presumably that coding system is the one built on the
;; `w3m-euc-japan-encoder' CCL program defined above -- its definition
;; is not in this file, confirm in emacs-w3m.  The accompanying message
;; says this file is to be loaded before w3m itself.
(setq w3m-input-coding-system 'w3m-euc-japan)
;; Announce the feature so `(require 'w3m-ucs-e21)' succeeds.
(provide 'w3m-ucs-e21)