[Date Prev][Date Next][Thread Prev][][Date Index][Thread Index]

sb-rss.el



大場です。

丁度 XML でなにかしたかったところなので sb-rss.el を試験的に実装してみ
ました。アイディアは土屋さんの [emacs-w3m:05100] をベースとして
nnrss.el を参考にしています。ありがとうございます。

いろいろと不備、制限があります。また、Gnus の nnrss.el には、まだまだ
機能的に劣ります。

* 継承することを考慮していない
* msg-id がダメすぎる
* 日付も同様にダメダメ
* UTF-8 に決め打ち。Mule-UCS 等が必要
* xml.el に依存する(Emacs21 以上じゃないと付属しない?)

これらは追々解決するとして、サマリー表示と取り合えずのコンテンツ表示が
案外ラクチンに実装できたので、たたき台として送ることにしました。

RSS を参照するサイトとしてテストに使用したの以下のふたつです。

http://mag.autumn.org/
http://bulknews.net/

ビシバシ喝を入れていただけるとありがたいです。よろしくお願いします。
;;; sb-rss.el --- shimbun backend for RSS

;; Copyright (C) 2003 Koichiro Ohba  <koichiro@meadowy.org>

;; Author: Koichiro Ohba  <koichiro@meadowy.org>
;; Keywords: news

;; This file is a part of shimbun.

;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with this program; if not, you can either send email to this
;; program's maintainer or write to: The Free Software Foundation,
;; Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA.

;;; Code:

(eval-when-compile (require 'cl))

(require 'shimbun)
(eval-when-compile
  (ignore-errors
    (require 'xml)))
(eval '(require 'xml))

(luna-define-class shimbun-rss (shimbun) ())

(defvar shimbun-rss-group-alist
  '(("mag.autumn.org" "http://mag.autumn.org/rss.aspx";)
    ("asahi.com" "http://bulknews.net/rss/rdf.cgi?Asahi";))
  "An alist of RSS shimbun group definition.")

(defvar shimbun-rss-coding-system 'utf-8)

(defvar shimbun-rss-content-start
  "<body>")

(defvar shimbun-rss-content-end
  "</body>")

(luna-define-method shimbun-groups ((shimbun shimbun-rss))
  (mapcar 'car shimbun-rss-group-alist))

(luna-define-method shimbun-index-url ((shimbun shimbun-rss))
  (cadr (assoc (shimbun-current-group-internal shimbun)
	       shimbun-rss-group-alist)))

(luna-define-method shimbun-headers ((shimbun shimbun-rss)
				     &optional range)
  (with-temp-buffer
    (shimbun-retrieve-url (shimbun-index-url shimbun) 'reload 'binary)
    (set-buffer-multibyte t)
    (decode-coding-region (point-min) (point-max)
			  (shimbun-coding-system-internal shimbun))
    (let (headers from subject date id url stime st body
		  author extra
		  xml dc-ns rdf-ns rss-ns content-ns)
      (setq xml (xml-parse-region (point-min) (point-max)))
      ;; See
      ;; http://feeds.archive.org/validator/docs/howto/declare_namespaces.html
      ;; for more RSS namespaces.
      (setq dc-ns (shimbun-rss-get-namespace-prefix xml "http://purl.org/dc/elements/1.1/";)
	    rdf-ns (shimbun-rss-get-namespace-prefix xml "http://www.w3.org/1999/02/22-rdf-syntax-ns#";)
	    rss-ns (shimbun-rss-get-namespace-prefix xml "http://purl.org/rss/1.0/";)
	    content-ns (shimbun-rss-get-namespace-prefix xml "http://purl.org/rss/1.0/modules/content/";))
      (dolist (item (nreverse (shimbun-rss-find-el (intern (concat rss-ns "item")) xml)))
	(when (and (listp item)
		   (eq (intern (concat rss-ns "item")) (car item))
		   (setq url (shimbun-rss-node-text rss-ns 'link (cddr item)))
		   )
	  (setq subject (shimbun-rss-node-text rss-ns 'title item))
;; 	  (setq extra (or (shimbun-rss-node-text content-ns 'encoded item)
;; 			  (shimbun-rss-node-text rss-ns 'description item)))
	  (setq from (or (shimbun-rss-node-text rss-ns 'author item)
			   (shimbun-rss-node-text dc-ns 'creator item)))
	  (setq date (or (shimbun-rss-node-text dc-ns 'date item)
			 (shimbun-rss-node-text rss-ns 'pubDate item)))
	  (setq id (shimbun-rss-make-id 
		    url
		    (shimbun-current-group-internal shimbun)))
	  (push (shimbun-make-header
		 0
		 (shimbun-mime-encode-string subject)
		 (shimbun-mime-encode-string from)
		 date id "" 0 0 url)
		headers)))
      headers)))

;; (luna-define-method shimbun-article ((shimbun shimbun-rss) header
;; 				     &optional outbuf)
;;   ())


;;; Internal functions

;;(defun shimbun-rss-make-time (iso-676-date)
;;  (let tm

(defun shimbun-rss-make-id (url group)
  (format "<%s@%s>"
	  url group))

;;; XML functions

(defun shimbun-rss-node-text (namespace local-name element)
  (let* ((node (assq (intern (concat namespace (symbol-name local-name)))
		     element))
	 (text (if (and node (listp node))
		   (shimbun-rss-node-just-text node)
		 node))
	 (cleaned-text (if text (shimbun-rss-replace-in-string
				 text "^[\000-\037\177]+\\|^ +\\| +$" ""))))
    (if (string-equal "" cleaned-text)
	nil
      cleaned-text)))

(defun shimbun-rss-node-just-text (node)
  (if (and node (listp node))
      (mapconcat 'shimbun-rss-node-just-text (cddr node) " ")
    node))

(defun shimbun-rss-replace-in-string (string regexp newtext &optional literal)
      (let ((start 0) tail)
	(while (string-match regexp string start)
	  (setq tail (- (length string) (match-end 0)))
	  (setq string (replace-match newtext nil literal string))
	  (setq start (- (length string) tail))))
      string)

(defun shimbun-rss-find-el (tag data &optional found-list)
  "Find the all matching elements in the data.  Careful with this on
large documents!"
  (if (listp data)
      (mapcar (lambda (bit)
		(if (car-safe bit)
		    (progn (if (equal tag (car bit))
			       (setq found-list
				     (append found-list
					     (list bit))))
			   (if (and (listp (car-safe (caddr bit)))
				    (not (stringp (caddr bit))))
			       (setq found-list
				     (append found-list
					     (shimbun-rss-find-el
					      tag (caddr bit))))
			     (setq found-list
				   (append found-list
					   (shimbun-rss-find-el
					    tag (cddr bit))))))))
		data))
  found-list)

(defun shimbun-rss-get-namespace-prefix (el uri)
  "Given EL (containing a parsed element) and URI (containing a string
that gives the URI for which you want to retrieve the namespace
prefix), return the prefix."
  (let* ((prefix (car (rassoc uri (cadar el))))
	 (nslist (if prefix 
		     (split-string (symbol-name prefix) ":")))
	 (ns (cond ((eq (length nslist) 1) ; no prefix given
		    "")
		   ((eq (length nslist) 2) ; extract prefix
		    (cadr nslist)))))
    (if (and ns (not (equal ns "")))
	(concat ns ":")
      ns)))


(provide 'sb-rss)

;;; sb-rss.el ends here
-- 
koichiro <koichiro@meadowy.org>