[Date Prev][Date Next][Thread Prev][][Date Index][Thread Index]

Re: sb-emacswiki.el / sb-spigel.el

Quote [ TSUCHIYA Masatoshi * 3903. September 1993 ]

>> I've also created a shimbun module for the emacswiki and spiegel.  These
>> modules don't use RSS. I think that's better.
> May be, and may be not.
> A module that does not depend on RSS can be more flexible than one
> based on RSS.  For example, Spiegel Online provides the RSS that
> contains only 10 articles, which are a quite small part of available
> articles.  This means that you can lose some articles when more
> articles than 10 have been published if you use a module based on RSS.

Yep, and you cannot create a Date header because the Spiegel Online RSS
feed has no creation-date information for its articles.

> However, RSS-based approach has a big advantage: a module based on RSS
> is easier to maintain than the other.  Generally speaking, a
> module that does not depend on RSS uses several regular expressions to
> detect titles, authors, dates, and URLs.  Because these regular
> expressions are quite fragile, you will have to pay a heavy cost to
> keep them valid against site changes.
> Because these factors are variable through sites, we have to decide
> which approach is better for each site.  If you complete your
> sb-spigel.el and post it to this mailing list, we will be able to
> discuss about it.


> A possible solution is to rename sb-spigel.el based
> on RSS to sb-spigel-rss.el.

Sounds good.

>> See
>> <87zn8m4t7g.fsf@news.gentoo.b-j-t.de>
>> for sb-emacswiki.el.
> Because I do not use sb-emacswiki.el usually and do not have enough
> knowledge about EmacsWiKi, I can not decide which approach is better.
> Could you discuss about it, David and Markus?

The RSS variant seems to be better because the emacswiki feed
provides the same information as the HTTP site.

> By the way, I think that it is quite important for developers to post
> their developing modules to this mailing list in order to avoid
> duplication of work like this case.

OK, here is a shimbun module for Wikipedia. Currently only for the
English and German sites. I've also made sb-tagesschau.el, but it
currently has the same error as sb-spiegel.el. :-/

;;; sb-wikipedia.el --- w3m shimbun for wikipedia.org

;; Copyright (C) 2004 Markus Knittig

;; Author: Markus Knittig <markus.knittig@arcor.de>
;; Version: 0.1
;; Keywords: emacs-w3m, shimbun, hypermedia

;; This file is not part of GNU Emacs.

;; This is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; This is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;; Commentary:

;;; Code:

(require 'shimbun)

;; Backend class for this module: its only parent is `shimbun' and it
;; declares no slots of its own.
(luna-define-class shimbun-wikipedia (shimbun) ())

;; The original line had a stray `;' right after the string, which
;; commented out the closing parenthesis and left the form unterminated.
(defvar shimbun-wikipedia-url "http://wikipedia.org"
  "Base URL to which the path of the current group is appended.")

;; FIXME: the X-Face data below is truncated.  This copy of the file ended
;; the line with a continuation backslash and lost everything after it, so
;; the string literal was never closed and swallowed the definitions that
;; follow.  The literal is terminated here so the file can be read again;
;; restore the remaining X-Face lines from the original posting.
(defvar shimbun-wikipedia-x-face-alist
  '(("default" . "X-Face: *WGD57Ev+LTW]?d:._VW@3'?KVRq5fr%&dMvDc3$|p'b<nsDYi0D,J9oMJ5FtZOGbf\"eS,("))
  "Alist whose \"default\" entry holds an X-Face header string.")

(defvar shimbun-wikipedia-group-path-alist
  (list (cons "RecentChanges.de" "/de/Spezial:Recentchanges")
        (cons "RecentChanges.en" "/en/Special:Recentchanges"))
  "Alist of (GROUP . PATH) pairs, one per supported group.
GROUP is the shimbun group name and PATH is a URL path string.")

(defconst shimbun-wikipedia-month-de-alist
  '(("Jan" . "Jan") ("Feb" . "Feb") ("Mär" . "Mar") ("Apr" . "Apr")
    ("Mai" . "May") ("Jun" . "Jun") ("Jul" . "Jul") ("Aug" . "Aug")
    ("Sep" . "Sep") ("Okt" . "Oct") ("Nov" . "Nov") ("Dez" . "Dec"))
  "Alist of (GERMAN . ENGLISH) three-letter month abbreviations.")

(defvar shimbun-wikipedia-month-de
  (let ((hash (make-hash-table :test 'equal)))
    (dolist (pair shimbun-wikipedia-month-de-alist)
      (puthash (car pair) (cdr pair) hash))
    ;; The table itself must be the value of the `let': without this the
    ;; form was unterminated and the variable never held the hash table
    ;; that `gethash' is later called on.
    hash)
  "Hash table built from `shimbun-wikipedia-month-de-alist'.
Keys and values are the car and cdr of each pair; keys are compared
with `equal'.")

(defconst shimbun-wikipedia-month-de-regexp
  (let ((month-names (mapcar #'car shimbun-wikipedia-month-de-alist)))
    (regexp-opt month-names))
  "Regexp matching any key of `shimbun-wikipedia-month-de-alist'.")

(defvar shimbun-wikipedia-groups
  (mapcar #'car shimbun-wikipedia-group-path-alist)
  "List of group names: the keys of `shimbun-wikipedia-group-path-alist'.")
(luna-define-method shimbun-index-url ((shimbun shimbun-wikipedia))
  ;; Return the index URL for SHIMBUN's current group: the base URL
  ;; followed by the path registered for that group.  The `assoc' call
  ;; had lost its alist argument and the form its closing parentheses;
  ;; the group-to-path alist is the only table keyed by group name.
  (concat shimbun-wikipedia-url
	  (cdr (assoc (shimbun-current-group-internal shimbun)
		      shimbun-wikipedia-group-path-alist))))

(defun shimbun-wikipedia-get-headers ()
  "Build shimbun headers from the recent-changes page in the current buffer.
Scanning starts at point.  This function takes no arguments: it reads the
variable `shimbun', which the caller must have bound (dynamically) to the
entity being fetched.  The language code is the part of the current group
name after \"RecentChanges.\"; an error is signaled for any other group
name.  Scanning stops at the first entry whose Message-ID is already
known to `shimbun-search-id'.  Return the list of headers collected.

NOTE(review): the tail of this function (the `push' target and the return
value) was missing from the source and has been reconstructed; confirm
against the original posting."
  (let ((regexp
         ;; Groups: 1 = diff link, 2 = page title, 3 = HH:MM, 4 = author,
         ;; 5 = optional <em>...</em> edit summary.  The dots separating
         ;; the fields are written \"\\\\.\" so that they match a literal
         ;; dot; a bare \"\\.\" inside a Lisp string is just \".\".
         (concat
          "<li> (<a href=\"\\([^\"]+\\)\".+?>.+?</a>) (.+?) \\. \\..+?"
          "<a.+?>\\(.+?\\)</a>; "
          "\\([0-9]\\{2\\}:[0-9]\\{2\\}\\) \\. \\. <a.+?>\\(.+?\\)</a>"
          " (.+?)\\(<em>.+?</em>\\)?</li>"))
        ;; Day heading: DD[.] Mon YYYY.  [[:alpha:]] instead of [A-Za-z]
        ;; so that the German abbreviation \"Mär\" is matched as well.
        (regexp-day
         "<h4>\\([0-9]\\{2\\}\\)\\.? \\([[:alpha:]]\\{3\\}\\) \\([0-9]\\{4\\}\\)")
        (group-name (shimbun-current-group-internal shimbun))
        lang headers)
    (if (string-match "RecentChanges.\\(.*\\)" group-name)
        (setq lang (match-string 1 group-name))
      (error "Wrong group format"))
    (catch 'stop
      ;; Find the day.
      (while (re-search-forward regexp-day nil t)
        (let* ((day (match-string 1))
               (month (match-string 2))
               (year (match-string 3))
               ;; Entries belong to this day only up to the next day
               ;; heading; without this bound every entry on the page
               ;; was stamped with the first day's date.
               (day-end (save-excursion
                          (if (re-search-forward regexp-day nil t)
                              (match-beginning 0)
                            (point-max))))
               day-date)
          (when (equal lang "de")
            ;; Keep the original text if the abbreviation is unknown.
            (setq month (or (gethash month shimbun-wikipedia-month-de)
                            month)))
          (setq day-date (concat day " " month " " year))
          ;; Find the entries of this day.
          (while (re-search-forward regexp day-end t)
            ;; Read every match group before calling anything else, since
            ;; helper functions may change the match data.  All per-entry
            ;; values are local so nothing leaks into the next entry.
            (let* ((href (match-string 1))
                   (subject (match-string 2))
                   (clock (match-string 3))
                   (author (match-string 4))
                   (summary (match-string 5))
                   (url (w3m-expand-url
                         href (concat "http://" lang ".wikipedia.org")))
                   ;; FIXME: right timezone
                   (date (concat day-date " " clock " +0000"))
                   (from (concat author " <invalid@" lang ".wikipedia.org>"))
                   id)
              ;; Fix the URL: undo HTML escaping of ampersands.
              (while (string-match "&amp;" url)
                (setq url (replace-match "&" nil t url)))
              ;; Append the edit summary to the subject when present.
              (when (and summary
                         (string-match "<em> (\\(.+?\\))</em>" summary))
                (setq subject
                      (concat subject " - " (match-string 1 summary))))
              ;; Create an id.
              (setq id (concat "<" date "." subject "@" lang ".wikipedia.org>"))
              (when (shimbun-search-id shimbun id)
                (throw 'stop nil))
              (push (shimbun-make-header
                     0 (shimbun-mime-encode-string subject)
                     (shimbun-mime-encode-string from)
                     date id "" 0 0 url)
                    headers))))))
    headers))

(luna-define-method shimbun-get-headers ((shimbun shimbun-wikipedia) &optional range)
  ;; NOTE(review): the body of this method was missing from the source,
  ;; which made the following `defun' parse as the method body.  The call
  ;; below is a reconstruction: `shimbun-wikipedia-get-headers' takes no
  ;; arguments and reads `shimbun' from this binding.  RANGE is unused.
  (shimbun-wikipedia-get-headers))

(defun shimbun-wikipedia-wash-article (header)
  "Trim the current buffer down to the article body.
Searching forward from point, delete everything from the start of the
buffer up to the beginning of the line before the one where the opening
article marker ends, then delete everything after the closing marker.
Either step is skipped when its marker is not found.  HEADER is accepted
but not used."
  ;; The original form ended with one closing parenthesis too many.
  (let ((regexp-begin "<div id='article'>")
        (regexp-end "<p>\n</div><br clear=all>"))
    (when (re-search-forward regexp-begin nil t)
      (forward-line -1)
      (delete-region (point-min) (point)))
    (when (re-search-forward regexp-end nil t)
      (delete-region (point) (point-max)))))

;; (luna-define-method shimbun-make-contents :before ((shimbun shimbun-wikipedia) header)
;;  		    (shimbun-wikipedia-wash-article header))
;;   (shimbun-header-insert-and-buffer-string shimbun header nil t))

(luna-define-method shimbun-make-contents ((shimbun shimbun-wikipedia) header)
  ;; First strip the page chrome around the article, then let the generic
  ;; helper insert HEADER and return the buffer contents.  The helper is
  ;; given the charset name "iso-8859-1" and t as its last argument.
  (shimbun-wikipedia-wash-article header)
  (shimbun-header-insert-and-buffer-string
   shimbun header "iso-8859-1" t))

(provide 'sb-wikipedia)
;;; sb-wikipedia.el ends here

Best regards,