[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: sb-emacswiki.el / sb-spigel.el
Quote [ TSUCHIYA Masatoshi * 3903. September 1993 ]
>> I've also created shimbun modules for the EmacsWiki and Spiegel. These
>> modules don't use RSS. I think that's better.
> May be, and may be not.
> A module that does not depend on RSS can be more flexible than one
> based on RSS. For example, Spiegel Online provides the RSS that
> contains only 10 articles, which are a quite small part of available
> articles. This means that you can lose some articles when more
> than 10 articles have been published if you use a module based on RSS.
Yep, and you cannot create a Date header because the Spiegel Online RSS
feed has no creation information for its articles.
> However, the RSS-based approach has a big advantage: a module based on RSS
> is easier to maintain than the other. Generally speaking, a
> module that does not depend on RSS uses several regular expressions to
> detect titles, authors, dates, and URLs. Because these regular
> expressions are quite fragile, you will have to pay a heavy cost to
> keep them valid against site changes.
> Because these factors are variable through sites, we have to decide
> which approach is better for each site. If you complete your
> sb-spigel.el and post it to this mailing list, we will be able to
> discuss about it.
OK.
> A possible solution is to rename sb-spigel.el based
> on RSS to sb-spigel-rss.el.
Sounds good.
>> See
>> <87zn8m4t7g.fsf@news.gentoo.b-j-t.de>
>> for sb-emacswiki.el.
> Because I do not use sb-emacswiki.el usually and do not have enough
> knowledge about EmacsWiKi, I can not decide which approach is better.
> Could you discuss about it, David and Markus?
The RSS variant seems to be better because the EmacsWiki feed
provides the same information as the HTTP site.
> By the way, I think that it is quite important for developers to post
> their developing modules to this mailing list in order to avoid
> duplication of work like this case.
OK, here is a shimbun module for Wikipedia. Currently it only supports the
English and German sites. I've also made sb-tagesschau.el, but it
currently has the same error as sb-spiegel.el. :-/
;;; sb-wikipedia.el --- w3m shimbun for wikipedia.org
;; Copyright (C) 2004 Markus Knittig
;; Author: Markus Knittig <markus.knittig@arcor.de>
;; Version: 0.1
;; Keywords: emacs-w3m, shimbun, hypermedia
;; This file is not part of GNU Emacs.
;; This is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.
;; This is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.
;;; Commentary:
;;; Code:
(require 'shimbun)
;; Declare the `shimbun-wikipedia' backend class.  It derives from the
;; generic `shimbun' class and adds no slots of its own (the slot list
;; is empty); all site-specific behaviour lives in the methods below.
(luna-define-class shimbun-wikipedia (shimbun) ())
(defvar shimbun-wikipedia-url
  "http://wikipedia.org"
  "Base URL prepended to a group's path to form its index page URL.
See `shimbun-wikipedia-group-path-alist' for the per-group paths.")
;; Alist of (NAME . X-FACE-HEADER) pairs; the only entry is "default".
;; The value is a complete "X-Face: ..." header line.  The string is
;; split over several source lines with backslash-newline continuations,
;; which the Lisp reader drops, so the continuation lines must start in
;; column 0 -- any leading whitespace would become part of the face data.
;; NOTE(review): presumably consumed by the generic shimbun X-Face
;; lookup via the `shimbun-wikipedia-' naming convention -- confirm.
(defvar shimbun-wikipedia-x-face-alist
'(("default" . "X-Face: *WGD57Ev+LTW]?d:._VW@3'?KVRq5fr%&dMvDc3$|p'b<nsDYi0D,J9oMJ5FtZOGbf\"eS,(\
myuD'2)JjXAH!/0%M)lcPTsy#(kn@A9mK('jZh<L)yp^jLM@s\"Yr[#Z@p>j\"Ykh)9j9?w7UYKg\\0gc\
b.=7]PZsDr=XN%:!*if`?^j-\"$eL4X\\m64L\\G1b0A;+^kxQg}5q-~;.0zL|}9@tyS>*IfqhRWg)nm(\
vK=,?&IG7!33R\",^m4}\"7Qu\\mlk|+|lbfS<^W/bb\"@N]Vv:&Cb`#LRWqc/rfi'_IF/`v*Ri^r`zzAA\
\\MERymc{s")))
(defvar shimbun-wikipedia-group-path-alist
  '(("RecentChanges.de" . "/de/Spezial:Recentchanges")
    ("RecentChanges.en" . "/en/Special:Recentchanges"))
  "Alist mapping each group name to the path of its index page.
Each element is (GROUP . PATH).  GROUP has the form
\"RecentChanges.LANG\"; PATH is appended to `shimbun-wikipedia-url'.")
(defconst shimbun-wikipedia-month-de-alist
  '(("Jan" . "Jan") ("Feb" . "Feb") ("Mär" . "Mar")
    ("Apr" . "Apr") ("Mai" . "May") ("Jun" . "Jun")
    ("Jul" . "Jul") ("Aug" . "Aug") ("Sep" . "Sep")
    ("Okt" . "Oct") ("Nov" . "Nov") ("Dez" . "Dec"))
  "Alist translating German month abbreviations to English ones.
Each element is (GERMAN . ENGLISH), both three-letter abbreviations.")
(defvar shimbun-wikipedia-month-de
  (let ((table (make-hash-table :test 'equal)))
    ;; Copy every (GERMAN . ENGLISH) pair of the alist into the table.
    (mapc (lambda (entry)
            (puthash (car entry) (cdr entry) table))
          shimbun-wikipedia-month-de-alist)
    table)
  "Hash table form of `shimbun-wikipedia-month-de-alist'.
Keys are German month abbreviations, values the English ones; keys
are compared with `equal'.")
(defconst shimbun-wikipedia-month-de-regexp
  (let ((german-names (mapcar #'car shimbun-wikipedia-month-de-alist)))
    (regexp-opt german-names))
  "Regexp matching any German month abbreviation.
Built from the keys of `shimbun-wikipedia-month-de-alist'.")
(defvar shimbun-wikipedia-groups
  (mapcar #'car shimbun-wikipedia-group-path-alist)
  "List of group names offered by this backend.
Derived from the keys of `shimbun-wikipedia-group-path-alist'.")
(luna-define-method shimbun-index-url ((shimbun shimbun-wikipedia))
  ;; Return the index page URL for SHIMBUN's current group: the base
  ;; URL followed by the path registered for that group in
  ;; `shimbun-wikipedia-group-path-alist'.
  (let* ((group (shimbun-current-group-internal shimbun))
         (path (cdr (assoc group shimbun-wikipedia-group-path-alist))))
    (concat shimbun-wikipedia-url path)))
(defun shimbun-wikipedia-get-headers (&optional entity)
  "Parse the Recent Changes page in the current buffer into shimbun headers.
Scanning starts at point.  ENTITY is the shimbun object whose current
group (\"RecentChanges.LANG\") selects the language.  For backward
compatibility ENTITY may be omitted, in which case the dynamically
bound variable `shimbun' of the caller is used.

Return the list of new headers, most recently parsed first.  Parsing
stops at the first entry whose Message-ID is already known to ENTITY.
Signal an error if the group name has an unexpected format or if no
shimbun object is available."
  (let* ((entity (or entity
                     ;; Legacy callers passed nothing and relied on the
                     ;; caller's dynamically scoped `shimbun' binding.
                     (and (boundp 'shimbun) (symbol-value 'shimbun))
                     (error "No shimbun entity given")))
         (group (shimbun-current-group-internal entity))
         ;; One change entry.  Groups: 1 = href, 2 = page title,
         ;; 3 = HH:MM, 4 = author, 5 = optional <em>summary</em>.
         ;; Note that `\.' inside a Lisp string reads as a plain `.',
         ;; so the dots in this pattern match any character.
         ;; NOTE(review): there is no whitespace between `;' and the
         ;; time in this pattern -- confirm against the page markup.
         (regexp (concat
                  "<li> (<a href=\"\\([^\"]+\\)\".+?>.+?</a>) (.+?) \. \..+?<a.+?>\\(.+?\\)</a>;"
                  "\\([0-9]\\{2\\}:[0-9]\\{2\\}\\) \. \. <a.+?>\\(.+?\\)</a> (.+?)\\(<em>.+?</em>\\)?</li>"))
         ;; Day heading.  The month group accepts any three non-space
         ;; characters so that non-ASCII abbreviations such as the
         ;; German "Mär" (a key of the month table) can match too.
         (regexp-day "<h4>\\([0-9]\\{2\\}\\)\.? \\([^ <]\\{3\\}\\) \\([0-9]\\{4\\}\\)")
         lang day-date limit headers)
    (if (string-match "RecentChanges.\\(.*\\)" group)
        (setq lang (match-string 1 group))
      (error "Wrong group format"))
    (catch 'stop
      ;; Outer loop: one iteration per day heading.
      (while (re-search-forward regexp-day nil t)
        (let ((day (match-string 1))
              (month (match-string 2))
              (year (match-string 3)))
          (when (equal lang "de")
            ;; Translate the German abbreviation; keep the original
            ;; text if it is not in the table instead of dropping it.
            (setq month (or (gethash month shimbun-wikipedia-month-de)
                            month)))
          (setq day-date (concat day " " month " " year)))
        ;; The entries of this day end where the next day heading
        ;; begins (or at the end of the buffer for the last day).
        (setq limit (save-excursion
                      (and (re-search-forward regexp-day nil t)
                           (match-beginning 0))))
        ;; Inner loop: one iteration per change entry of this day.
        (while (re-search-forward regexp limit t)
          ;; Read every match group before calling anything that might
          ;; clobber the match data.
          (let* ((href (match-string 1))
                 (subject (match-string 2))
                 (time (match-string 3))
                 (author (match-string 4))
                 (summary (match-string 5))
                 (url (w3m-expand-url
                       href (concat "http://" lang ".wikipedia.org")))
                 ;; FIXME: right timezone
                 (date (concat day-date " " time " +0000"))
                 (from (concat author " <invalid@" lang ".wikipedia.org>"))
                 (start 0)
                 id)
            ;; Decode HTML-escaped ampersands in the URL.  START moves
            ;; past each replacement so the loop always terminates.
            (while (string-match "&amp;" url start)
              (setq start (1+ (match-beginning 0))
                    url (replace-match "&" t t url)))
            ;; Append the edit summary, if any, to the subject.
            (when (and summary
                       (string-match "<em> (\\(.+?\\))</em>" summary))
              (setq subject (concat subject " - " (match-string 1 summary))))
            (setq id (concat "<" date "." subject "@" lang ".wikipedia.org>"))
            ;; Everything from here on has been fetched before.
            (when (shimbun-search-id entity id)
              (throw 'stop nil))
            (push (shimbun-make-header
                   0 (shimbun-mime-encode-string subject)
                   (shimbun-mime-encode-string from)
                   date id "" 0 0 url)
                  headers)))))
    headers))
(luna-define-method shimbun-get-headers ((shimbun shimbun-wikipedia) &optional range)
  ;; Pass the shimbun object explicitly instead of relying on dynamic
  ;; scoping.  RANGE is accepted for interface compatibility but unused.
  (shimbun-wikipedia-get-headers shimbun))
(defun shimbun-wikipedia-wash-article (header)
  "Strip page chrome around the article in the current buffer.
Searching forward from point, delete everything from the start of
the buffer up to the line before the opening article <div>, then
delete everything after the closing marker up to the end of the
buffer.  Either step is skipped when its marker is not found.
Point is preserved.  HEADER is accepted but not used."
  (let ((opening "<div id='article'>")
        (closing "<p>\n</div><br clear=all>"))
    (save-excursion
      ;; Leading part: keep the line preceding the article <div>.
      (when (re-search-forward opening nil t)
        (forward-line -1)
        (delete-region (point-min) (point)))
      ;; Trailing part: drop whatever follows the closing marker.
      (when (re-search-forward closing nil t)
        (delete-region (point) (point-max))))))
;; (luna-define-method shimbun-make-contents :before ((shimbun shimbun-wikipedia) header)
;; (shimbun-wikipedia-wash-article header))
;; (shimbun-header-insert-and-buffer-string shimbun header nil t))
(luna-define-method shimbun-make-contents ((shimbun shimbun-wikipedia) header)
  ;; Reduce the fetched page to the article body, then let shimbun
  ;; prepend the headers and return the buffer contents as a string.
  (let ((charset "iso-8859-1"))
    (shimbun-wikipedia-wash-article header)
    (shimbun-header-insert-and-buffer-string shimbun header charset t)))
(provide 'sb-wikipedia)
;;; sb-wikipedia.el ends here.
Best regards,
Markus