;;; sb-heise.el --- heise online shimbun backend ;; Copyright (C) 2004 David Hansen ;; Author: David Hansen ;; Keywords: news ;; This file is a part of shimbun. ;; This is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 2, or (at your option) ;; any later version. ;; This is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to the ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, ;; Boston, MA 02111-1307, USA. ;;; Commentary: ;;; Code: (require 'shimbun) (luna-define-class shimbun-heise (shimbun) ()) (defvar shimbun-heise-url "http://www.heise.de") (defvar shimbun-heise-group-path-alist '(("news" . "/newsticker/") ("telepolis" . "/tp/"))) (defvar shimbun-heise-content-start "\\(\\|\\|\\)") (defvar shimbun-heise-content-end "\\(\\|\\|\\)") (defvar shimbun-heise-x-face-alist '(("default" . "X-Face: #RVD(kjrS;RY\"2yH]w.1U,ZC_DbR,9{tQnhyYe|,\\J)\"\ C*o1{%`*]WwtAuo;reeq_koTr=oIKXFB4#bS'tSdz.Mc%t~-@873uYV>SMjL7D6K$M4L0Up{D\ _rBgD*Xj,t;iPKWh:!B}ijDOoCxs!}rs&(r-TLwU8=>@[w^H(>^u$wM*}\":9LANQs)1\"cZP\ 6ftp?9>b&|rkGR+VWIlD:%?,Fvi8h?q2H+pVqq5#Z9*k2q7.P)0$x!A)T"))) (defvar shimbun-heise-groups (mapcar 'car shimbun-heise-group-path-alist)) (luna-define-method shimbun-index-url ((shimbun shimbun-heise)) (concat shimbun-heise-url (cdr (assoc (shimbun-current-group-internal shimbun) shimbun-heise-group-path-alist)))) (defun shimbun-heise-get-newsticker-headers (shimbun) (let ((regexp "\\([^<]+\\)") (from "Heise Online News ") (date "") (id) (url) (subject) (headers)) (catch 'stop (while (re-search-forward regexp nil t nil) (setq id (match-string 1)) (setq url (w3m-expand-url (concat "meldung/" id) (shimbun-index-url shimbun))) (setq subject (match-string 2)) (setq id (concat "")) (when (shimbun-search-id shimbun id) (throw 'stop nil)) (message id) (push (shimbun-make-header 0 (shimbun-mime-encode-string subject) (shimbun-mime-encode-string from) date id "" 0 0 url) headers))) headers)) (defconst sb-heise-tp-url-re (concat "]*class=\"inhalt-head\"[^>]*>" "[^<]*]*>" "\\(.*?\n?.*?\\)

")) (defconst sb-heise-tp-auth-date-re (concat "]*class=\"inhalt-autor\"[^>]*>" "\\([^<]+\\)]*class=\"date\"[^>]*>" "[^<]*\\([0-9]+\\.[0-9]+\\.[0-9]+\\)[^<]*" "")) (defun shimbun-heise-get-telepolis-headers (shimbun) (let (headers) (catch 'stop (while (re-search-forward sb-heise-tp-url-re nil t nil) (let ((url (match-string 1)) (subject (match-string 2))) (when (re-search-forward sb-heise-tp-auth-date-re nil t nil) (let ((author (concat (match-string 1) " ")) (date (match-string 2)) (id)) (setq url (w3m-expand-url url shimbun-heise-url)) (string-match "/[0-9]+/[0-9]+/" url) (setq id (concat "")) (when (shimbun-search-id shimbun id) (throw 'stop nil)) (string-match "\\([0-9]+\\)\\.\\([0-9]+\\)\\.\\([0-9]+\\)" date) (push (shimbun-create-header 0 subject author (shimbun-make-date-string (string-to-number (match-string 3 date)) ; year (string-to-number (match-string 2 date)) ; month (string-to-number (match-string 1 date)) ; day "00:00" ; time ;; FIXME: timezone is always wrong, slightly better than ;; the default "+0900" "+0000") id "" 0 0 url) headers)))))) headers)) (luna-define-method shimbun-get-headers ((shimbun shimbun-heise) &optional range) (if (equal (shimbun-current-group-internal shimbun) "news") (shimbun-heise-get-newsticker-headers shimbun) (shimbun-heise-get-telepolis-headers shimbun))) (defun shimbun-heise-wash-newsticker-article (header) (save-excursion ;; get the real date (let ((regexp-date-begin "") (regexp-date-end "") (regexp-date (concat "\\([0-9]+\\)\\.\\([0-9]+\\)\\." "\\([0-9]+\\)[ \t]+\\([0-9]+\\:[0-9]+\\)")) (tmp-point) (bound-point)) (when (setq tmp-point (re-search-forward regexp-date-begin nil t nil)) (when (setq bound-point (re-search-forward regexp-date-end nil t nil)) (goto-char tmp-point) (when (re-search-forward regexp-date bound-point t nil) (shimbun-header-set-date header (shimbun-make-date-string (string-to-number (match-string 3)) ; year (string-to-number (match-string 2)) ; month (string-to-number (match-string 1)) ; day (match-string 4) ; time ;; FIXME: timezone is always wrong, slightly better than the ;; default "+0900" "+0000")))))) ;; get the real from (let ((regexp-from-begin "\\|") (regexp-from-end "") (regexp-from (concat "(]*>\\([^<]+\\)")) (tmp-point) (bound-point)) (when (setq tmp-point (re-search-forward regexp-from-begin nil t nil)) (when (setq bound-point (re-search-forward regexp-from-end nil t nil)) (goto-char tmp-point) (when (re-search-forward regexp-from bound-point t nil) (shimbun-header-set-from header (shimbun-mime-encode-string (concat "Heise Online News, " (match-string 2) " <" (match-string 1) ">"))))))) ;; strip ads (goto-char (point-min)) (let ((regexp-ad-begin "\\|") (regexp-ad-end "") (regexp-ad "") (tmp-point) (bound-min) (bound-max)) (when (setq bound-min (re-search-forward regexp-ad-begin nil t nil)) (when (setq bound-max (re-search-forward regexp-ad-end nil t nil)) (goto-char bound-min) (while (re-search-forward regexp-ad bound-max t nil) (let ((begin-region (re-search-backward "" bound-max t nil))) (when (and begin-region end-region) (delete-region begin-region end-region))))))))) (defun shimbun-heise-wash-telepolis-article (header) (save-excursion ;; strip nasty "download" images (goto-char (point-min)) (while (re-search-forward "" nil t nil) (delete-region (point) (re-search-forward "" nil t nil))))) (luna-define-method shimbun-make-contents :before ((shimbun shimbun-heise) header) (if (equal (shimbun-current-group-internal shimbun) "news") (shimbun-heise-wash-newsticker-article header) (shimbun-heise-wash-telepolis-article header))) (provide 'sb-heise) ;;; sb-heise.el ends here