[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Patches for sb-heise, sb-perlentaucher-de
- From: David Engster <deng@xxxxxxxxxxxxxxx>
- Date: Mon, 05 Oct 2009 15:20:24 +0200
- X-ml-name: emacs-w3m
- X-mail-count: 11066
Dear maintainers,
Please find attached a patch for sb-heise and sb-perlentaucher-de which
should make these shimbuns work again.
Regards,
David
Index: sb-heise.el
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/sb-heise.el,v
retrieving revision 1.23
diff -u -r1.23 sb-heise.el
--- sb-heise.el 23 Jul 2008 22:08:10 -0000 1.23
+++ sb-heise.el 5 Oct 2009 13:18:35 -0000
@@ -61,16 +61,16 @@
(defun shimbun-heise-get-newsticker-headers (shimbun)
(let ((regexp
- "<a href=\"/newsticker/\\([^\"]+?\\)/meldung/\\([0-9]+\\)\"[^>]*>\\([^<]+\\)</a>")
+ "<a href=\"/newsticker/\\(meldung/.+\\.html\\)\"[^>]*>\\([^<]+\\)</a>")
(from "Heise Online News <invalid@xxxxxxxx>")
(date "") (longurl) (id) (url) (subject) (headers))
(catch 'stop
(while (re-search-forward regexp nil t nil)
(setq longurl (match-string 1))
- (setq id (match-string 2))
- (setq url (shimbun-expand-url (concat longurl "/meldung/" id)
+ (setq id (md5 longurl))
+ (setq url (shimbun-expand-url longurl
(shimbun-index-url shimbun)))
- (setq subject (match-string 3))
+ (setq subject (match-string 2))
(setq id (concat "<newsticker" id "@heise.de>"))
(when (shimbun-search-id shimbun id)
(throw 'stop nil))
@@ -135,8 +135,8 @@
(save-excursion
;; get the real date
- (let ((regexp-date-begin "<!-- \\*\\*\\* tmpl \\*\\*\\* -->")
- (regexp-date-end "<!-- obere News-Navigation -->")
+ (let ((regexp-date-begin "<div id=\"mitte_news\">")
+ (regexp-date-end "<div class=\"news_logo\">")
(regexp-date (concat "\\([0-9]+\\)\\.\\([0-9]+\\)\\."
"\\([0-9]+\\)[ \t]+\\([0-9]+\\:[0-9]+\\)"))
(tmp-point) (bound-point))
@@ -156,10 +156,9 @@
"+0000"))))))
;; get the real from
- (let ((regexp-from-begin "<!-- Meldung -->\\|<HEISETEXT>")
- (regexp-from-end "<!-- untere News-Navigation -->")
- (regexp-from (concat "(<a href=\"mailto:\\([^@]+@ct.heise.de\\)\""
- "[^>]*>\\([^<]+\\)</a>"))
+ (let ((regexp-from-begin "<span class=\"ISI_IGNORE\">")
+ (regexp-from-end "<br class=\"clear\"")
+ (regexp-from "(<a href=\"mailto:\\(.+?\\)\" title=\"\\(.+?\\)\"")
(tmp-point) (bound-point))
(when (setq tmp-point (re-search-forward regexp-from-begin nil t nil))
(when (setq bound-point (re-search-forward regexp-from-end nil t nil))
@@ -168,18 +167,13 @@
(shimbun-header-set-from
header
(shimbun-mime-encode-string
- (concat "Heise Online News, "
- (match-string 2)
+ (concat (match-string 2)
" <"
(match-string 1)
">")))))))
+ (shimbun-remove-tags "<head>" "<div class=\"meldung_wrapper\">")
+ (shimbun-remove-tags "<span class=\"ISI_IGNORE\">" "</body>")))
- ;; strip ads
- (goto-char (point-min))
- (let (beg end)
- (while (and (setq beg (re-search-forward "<!-- b?cadv -->" nil t))
- (setq end (re-search-forward "<!-- /b?cadv -->" nil t)))
- (delete-region beg end)))))
(defun shimbun-heise-wash-telepolis-article (header)
Index: sb-perlentaucher-de.el
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/sb-perlentaucher-de.el,v
retrieving revision 1.3
diff -u -r1.3 sb-perlentaucher-de.el
--- sb-perlentaucher-de.el 21 Sep 2008 00:39:06 -0000 1.3
+++ sb-perlentaucher-de.el 5 Oct 2009 13:18:36 -0000
@@ -46,13 +46,6 @@
(luna-define-method shimbun-clear-contents :before ((shimbun
shimbun-perlentaucher-de)
header)
- ;; search teaser for real article url and load it
- (goto-char (point-min))
- (re-search-forward ".*<a href=\"\\(.*?\\)\">mehr lesen</a>" nil t)
- (let ((url (format "http://www.perlentaucher.de%s" (match-string 1))))
- (erase-buffer)
- (shimbun-retrieve-url url))
- ;; remove stuff
(shimbun-remove-tags "<div class=\"tools\">" "Merkzettel</a></li></ul>")
(shimbun-remove-tags "<div class=\"box2 jumper\">" "</div>")
(shimbun-remove-tags "<a href=\"#top\">" "</a>"))
Index: ChangeLog
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/ChangeLog,v
retrieving revision 1.210
diff -u -r1.210 ChangeLog
--- ChangeLog 1 Oct 2009 11:50:33 -0000 1.210
+++ ChangeLog 5 Oct 2009 13:18:37 -0000
@@ -1,3 +1,11 @@
+2009-10-05 David Engster <dengste@xxxxxx>
+
+ * sb-heise.el (shimbun-heise-get-newsticker-headers)
+ (shimbun-heise-wash-newsticker-article): Update regular expressions.
+ Use shimbun-remove-tags instead of delete-region.
+ * sb-perlentaucher-de.el (shimbun-clear-contents): RSS no longer points
+ to a teaser, so remove the loading of the real article URL.
+
2008-10-01 Katsumi Yamaoka <yamaoka@xxxxxxx>
* sb-kantei.el (shimbun-index-url): Update url for m-magazine-en.