[Date Prev][Date Next][Thread Prev][][Date Index][Thread Index]

Patches for sb-heise, sb-perlentaucher-de



Dear maintainers,

Please find attached a patch for sb-heise and sb-perlentaucher-de that
should make both of these shimbun modules work again.

Regards,
David
Index: sb-heise.el
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/sb-heise.el,v
retrieving revision 1.23
diff -u -r1.23 sb-heise.el
--- sb-heise.el	23 Jul 2008 22:08:10 -0000	1.23
+++ sb-heise.el	5 Oct 2009 13:18:35 -0000
@@ -61,16 +61,16 @@
 
 (defun shimbun-heise-get-newsticker-headers (shimbun)
   (let ((regexp
-	 "<a href=\"/newsticker/\\([^\"]+?\\)/meldung/\\([0-9]+\\)\"[^>]*>\\([^<]+\\)</a>")
+	 "<a href=\"/newsticker/\\(meldung/.+\\.html\\)\"[^>]*>\\([^<]+\\)</a>")
 	(from "Heise Online News <invalid@xxxxxxxx>")
 	(date "") (longurl) (id) (url) (subject) (headers))
     (catch 'stop
       (while (re-search-forward regexp nil t nil)
 	(setq longurl (match-string 1))
-	(setq id (match-string 2))
-	(setq url (shimbun-expand-url (concat longurl "/meldung/" id)
+	(setq id (md5 longurl))
+	(setq url (shimbun-expand-url longurl
 				      (shimbun-index-url shimbun)))
-	(setq subject (match-string 3))
+	(setq subject (match-string 2))
 	(setq id (concat "<newsticker" id "@heise.de>"))
 	(when (shimbun-search-id shimbun id)
 	  (throw 'stop nil))
@@ -135,8 +135,8 @@
   (save-excursion
 
     ;; get the real date
-    (let ((regexp-date-begin "<!-- \\*\\*\\* tmpl \\*\\*\\* -->")
-	  (regexp-date-end "<!-- obere News-Navigation -->")
+    (let ((regexp-date-begin "<div id=\"mitte_news\">")
+	  (regexp-date-end "<div class=\"news_logo\">")
 	  (regexp-date (concat "\\([0-9]+\\)\\.\\([0-9]+\\)\\."
 			       "\\([0-9]+\\)[ \t]+\\([0-9]+\\:[0-9]+\\)"))
 	  (tmp-point) (bound-point))
@@ -156,10 +156,9 @@
 	      "+0000"))))))
 
     ;; get the real from
-    (let ((regexp-from-begin "<!-- Meldung -->\\|<HEISETEXT>")
-	  (regexp-from-end "<!-- untere News-Navigation -->")
-	  (regexp-from (concat "(<a href=\"mailto:\\([^@]+@ct.heise.de\\)\""
-			       "[^>]*>\\([^<]+\\)</a>"))
+    (let ((regexp-from-begin "<span class=\"ISI_IGNORE\">")
+	  (regexp-from-end "<br class=\"clear\"")
+	  (regexp-from "(<a href=\"mailto:\\(.+?\\)\" title=\"\\(.+?\\)\"")
 	  (tmp-point) (bound-point))
       (when (setq tmp-point (re-search-forward regexp-from-begin nil t nil))
 	(when (setq bound-point (re-search-forward regexp-from-end nil t nil))
@@ -168,18 +167,13 @@
 	    (shimbun-header-set-from
 	     header
 	     (shimbun-mime-encode-string
-	      (concat "Heise Online News, "
-		      (match-string 2)
+	      (concat (match-string 2)
 		      " <"
 		      (match-string 1)
 		      ">")))))))
+    (shimbun-remove-tags "<head>" "<div class=\"meldung_wrapper\">")
+    (shimbun-remove-tags "<span class=\"ISI_IGNORE\">" "</body>")))
 
-    ;; strip ads
-    (goto-char (point-min))
-    (let (beg end)
-      (while (and (setq beg (re-search-forward "<!-- b?cadv -->" nil t))
-                  (setq end (re-search-forward "<!-- /b?cadv -->" nil t)))
-        (delete-region beg end)))))
 
 
 (defun shimbun-heise-wash-telepolis-article (header)
Index: sb-perlentaucher-de.el
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/sb-perlentaucher-de.el,v
retrieving revision 1.3
diff -u -r1.3 sb-perlentaucher-de.el
--- sb-perlentaucher-de.el	21 Sep 2008 00:39:06 -0000	1.3
+++ sb-perlentaucher-de.el	5 Oct 2009 13:18:36 -0000
@@ -46,13 +46,6 @@
 (luna-define-method shimbun-clear-contents :before ((shimbun
 						     shimbun-perlentaucher-de)
 						    header)
-  ;; search teaser for real article url and load it
-  (goto-char (point-min))
-  (re-search-forward ".*<a href=\"\\(.*?\\)\">mehr lesen</a>" nil t)
-  (let ((url (format "http://www.perlentaucher.de%s" (match-string 1))))
-    (erase-buffer)
-    (shimbun-retrieve-url url))
-  ;; remove stuff
   (shimbun-remove-tags "<div class=\"tools\">" "Merkzettel</a></li></ul>")
   (shimbun-remove-tags "<div class=\"box2 jumper\">" "</div>")
   (shimbun-remove-tags "<a href=\"#top\">" "</a>"))
Index: ChangeLog
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/ChangeLog,v
retrieving revision 1.210
diff -u -r1.210 ChangeLog
--- ChangeLog	1 Oct 2009 11:50:33 -0000	1.210
+++ ChangeLog	5 Oct 2009 13:18:37 -0000
@@ -1,3 +1,11 @@
+2009-10-05  David Engster  <dengste@xxxxxx>
+
+	* sb-heise.el (shimbun-heise-get-newsticker-headers)
+	(shimbun-heise-wash-newsticker-article): Update regular expressions.
+	Use shimbun-remove-tags instead of delete-region.
+	* sb-perlentaucher-de.el (shimbun-clear-contents): The RSS feed no
+	longer points to a teaser, so stop loading the real article URL.
+
 2008-10-01  Katsumi Yamaoka  <yamaoka@xxxxxxx>
 
 	* sb-kantei.el (shimbun-index-url): Update url for m-magazine-en.