[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Shimbun updates



Attached are two patches for the slashdot and the sueddeutsche-de
shimbuns. Please see the ChangeLog entry for details.

Regards,
David
Index: ChangeLog
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/ChangeLog,v
retrieving revision 1.160
diff -u -r1.160 ChangeLog
--- ChangeLog	16 Dec 2008 11:04:11 -0000	1.160
+++ ChangeLog	23 Dec 2008 11:59:19 -0000
@@ -1,3 +1,22 @@
+2008-12-23  David Engster  <dengste@xxxxxx>
+
+	* sb-sueddeutsche-de.el (shimbun-sueddeutsche-de-group-url): Removed
+	kino group since feed is broken.
+	(shimbun-get-headers, shimbun-rss-build-message-id): Adapt to new URL
+	scheme.
+
+	* sb-slashdot.el (sb-multi): Removed.
+	(shimbun-slashdot-group-url): Changed frontpage URL.
+	(shimbun-slashdot-regexp-section-id-subject): Adapt regexp.
+	(shimbun-slashdot-regexp-author-time): Changed for better date parsing.
+	(shimbun-slashdot-regexp-comment-system): Removed.
+	(shimbun-slashdot-get-headers): Adapt to new site design.  Improved
+	date parsing.  Immediately retrieve old comment system.  Removed
+	catch/throw since new articles need not be at the top.
+	(shimbun-multi-next-url): Removed.
+	(shimbun-clear-contents): Changed due to removal of sb-multi.  Set
+	quotes in italics.
+
 2008-12-16  Katsumi Yamaoka  <yamaoka@xxxxxxx>
 
 	* sb-itmedia.el (shimbun-itmedia-content-start): Don't exclude author's
Index: sb-slashdot.el
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/sb-slashdot.el,v
retrieving revision 1.4
diff -u -r1.4 sb-slashdot.el
--- sb-slashdot.el	11 Nov 2008 00:09:29 -0000	1.4
+++ sb-slashdot.el	23 Dec 2008 11:59:45 -0000
@@ -23,12 +23,11 @@
 ;;; Code:
 
 (require 'shimbun)
-(require 'sb-multi)
 
-(luna-define-class shimbun-slashdot (shimbun-multi shimbun) ())
+(luna-define-class shimbun-slashdot (shimbun) ())
 
 (defvar shimbun-slashdot-group-url
-  '(("frontpage" "http://www.slashdot.org")
+  '(("frontpage" "http://slashdot.org")
     ("apple" "http://apple.slashdot.org")
     ("askslashdot" "http://ask.slashdot.org")
     ("books" "http://books.slashdot.org")
@@ -56,14 +55,14 @@
 Can be 'flat', 'thread', or 'nested'.")
 
 (defvar shimbun-slashdot-regexp-section-id-subject
-  "<\\(?:div\\|h3\\)[ \t]+class=\"\\(generaltitle\\|briefarticles\\|story\\)\"[^\0]*?\
-<a[ \t]+href=\".*slashdot.org/\\(.*?\\)/\\(.*?\\).shtml\".*?>\\(.*?\\)</a>")
+  "<\\s-*h3\\s-+class=\"story\"[^\0]*?<a\\s-+href=\"\
+\\(?:/*\\([a-zA-Z]+\\)?\\.?slashdot.org/article.pl\\?sid=\\(.*?\\)\
+\\|.*slashdot.org/\\(.*?\\)/\\(.*?\\).shtml\\)\
+\".*?>\\(.*?\\)</a>")
 
 (defvar shimbun-slashdot-regexp-author-time
-  "Posted[\t \n]+by[^a-zA-Z]*\\(.*\\)[^\0]*?@\\([0-9]+\\):\\([0-9]+\\)\\(AM\\|PM\\)")
-
-(defvar shimbun-slashdot-regexp-comment-system
-  "use[ \t]+<a[ \t]+href=\"\\(.+\\)\">[ \t]*the classic discussion system")
+  "Posted[\t \n]+by[^a-zA-Z]*\\(.*\\)[^\0]*?on\\s-+[a-zA-Z]+\\s-+\
+\\([a-zA-Z]+\\)\\s-+\\([0-9]+\\).+@\\([0-9]+\\):\\([0-9]+\\)\\(AM\\|PM\\)")
 
 (defvar shimbun-slashdot-groups
   (mapcar 'car shimbun-slashdot-group-url))
@@ -86,98 +85,97 @@
 
 (defun shimbun-slashdot-get-headers (shimbun)
   (let ((from "Slashdot <invalid@xxxxxxxxxxxx>")
-	hour minute date ampm id url subject headers section)
-    (catch 'stop
-      (while (re-search-forward shimbun-slashdot-regexp-section-id-subject
-				nil t)
-	(setq section (match-string 2))
-	(setq id (match-string 3))
-	(setq url (concat "http://www.slashdot.org/" section "/" id ".shtml"))
-	;; Make section prettier
+	(allmonths '("january" "february" "march" "april" "may" "june"
+		     "july" "august" "september" "october" "november"
+		     "december"))
+	month day hour minute date ampm id url subject headers section)
+    ;; Make article URL
+    (while (re-search-forward shimbun-slashdot-regexp-section-id-subject
+			      nil t)
+      (setq section (or (match-string 1) (match-string 3))
+	    id (or (match-string 2) (match-string 4))
+	    url (concat "http://" section ".slashdot.org/article.pl?sid=" id
+			"&simpledesign=1&lowbandwidth=1")
+	    subject (match-string 5))
+      (if (null shimbun-slashdot-get-comments)
+	  (setq url (concat url "&no_d2=1&threshold=5"))
+	(setq url (concat url "&no_d2=1&threshold="
+			  (number-to-string shimbun-slashdot-comment-threshold)
+			  "&mode=" shimbun-slashdot-comment-display
+			  "&commentsort=0&pid=0")))
+      ;; Make section prettier
+      (when section
+	(when (string= section "ask")
+	  (setq section "askslashdot"))
 	(setq subject (concat
 		       (if (< (length section) 4)
 			   (upcase section)
 			 (capitalize section))
-		       ": " (match-string 4)))
-	(while (string-match "</?[a-zA-Z]+?>" subject)
-	  (setq subject (replace-match "\"" t t subject)))
-	(if (string= (match-string 1) "briefarticles")
-	    (progn
-	      (setq hour "00")
-	      (setq minute "00")
-	      (setq from "Slashdot")
-	      (setq subject (concat "(brief article) " subject)))
-	  (when (re-search-forward shimbun-slashdot-regexp-author-time
-				   nil t)
-	    (setq from (match-string 1))
-	    (setq hour (match-string 2))
-	    (setq minute (match-string 3))
-	    ;; US->European time conversion
-	    (cond
-	     ((and (string= (match-string 4) "PM")
-		   (not (string= hour "12")))
-	      (setq hour
-		    (number-to-string (+ (string-to-number hour) 12))))
-	     ((and (string= (match-string 4) "AM")
-		   (string= hour "12"))
-	      (setq hour "00")))
-	    ;; remove link from author name if necessary
-	    (when (string-match ">\\(.*\\)</a>" from)
-	      (setq from (match-string 1 from))))
-	  (while (string-match "/" id)
-	    (setq id (replace-match "" t t id)))
-	  (setq date (shimbun-make-date-string
-		      ;; Hey, my first year 2100 bug!
-		      (string-to-number (concat "20" (substring id 0 2)))
-		      (string-to-number (substring id 2 4))
-		      (string-to-number (substring id 4 6))
-		      (format "%s:%s" hour minute)
-		      ;; Maybe we should derive this from current-time-zone?
-		      "+0000")))
+		       ": " subject)))
+      (while (string-match "</?[a-zA-Z]+?>" subject)
+	(setq subject (replace-match "\"" t t subject)))
+      (when (re-search-forward shimbun-slashdot-regexp-author-time
+			       nil t)
+	(setq from (match-string 1)
+	      month (match-string 2)
+	      day (match-string 3)
+	      hour (match-string 4)
+	      minute (match-string 5)
+	      ampm (match-string 6))
+	(setq month
+	      (- 13 (length
+		     (member-ignore-case month allmonths))))
+	;; US->European time conversion
+	(cond
+	 ((and (string= ampm "PM")
+	       (not (string= hour "12")))
+	  (setq hour
+		(number-to-string (+ (string-to-number hour) 12))))
+	 ((and (string= ampm "AM")
+	       (string= hour "12"))
+	  (setq hour "00")))
+	;; remove link from author name if necessary
+	(when (string-match ">\\(.*\\)</a>" from)
+	  (setq from (match-string 1 from)))
+	(while (string-match "/" id)
+	  (setq id (replace-match "" t t id)))
+	(setq date (shimbun-make-date-string
+		    ;; Hey, my first year 2100 bug!
+		    (string-to-number (concat "20" (substring id 0 2)))
+		    month (string-to-number day)
+		    (format "%s:%s" hour minute)
+		    ;; Maybe we should derive this from current-time-zone?
+		    "+0000"))
 	(setq id (concat "<" section id "@slashdot.org>"))
-	(when (shimbun-search-id shimbun id)
-	  (throw 'stop nil))
-	(push (shimbun-make-header
-	       0 (shimbun-mime-encode-string subject)
-	       (shimbun-mime-encode-string from)
-	       date id "" 0 0 url)
-	      headers)))
+	(unless (shimbun-search-id shimbun id)
+	  (push (shimbun-make-header
+		 0 (shimbun-mime-encode-string subject)
+		 (shimbun-mime-encode-string from)
+		 date id "" 0 0 url)
+		headers))))
     headers))
 
-(luna-define-method shimbun-multi-next-url ((shimbun shimbun-slashdot)
-                                            header url)
-  (if (and shimbun-slashdot-get-comments
-	   (progn
-	     (goto-char (point-min))
-	     (re-search-forward shimbun-slashdot-regexp-comment-system nil t)))
-      (let ((url (concat "http:" (match-string 1))))
-	(when (string-match "threshold=\\([0-9]\\)" url)
-	  (setq url
-		(replace-match
-		 (number-to-string shimbun-slashdot-comment-threshold)
-		 t t url 1)))
-	(when (string-match "mode=\\([a-zA-Z]+\\)" url)
-	  (setq url
-		(replace-match shimbun-slashdot-comment-display t t url 1)))
-	url)
-    nil))
-
 (luna-define-method shimbun-clear-contents :around ((shimbun
 						     shimbun-slashdot)
 						    header)
   (goto-char (point-min))
-  (if (or (null shimbun-slashdot-get-comments)
-	  (re-search-forward "<div class=\"intro\".*?>" nil t))
-      (progn
-	(goto-char (point-min))
-	(shimbun-remove-tags "<html>" "<div class=\"intro\".*?>")
-	(shimbun-remove-tags "<div class=\"commentBox\".*?>" "</html>")
-	(when shimbun-slashdot-get-comments
-	  (goto-char (point-max))
-	  (insert "\n<br><br>&#012\n")))
-    (shimbun-remove-tags "<html>" "<a name=\"topcomment\">")
-    (shimbun-remove-tags "<div id=\"footer\">" "</html>")))
-
+  (shimbun-remove-tags "<html>" "<div class=\"intro\".*?>")
+  (if (null shimbun-slashdot-get-comments)
+      (shimbun-remove-tags "<div class=\"commentBox\".*?>" "</html>")
+    (re-search-forward "<a name=\"topcomment\">" nil t)
+    (insert "\n<br><br>&#012\n")
+    (shimbun-remove-tags "<div id=\"footer\">" "</html>")
+    (shimbun-remove-tags "<div class=\"commentwrap\"" "<a name=\"topcomment\">")
+    ;; convert quote tags to italics
+    (goto-char (point-min))
+    (while (re-search-forward
+	    "\\(<[ ]*div[ ]+class=[\"']quote[\"'][ ]*>\\|<[ ]*blockquote[ ]*>\\)" nil t)
+      (let ((str (match-string 0)))
+	(replace-match "<i>")
+	(if (string-match "class" str)
+	    (re-search-forward "</div>")
+	  (re-search-forward "</blockquote>"))
+	(replace-match "</i>")))))
 
 (provide 'sb-slashdot)
 
Index: sb-sueddeutsche-de.el
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/sb-sueddeutsche-de.el,v
retrieving revision 1.1
diff -u -r1.1 sb-sueddeutsche-de.el
--- sb-sueddeutsche-de.el	29 Jan 2008 23:08:26 -0000	1.1
+++ sb-sueddeutsche-de.el	23 Dec 2008 11:59:32 -0000
@@ -38,8 +38,6 @@
      "http://www.sueddeutsche.de/app/service/rss/ressort/wirtschaft/rss.xml")
     ("finanzen"
      "http://www.sueddeutsche.de/app/service/rss/ressort/finanzen/rss.xml")
-    ("kino"
-     "http://www.sueddeutsche.de/app/service/rss/kino/neuimkino.xml")
     ("kultur"
      "http://www.sueddeutsche.de/app/service/rss/ressort/kultur/rss.xml")
     ("sport"
@@ -85,10 +83,8 @@
     (mapcar
      (lambda (header)
        (setq url (shimbun-header-xref header))
-       (when (string-match "target=http%3A%2F%2F\\(.*\\)%2F" url)
-	 (setq url (concat "http://" (match-string 1 url) "/print.html"))
-	 (while (string-match "%2F" url)
-	   (setq url (replace-match "/" t t url)))
+       (when (string-match "ns_url=\\(http://www.sueddeutsche.de/.*\\)/" url)
+	 (setq url (concat (match-string 1 url) "/print.html"))
 	 (shimbun-header-set-xref header url))
        header)
      headers)))
@@ -98,11 +94,8 @@
 						  url date)
   (let ((group (shimbun-current-group-internal shimbun))
 	id)
-    (cond ((and (string-equal group "kino")
-		(string-match "/\\([0-9]+\\)/" url))
-	   (concat "<" (match-string 1 url) "." group "@sueddeutsche.de>"))
-	  ((string-match
-	    "target=.*sueddeutsche.de.*%2F\\([0-9]+\\)%2F\\([0-9]+\\)%2F" url)
+    (cond ((string-match
+	    "ns_url=.*sueddeutsche.de.*/\\([0-9]+\\)/\\([0-9]+\\)/" url)
 	   (concat "<" (match-string 1 url) "." (match-string 2 url) "." group
 		   "@sueddeutsche.de>"))
 	  (t