[Date Prev][Date Next][Thread Prev][][Date Index][Thread Index]

sb-atom and sb-rss fetching already deleted articles



Dear emacs-w3m maintainers,

In (info "(emacs-w3m)nnshimbun") it says:

   * Third of all, and this is written down so that you can remember it
     when you're filled with doubt: even when all articles from a
     `nnshimbun' group should be expired, the most recent one will be
     kept.  This is not to satisfy your indecisive heart, it's because
     the next time you fetch new articles for this group, `nnshimbun'
     will know where to begin and not fetch all the articles all over
     again.

However, neither sb-rss nor sb-atom seems to make use of that retained
article. You can test this as follows:

In Gnus, use

(setq shimbun-rss-hash-group-path-alist
      '(("randomsample" "http://www.randomsample.de/dru5/blog/feed" t)))

and create a new "rss-hash" shimbun group "randomsample". Enter the group
and expire all articles, which should delete all articles except the
newest one. Now do M-g on the group, and all the expired articles will
appear again as new.

I have attached a patch for sb-atom and sb-rss which fixes this issue for
me. It assumes that new articles are always inserted at the top of a
feed - AFAIK, this isn't strictly required for an Atom/RSS feed, but I've
yet to see a feed where this wasn't the case.

Regards,
David

PS: It's a very simple change, but due to the changed indentation the diff
output is quite long. Just use diff's --ignore-space-change (-b) option to
see the actual change.
Index: sb-atom.el
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/sb-atom.el,v
retrieving revision 1.7
diff -u -r1.7 sb-atom.el
--- sb-atom.el	4 Nov 2009 04:07:11 -0000	1.7
+++ sb-atom.el	17 Feb 2010 15:05:05 -0000
@@ -70,53 +70,57 @@
 	     (author (when (consp author-node)
 		       (mapconcat fn author-node ",")))
 	     url)
-	(dolist (entry (shimbun-rss-find-el
-			(intern (concat atom-ns "entry")) xml))
-	  (setq url
-		(catch 'url
-		  (dolist (link (shimbun-rss-find-el
-				 (intern (concat atom-ns "link")) entry))
-		    (when (string= (shimbun-atom-attribute-value
-				    (intern (concat atom-ns "rel")) link)
-				   "alternate")
-		      (throw 'url (shimbun-atom-attribute-value
-				   (intern (concat atom-ns "href")) link))))))
-	  (unless url
-	    (setq url (shimbun-atom-attribute-value
-		       (intern (concat atom-ns "href"))
-		       (car (shimbun-rss-find-el
-			     (intern (concat atom-ns "link")) entry)))))
-	  (when url
-	    (let* ((date (or (shimbun-rss-get-date shimbun url)
-			     (shimbun-rss-node-text atom-ns 'updated entry)
-			     (shimbun-rss-node-text atom-ns 'published entry)
-			     (shimbun-rss-node-text atom-ns 'modified entry)
-			     (shimbun-rss-node-text atom-ns 'created entry)
-			     (shimbun-rss-node-text atom-ns 'issued entry)
-			     (shimbun-rss-node-text dc-ns 'date entry)))
-		   (author-node (shimbun-rss-find-el
-				 (intern (concat atom-ns "author")) entry))
-		   (author (or (and (consp author-node)
-				    (mapconcat fn author-node ","))
-			       (shimbun-rss-node-text dc-ns 'creator entry)
-			       (shimbun-rss-node-text dc-ns 'contributor entry)
-			       author))
-		   (id (shimbun-rss-build-message-id shimbun url date)))
-	      (when (and id (or need-all-entries
-				(not (shimbun-search-id shimbun id))))
-		(push (shimbun-create-header
-		       0
-		       (or (shimbun-rss-node-text atom-ns 'title entry)
-			   (shimbun-rss-node-text dc-ns 'subject entry))
-		       (or author (shimbun-from-address shimbun))
-		       (shimbun-rss-process-date shimbun date)
-		       id "" 0 0 url
-		       (when need-summaries
-			 (let ((summary (shimbun-rss-node-text
-					 atom-ns 'summary entry)))
-			   (when summary
-			     (list (cons 'summary summary))))))
-		      headers))))))
+	(catch 'done
+	  (dolist (entry (shimbun-rss-find-el
+			  (intern (concat atom-ns "entry")) xml))
+	    (setq url
+		  (catch 'url
+		    (dolist (link (shimbun-rss-find-el
+				   (intern (concat atom-ns "link")) entry))
+		      (when (string= (shimbun-atom-attribute-value
+				      (intern (concat atom-ns "rel")) link)
+				     "alternate")
+			(throw 'url (shimbun-atom-attribute-value
+				     (intern (concat atom-ns "href")) link))))))
+	    (unless url
+	      (setq url (shimbun-atom-attribute-value
+			 (intern (concat atom-ns "href"))
+			 (car (shimbun-rss-find-el
+			       (intern (concat atom-ns "link")) entry)))))
+	    (when url
+	      (let* ((date (or (shimbun-rss-get-date shimbun url)
+			       (shimbun-rss-node-text atom-ns 'updated entry)
+			       (shimbun-rss-node-text atom-ns 'published entry)
+			       (shimbun-rss-node-text atom-ns 'modified entry)
+			       (shimbun-rss-node-text atom-ns 'created entry)
+			       (shimbun-rss-node-text atom-ns 'issued entry)
+			       (shimbun-rss-node-text dc-ns 'date entry)))
+		     (author-node (shimbun-rss-find-el
+				   (intern (concat atom-ns "author")) entry))
+		     (author (or (and (consp author-node)
+				      (mapconcat fn author-node ","))
+				 (shimbun-rss-node-text dc-ns 'creator entry)
+				 (shimbun-rss-node-text dc-ns 'contributor entry)
+				 author))
+		     (id (shimbun-rss-build-message-id shimbun url date)))
+		(when (and id
+			   (not need-all-entries)
+			   (shimbun-search-id shimbun id))
+		  (throw 'done nil))
+		(when id
+		  (push (shimbun-create-header
+			 0
+			 (or (shimbun-rss-node-text atom-ns 'title entry)
+			     (shimbun-rss-node-text dc-ns 'subject entry))
+			 (or author (shimbun-from-address shimbun))
+			 (shimbun-rss-process-date shimbun date)
+			 id "" 0 0 url
+			 (when need-summaries
+			   (let ((summary (shimbun-rss-node-text
+					   atom-ns 'summary entry)))
+			     (when summary
+			       (list (cons 'summary summary))))))
+			headers)))))))
       headers)))
 
 (defun shimbun-atom-attribute-value (attribute node)
Index: sb-rss.el
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/sb-rss.el,v
retrieving revision 1.47
diff -u -r1.47 sb-rss.el
--- sb-rss.el	13 May 2009 09:37:04 -0000	1.47
+++ sb-rss.el	17 Feb 2010 15:05:05 -0000
@@ -202,47 +202,50 @@
 				  '(body nil))
 		      (generate-new-buffer " *temp*")))
       (unwind-protect
-	  (dolist (item (shimbun-rss-find-el (intern (concat rss-ns "item"))
-					     xml)
-			headers)
-	    (let ((url (and (listp item)
-			    (eq (intern (concat rss-ns "item")) (car item))
-			    (shimbun-rss-node-text rss-ns 'link (cddr item)))))
-	      (when url
-		(let* ((date (or (shimbun-rss-get-date shimbun url)
-				 (shimbun-rss-node-text dc-ns 'date item)
-				 (shimbun-rss-node-text rss-ns 'pubDate item)))
-		       (id (shimbun-rss-build-message-id shimbun url date))
-		       (subject (shimbun-rss-node-text rss-ns 'title item)))
-		  (when (and id
-			     (or need-all-items
-				 (not (shimbun-search-id shimbun id)))
-			     (if (and ignored-subject subject)
-				 (not (string-match ignored-subject subject))
-			       t))
-		    (push
-		     (shimbun-create-header
-		      0
-		      (if hankaku
-			  (with-current-buffer hankaku
-			    (insert (or subject ""))
-			    (shimbun-japanese-hankaku-region (point-min)
-							     (point-max))
-			    (prog1 (buffer-string) (erase-buffer)))
-			subject)
-		      (or (shimbun-rss-node-text rss-ns 'author item)
-			  (shimbun-rss-node-text dc-ns 'creator item)
-			  (shimbun-rss-node-text dc-ns 'contributor item)
-			  author
-			  (shimbun-from-address shimbun))
-		      (shimbun-rss-process-date shimbun date)
-		      id "" 0 0 url
-		      (when need-descriptions
-			(let ((description (shimbun-rss-node-text
-					    rss-ns 'description item)))
-			  (when description
-			    (list (cons 'description description))))))
-		     headers))))))
+	  (catch 'done
+	    (dolist (item (shimbun-rss-find-el (intern (concat rss-ns "item"))
+					       xml)
+			  headers)
+	      (let ((url (and (listp item)
+			      (eq (intern (concat rss-ns "item")) (car item))
+			      (shimbun-rss-node-text rss-ns 'link (cddr item)))))
+		(when url
+		  (let* ((date (or (shimbun-rss-get-date shimbun url)
+				   (shimbun-rss-node-text dc-ns 'date item)
+				   (shimbun-rss-node-text rss-ns 'pubDate item)))
+			 (id (shimbun-rss-build-message-id shimbun url date))
+			 (subject (shimbun-rss-node-text rss-ns 'title item)))
+		    (when (and id
+			       (shimbun-search-id shimbun id)
+			       (not need-all-items))
+		      (throw 'done nil))
+		    (when (and id
+			       (if (and ignored-subject subject)
+				   (not (string-match ignored-subject subject))
+				 t))
+		      (push
+		       (shimbun-create-header
+			0
+			(if hankaku
+			    (with-current-buffer hankaku
+			      (insert (or subject ""))
+			      (shimbun-japanese-hankaku-region (point-min)
+							       (point-max))
+			      (prog1 (buffer-string) (erase-buffer)))
+			  subject)
+			(or (shimbun-rss-node-text rss-ns 'author item)
+			    (shimbun-rss-node-text dc-ns 'creator item)
+			    (shimbun-rss-node-text dc-ns 'contributor item)
+			    author
+			    (shimbun-from-address shimbun))
+			(shimbun-rss-process-date shimbun date)
+			id "" 0 0 url
+			(when need-descriptions
+			  (let ((description (shimbun-rss-node-text
+					      rss-ns 'description item)))
+			    (when description
+			      (list (cons 'description description))))))
+		       headers)))))))
 	(when (buffer-live-p hankaku)
 	  (kill-buffer hankaku))))))
 
Index: ChangeLog
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/ChangeLog,v
retrieving revision 1.228
diff -u -r1.228 ChangeLog
--- ChangeLog	10 Feb 2010 04:48:57 -0000	1.228
+++ ChangeLog	17 Feb 2010 15:05:06 -0000
@@ -1,3 +1,9 @@
+2010-02-17  David Engster  <dengste@xxxxxx>
+
+	* sb-atom.el (shimbun-atom-get-headers): When encountering an already
+	present article, stop fetching further entries.
+	* sb-rss.el (shimbun-rss-get-headers): Same change as in sb-atom.
+
 2010-02-10  Katsumi Yamaoka  <yamaoka@xxxxxxx>
 
 	* sb-itmedia.el (shimbun-itmedia-group-alist): Add ogikubo, tachibana,