[Date Prev][Date Next][Thread Prev][][Date Index][Thread Index]

Re: sb-the-register.el update



On Fri, 05 Jan 2007 11:40:44 +0100 David Hansen wrote:

> 2007-01-05  David Hansen  <david.hansen@xxxxxxxxxxxxxxxxxxx>
>
> 	* sb-the-register.el (shimbun-make-contents): Improve junk filtering.
> 	(shimbun-the-register-content-end): Also match the copyright paragraph.

And here's the diff:
*** sb-the-register.el	05 Jan 2007 08:23:53 +0100	1.6
--- sb-the-register.el	05 Jan 2007 11:36:13 +0100	
***************
*** 34,40 ****
  (defvar shimbun-the-register-url "http://www.theregister.co.uk/")
  (defvar shimbun-the-register-from-address  "invalid@xxxxxxxxxxxxxxxxx")
  (defvar shimbun-the-register-content-start "<h2>")
! (defvar shimbun-the-register-content-end "<p class=\"Furniture\">")
  
  (defvar shimbun-the-register-path-alist
    '(("news" . "headlines.rss")
--- 34,41 ----
  (defvar shimbun-the-register-url "http://www.theregister.co.uk/")
  (defvar shimbun-the-register-from-address  "invalid@xxxxxxxxxxxxxxxxx")
  (defvar shimbun-the-register-content-start "<h2>")
! (defvar shimbun-the-register-content-end
!   "<p class=\"Furniture\">\\|<p id=\"Copyright\">")
  
  (defvar shimbun-the-register-path-alist
    '(("news" . "headlines.rss")
***************
*** 77,92 ****
    :before ((shimbun shimbun-the-register) header)
    (save-excursion
      ;; remove annoying stuff
!     (let ((junk '(("(<span class=\"URL\">" . "</span>)")
! 		  ("<div class=\"Ad\"" . "</div>"))))
!       (while junk
! 	(goto-char (point-min))
! 	(let ((beg-str (caar junk)) (end-str (cdar junk)) beg end)
! 	  (setq junk (cdr junk))
! 	  (while (search-forward beg-str nil t)
! 	    (setq beg (match-beginning 0))
! 	    (when (setq end (search-forward end-str nil t))
! 	      (delete-region beg end))))))))
  
  (provide 'sb-the-register)
  
--- 78,91 ----
    :before ((shimbun shimbun-the-register) header)
    (save-excursion
      ;; remove annoying stuff
!     (dolist (junk '(("(?<span class=\"URL\">" . "</span>)?")
!                     ("<div \\(class\\|id\\)=\"[^\"]*Ad\"" . "</div>")))
!       (goto-char (point-min))
!       (message "%s" (car junk))
!       (while (re-search-forward (car junk) nil t)
!         (let ((beg (match-beginning 0)))
!           (when (re-search-forward (cdr junk) nil t)
!             (delete-region beg (point))))))))
  
  (provide 'sb-the-register)