[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Shimbun updates
- From: David Engster <deng@xxxxxxxxxxxxxxx>
- Date: Tue, 23 Dec 2008 13:02:22 +0100
- X-ml-name: emacs-w3m
- X-mail-count: 10554
Attached are two patches for the slashdot and the sueddeutsche-de
shimbuns. Please see the ChangeLog entry for details.
Regards,
David
Index: ChangeLog
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/ChangeLog,v
retrieving revision 1.160
diff -u -r1.160 ChangeLog
--- ChangeLog 16 Dec 2008 11:04:11 -0000 1.160
+++ ChangeLog 23 Dec 2008 11:59:19 -0000
@@ -1,3 +1,22 @@
+2008-12-23 David Engster <dengste@xxxxxx>
+
+ * sb-sueddeutsche-de.el (shimbun-sueddeutsche-de-group-url): Removed
+ kino group since feed is broken.
+ (shimbun-get-headers, shimbun-rss-build-message-id): Adapt to new URL
+ scheme.
+
+ * sb-slashdot.el (sb-multi): Removed.
+ (shimbun-slashdot-group-url): Changed frontpage URL.
+ (shimbun-slashdot-regexp-section-id-subject): Adapt regexp.
+ (shimbun-slashdot-regexp-author-time): Changed for better date parsing.
+ (shimbun-slashdot-regexp-comment-system): Removed.
+ (shimbun-slashdot-get-headers): Adapt to new site design. Improved
+ date parsing. Immediately retrieve old comment system. Removed
+ catch/throw since new articles need not be at the top.
+ (shimbun-multi-next-url): Removed.
+ (shimbun-clear-contents): Changed due to removal of sb-multi. Set
+ quotes in italics.
+
2008-12-16 Katsumi Yamaoka <yamaoka@xxxxxxx>
* sb-itmedia.el (shimbun-itmedia-content-start): Don't exclude author's
Index: sb-slashdot.el
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/sb-slashdot.el,v
retrieving revision 1.4
diff -u -r1.4 sb-slashdot.el
--- sb-slashdot.el 11 Nov 2008 00:09:29 -0000 1.4
+++ sb-slashdot.el 23 Dec 2008 11:59:45 -0000
@@ -23,12 +23,11 @@
;;; Code:
(require 'shimbun)
-(require 'sb-multi)
-(luna-define-class shimbun-slashdot (shimbun-multi shimbun) ())
+(luna-define-class shimbun-slashdot (shimbun) ())
(defvar shimbun-slashdot-group-url
- '(("frontpage" "http://www.slashdot.org")
+ '(("frontpage" "http://slashdot.org")
("apple" "http://apple.slashdot.org")
("askslashdot" "http://ask.slashdot.org")
("books" "http://books.slashdot.org")
@@ -56,14 +55,14 @@
Can be 'flat', 'thread', or 'nested'.")
(defvar shimbun-slashdot-regexp-section-id-subject
- "<\\(?:div\\|h3\\)[ \t]+class=\"\\(generaltitle\\|briefarticles\\|story\\)\"[^\0]*?\
-<a[ \t]+href=\".*slashdot.org/\\(.*?\\)/\\(.*?\\).shtml\".*?>\\(.*?\\)</a>")
+ "<\\s-*h3\\s-+class=\"story\"[^\0]*?<a\\s-+href=\"\
+\\(?:/*\\([a-zA-Z]+\\)?\\.?slashdot.org/article.pl\\?sid=\\(.*?\\)\
+\\|.*slashdot.org/\\(.*?\\)/\\(.*?\\).shtml\\)\
+\".*?>\\(.*?\\)</a>")
(defvar shimbun-slashdot-regexp-author-time
- "Posted[\t \n]+by[^a-zA-Z]*\\(.*\\)[^\0]*?@\\([0-9]+\\):\\([0-9]+\\)\\(AM\\|PM\\)")
-
-(defvar shimbun-slashdot-regexp-comment-system
- "use[ \t]+<a[ \t]+href=\"\\(.+\\)\">[ \t]*the classic discussion system")
+ "Posted[\t \n]+by[^a-zA-Z]*\\(.*\\)[^\0]*?on\\s-+[a-zA-Z]+\\s-+\
+\\([a-zA-Z]+\\)\\s-+\\([0-9]+\\).+@\\([0-9]+\\):\\([0-9]+\\)\\(AM\\|PM\\)")
(defvar shimbun-slashdot-groups
(mapcar 'car shimbun-slashdot-group-url))
@@ -86,98 +85,97 @@
(defun shimbun-slashdot-get-headers (shimbun)
(let ((from "Slashdot <invalid@xxxxxxxxxxxx>")
- hour minute date ampm id url subject headers section)
- (catch 'stop
- (while (re-search-forward shimbun-slashdot-regexp-section-id-subject
- nil t)
- (setq section (match-string 2))
- (setq id (match-string 3))
- (setq url (concat "http://www.slashdot.org/" section "/" id ".shtml"))
- ;; Make section prettier
+ (allmonths '("january" "february" "march" "april" "may" "june"
+ "july" "august" "september" "october" "november"
+ "december"))
+ month day hour minute date ampm id url subject headers section)
+ ;; Make article URL
+ (while (re-search-forward shimbun-slashdot-regexp-section-id-subject
+ nil t)
+ (setq section (or (match-string 1) (match-string 3))
+ id (or (match-string 2) (match-string 4))
+ url (concat "http://" section ".slashdot.org/article.pl?sid=" id
+ "&simpledesign=1&lowbandwidth=1")
+ subject (match-string 5))
+ (if (null shimbun-slashdot-get-comments)
+ (setq url (concat url "&no_d2=1&threshold=5"))
+ (setq url (concat url "&no_d2=1&threshold="
+ (number-to-string shimbun-slashdot-comment-threshold)
+ "&mode=" shimbun-slashdot-comment-display
+ "&commentsort=0&pid=0")))
+ ;; Make section prettier
+ (when section
+ (when (string= section "ask")
+ (setq section "askslashdot"))
(setq subject (concat
(if (< (length section) 4)
(upcase section)
(capitalize section))
- ": " (match-string 4)))
- (while (string-match "</?[a-zA-Z]+?>" subject)
- (setq subject (replace-match "\"" t t subject)))
- (if (string= (match-string 1) "briefarticles")
- (progn
- (setq hour "00")
- (setq minute "00")
- (setq from "Slashdot")
- (setq subject (concat "(brief article) " subject)))
- (when (re-search-forward shimbun-slashdot-regexp-author-time
- nil t)
- (setq from (match-string 1))
- (setq hour (match-string 2))
- (setq minute (match-string 3))
- ;; US->European time conversion
- (cond
- ((and (string= (match-string 4) "PM")
- (not (string= hour "12")))
- (setq hour
- (number-to-string (+ (string-to-number hour) 12))))
- ((and (string= (match-string 4) "AM")
- (string= hour "12"))
- (setq hour "00")))
- ;; remove link from author name if necessary
- (when (string-match ">\\(.*\\)</a>" from)
- (setq from (match-string 1 from))))
- (while (string-match "/" id)
- (setq id (replace-match "" t t id)))
- (setq date (shimbun-make-date-string
- ;; Hey, my first year 2100 bug!
- (string-to-number (concat "20" (substring id 0 2)))
- (string-to-number (substring id 2 4))
- (string-to-number (substring id 4 6))
- (format "%s:%s" hour minute)
- ;; Maybe we should derive this from current-time-zone?
- "+0000")))
+ ": " subject)))
+ (while (string-match "</?[a-zA-Z]+?>" subject)
+ (setq subject (replace-match "\"" t t subject)))
+ (when (re-search-forward shimbun-slashdot-regexp-author-time
+ nil t)
+ (setq from (match-string 1)
+ month (match-string 2)
+ day (match-string 3)
+ hour (match-string 4)
+ minute (match-string 5)
+ ampm (match-string 6))
+ (setq month
+ (- 13 (length
+ (member-ignore-case month allmonths))))
+ ;; US->European time conversion
+ (cond
+ ((and (string= ampm "PM")
+ (not (string= hour "12")))
+ (setq hour
+ (number-to-string (+ (string-to-number hour) 12))))
+ ((and (string= ampm "AM")
+ (string= hour "12"))
+ (setq hour "00")))
+ ;; remove link from author name if necessary
+ (when (string-match ">\\(.*\\)</a>" from)
+ (setq from (match-string 1 from)))
+ (while (string-match "/" id)
+ (setq id (replace-match "" t t id)))
+ (setq date (shimbun-make-date-string
+ ;; Hey, my first year 2100 bug!
+ (string-to-number (concat "20" (substring id 0 2)))
+ month (string-to-number day)
+ (format "%s:%s" hour minute)
+ ;; Maybe we should derive this from current-time-zone?
+ "+0000"))
(setq id (concat "<" section id "@slashdot.org>"))
- (when (shimbun-search-id shimbun id)
- (throw 'stop nil))
- (push (shimbun-make-header
- 0 (shimbun-mime-encode-string subject)
- (shimbun-mime-encode-string from)
- date id "" 0 0 url)
- headers)))
+ (unless (shimbun-search-id shimbun id)
+ (push (shimbun-make-header
+ 0 (shimbun-mime-encode-string subject)
+ (shimbun-mime-encode-string from)
+ date id "" 0 0 url)
+ headers))))
headers))
-(luna-define-method shimbun-multi-next-url ((shimbun shimbun-slashdot)
- header url)
- (if (and shimbun-slashdot-get-comments
- (progn
- (goto-char (point-min))
- (re-search-forward shimbun-slashdot-regexp-comment-system nil t)))
- (let ((url (concat "http:" (match-string 1))))
- (when (string-match "threshold=\\([0-9]\\)" url)
- (setq url
- (replace-match
- (number-to-string shimbun-slashdot-comment-threshold)
- t t url 1)))
- (when (string-match "mode=\\([a-zA-Z]+\\)" url)
- (setq url
- (replace-match shimbun-slashdot-comment-display t t url 1)))
- url)
- nil))
-
(luna-define-method shimbun-clear-contents :around ((shimbun
shimbun-slashdot)
header)
(goto-char (point-min))
- (if (or (null shimbun-slashdot-get-comments)
- (re-search-forward "<div class=\"intro\".*?>" nil t))
- (progn
- (goto-char (point-min))
- (shimbun-remove-tags "<html>" "<div class=\"intro\".*?>")
- (shimbun-remove-tags "<div class=\"commentBox\".*?>" "</html>")
- (when shimbun-slashdot-get-comments
- (goto-char (point-max))
- (insert "\n<br><br>\n")))
- (shimbun-remove-tags "<html>" "<a name=\"topcomment\">")
- (shimbun-remove-tags "<div id=\"footer\">" "</html>")))
-
+ (shimbun-remove-tags "<html>" "<div class=\"intro\".*?>")
+ (if (null shimbun-slashdot-get-comments)
+ (shimbun-remove-tags "<div class=\"commentBox\".*?>" "</html>")
+ (re-search-forward "<a name=\"topcomment\">" nil t)
+ (insert "\n<br><br>\n")
+ (shimbun-remove-tags "<div id=\"footer\">" "</html>")
+ (shimbun-remove-tags "<div class=\"commentwrap\"" "<a name=\"topcomment\">")
+ ;; convert quote tags to italics
+ (goto-char (point-min))
+ (while (re-search-forward
+ "\\(<[ ]*div[ ]+class=[\"']quote[\"'][ ]*>\\|<[ ]*blockquote[ ]*>\\)" nil t)
+ (let ((str (match-string 0)))
+ (replace-match "<i>")
+ (if (string-match "class" str)
+ (re-search-forward "</div>")
+ (re-search-forward "</blockquote>"))
+ (replace-match "</i>")))))
(provide 'sb-slashdot)
Index: sb-sueddeutsche-de.el
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/sb-sueddeutsche-de.el,v
retrieving revision 1.1
diff -u -r1.1 sb-sueddeutsche-de.el
--- sb-sueddeutsche-de.el 29 Jan 2008 23:08:26 -0000 1.1
+++ sb-sueddeutsche-de.el 23 Dec 2008 11:59:32 -0000
@@ -38,8 +38,6 @@
"http://www.sueddeutsche.de/app/service/rss/ressort/wirtschaft/rss.xml")
("finanzen"
"http://www.sueddeutsche.de/app/service/rss/ressort/finanzen/rss.xml")
- ("kino"
- "http://www.sueddeutsche.de/app/service/rss/kino/neuimkino.xml")
("kultur"
"http://www.sueddeutsche.de/app/service/rss/ressort/kultur/rss.xml")
("sport"
@@ -85,10 +83,8 @@
(mapcar
(lambda (header)
(setq url (shimbun-header-xref header))
- (when (string-match "target=http%3A%2F%2F\\(.*\\)%2F" url)
- (setq url (concat "http://" (match-string 1 url) "/print.html"))
- (while (string-match "%2F" url)
- (setq url (replace-match "/" t t url)))
+ (when (string-match "ns_url=\\(http://www.sueddeutsche.de/.*\\)/" url)
+ (setq url (concat (match-string 1 url) "/print.html"))
(shimbun-header-set-xref header url))
header)
headers)))
@@ -98,11 +94,8 @@
url date)
(let ((group (shimbun-current-group-internal shimbun))
id)
- (cond ((and (string-equal group "kino")
- (string-match "/\\([0-9]+\\)/" url))
- (concat "<" (match-string 1 url) "." group "@sueddeutsche.de>"))
- ((string-match
- "target=.*sueddeutsche.de.*%2F\\([0-9]+\\)%2F\\([0-9]+\\)%2F" url)
+ (cond ((string-match
+ "ns_url=.*sueddeutsche.de.*/\\([0-9]+\\)/\\([0-9]+\\)/" url)
(concat "<" (match-string 1 url) "." (match-string 2 url) "." group
"@sueddeutsche.de>"))
(t