[Date Prev][Date Next][Thread Prev][][Date Index][Thread Index]

libxml support



Dear maintainers,

Please find attached a small patch which enables the shimbun library
to use libxml for parsing, if available (currently only in emacs24-bzr,
when it is compiled with libxml support).

This has reduced the time for updating my shimbun groups by almost half
(I use shimbun-use-local, so the time for retrieving the feeds is not
an issue). The normal xml-parse-region can be pretty slow for large feeds.

Best,
David
Index: shimbun.el
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/shimbun.el,v
retrieving revision 1.209
diff -u -r1.209 shimbun.el
--- shimbun.el	20 Dec 2010 01:31:55 -0000	1.209
+++ shimbun.el	21 Apr 2011 20:13:39 -0000
@@ -79,6 +79,7 @@
 (require 'luna)
 (require 'std11)
 (require 'w3m)
+(require 'xml)
 
 (eval-and-compile
   (luna-define-class shimbun ()
@@ -1814,6 +1815,40 @@
     (unless (eobp)
       (shimbun-japanese-hankaku-region start (point-max) quote))))
 
+(defun shimbun-xml-parse-buffer ()
+  "Parse the whole buffer as XML; return it like `xml-parse-region'.
+Use `libxml-parse-xml-region' when available, since it is much
+faster, and re-attach the root element's xmlns declarations.
+Return nil if the buffer holds no markup or cannot be parsed."
+  (if (not (fboundp 'libxml-parse-xml-region))
+      ;; We don't have libxml, so just use the slow one.
+      (xml-parse-region (point-min) (point-max))
+    (save-excursion
+      (goto-char (point-min))
+      (let ((xml (and (search-forward "<" nil t)
+                      (libxml-parse-xml-region (1- (point)) (point-max))))
+            (start 0)
+            namespaces)
+        ;; Collect the xmlns declarations of the root tag; [^>] also
+        ;; matches newlines, so a tag wrapped over lines is handled.
+        (goto-char (point-min))
+        (when (re-search-forward "<\\(rss\\|feed\\)\\([^>]*\\)>" nil t)
+          (let ((decls (match-string 2)))
+            (while (string-match "\\(xmlns:?.*?\\)=\"\\(.*?\\)\"" decls start)
+              (setq start (match-end 0))
+              (push (cons (intern (match-string 1 decls))
+                          (match-string 2 decls))
+                    namespaces))))
+        ;; A failed parse gives nil; return that, not the non-nil (nil).
+        (when (consp xml)
+          (when namespaces
+            (if (nth 1 xml)
+                (nconc (nth 1 xml) namespaces)
+              (setcar (cdr xml) namespaces)))
+          (list xml))))))
+
+
+
 (provide 'shimbun)
 
 ;;; shimbun.el ends here
Index: sb-rss.el
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/sb-rss.el,v
retrieving revision 1.51
diff -u -r1.51 sb-rss.el
--- sb-rss.el	4 Mar 2010 03:14:49 -0000	1.51
+++ sb-rss.el	21 Apr 2011 20:13:40 -0000
@@ -183,7 +183,7 @@
 from the feed have date information, the result is sorted by
 ascending date."
   (let* ((xml (condition-case err
-		  (xml-parse-region (point-min) (point-max))
+		  (shimbun-xml-parse-buffer)
 		(error
 		 (message "Error while parsing %s: %s"
 			  (shimbun-index-url shimbun)
Index: sb-rss-hash.el
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/sb-rss-hash.el,v
retrieving revision 1.6
diff -u -r1.6 sb-rss-hash.el
--- sb-rss-hash.el	13 May 2009 22:50:46 -0000	1.6
+++ sb-rss-hash.el	21 Apr 2011 20:13:40 -0000
@@ -62,7 +62,7 @@
       (insert buf-str)
       ;; parse xml : check url and desc
       (setq xml (condition-case err
-		    (xml-parse-region (point-min) (point-max))
+		    (shimbun-xml-parse-buffer)
 		  (error
 		   (message "Error while parsing %s: %s"
 			    (content-hash-contents-url content-hash shimbun)
Index: sb-atom.el
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/sb-atom.el,v
retrieving revision 1.10
diff -u -r1.10 sb-atom.el
--- sb-atom.el	4 Mar 2010 03:14:49 -0000	1.10
+++ sb-atom.el	21 Apr 2011 20:13:40 -0000
@@ -59,7 +59,7 @@
 items from the feed are returned.  If the entries from the feed
 have date information, the result is sorted by ascending date."
   (let* ((xml (condition-case err
-		  (xml-parse-region (point-min) (point-max))
+		  (shimbun-xml-parse-buffer)
 		(error
 		 (message "Error while parsing %s: %s"
 			  (shimbun-index-url shimbun)
Index: sb-atom-hash.el
===================================================================
RCS file: /storage/cvsroot/emacs-w3m/shimbun/sb-atom-hash.el,v
retrieving revision 1.9
diff -u -r1.9 sb-atom-hash.el
--- sb-atom-hash.el	4 Nov 2009 04:07:11 -0000	1.9
+++ sb-atom-hash.el	21 Apr 2011 20:13:40 -0000
@@ -62,7 +62,7 @@
       (insert buf-str)
       ;; parse xml : check url and desc
       (setq xml (condition-case err
-		    (xml-parse-region (point-min) (point-max))
+		    (shimbun-xml-parse-buffer)
 		  (error
 		   (message "Error while parsing %s: %s"
 			    (content-hash-contents-url content-hash shimbun)