[Date Prev][Date Next][Thread Prev][][Date Index][Thread Index]

Filters for stackexchange and youtube [SNIPPET]



I have filter functions for stackexchange and for youtube, and since
those are very commonly used sites, I thought I would share them for
consideration to be added to w3m-filter.el.

-- 
hkp://keys.gnupg.net
CA45 09B5 5351 7C11 A9D1  7286 0036 9E45 1595 8BC0
; ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
; ┃ stackexchange filter                                       ┃
; ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
(defun w3m-filter-stackexchange (url)
  "Filter top and bottom cruft for stackexchange.com.
URL is handed unchanged to the `w3m-filter-delete-regions' and
`w3m-filter-replace-regexp' helpers.  The HTML in the current
buffer is edited in place.

The filter removes the page header and footer regions, the vote
and favorite widgets and the answer form, flattens the user-info
boxes into bracketed text such as [rep:N] [gold:N], and
simplifies the Linked list and the qinfo table.

Sub-regions that receive several passes are delimited with
markers, so that each pass stays inside the region even after
earlier passes have changed the buffer size."
  ;; Page-level trimming: header, answer form / related block, footer.
  (w3m-filter-delete-regions url
    "<body.*>" "<h1.*>" t t t nil nil 1)
  (w3m-filter-delete-regions url
    "<h2 class=\"space\">Your Answer</h2>" "<h4 id=\"h-related\">Related</h4>"
    nil t nil nil nil 1)
  (w3m-filter-delete-regions url
    "<div id=\"hot-network-questions\" class=\"module tex2jax_ignore\">" "</body>"
    nil t nil nil nil 1)

;  (when (search-forward "<table>" nil t)
;    (replace-match ""))

  ;; Vote arrows and the favorite star.
  (goto-char (point-min))
  (w3m-filter-delete-regions url
    "<a class=\"vote-[ud]"
    "</a>" nil nil t (point))
  (goto-char (point-min))
  (w3m-filter-delete-regions url
    "<a class=\"star-off"
    "</td>")
  (w3m-filter-replace-regexp url
    "<span itemprop=\"upvoteCount[^>]+>"
    "Votes: ")
  ;; The post body becomes a blockquote, closed where the tag list begins.
  (w3m-filter-replace-regexp url
    "<div class=\"post-text[^>]+>"
    "<blockquote>")
  (w3m-filter-replace-regexp url
    "<div class=\"post-taglist[^>]+>"
    "</blockquote>")
  (w3m-filter-delete-regions url
    "<a name='new-answer'>"
    "</form>" nil nil nil nil nil 1)

  (w3m-filter-replace-regexp url
    "<div class=\"spacer\">[^>]+>[^>]+>+?\\([0-9]+\\)</div></a>"
    "\\1 ")
  (w3m-filter-delete-regions url
    "<td class=\"vt\">"
    "</td>")

  ;; Flatten each user-info box.  The box is taken to run from the end
  ;; of its opening tag to the next "<li" (or the end of the buffer).
  (goto-char (point-min))
  (while (search-forward "<div class=\"user-info \">" nil t)
    (let ((p1 (copy-marker (match-end 0)))
          (p2 (copy-marker (if (search-forward "<li" nil t)
                               (match-beginning 0)
                             (point-max)))))
      (w3m-filter-delete-regions url
        "<div class=\"user-details\">" "</a>" nil nil nil p1 p2)
      (w3m-filter-replace-regexp url "</?div[^>]*>" "" p1 p2)
      (w3m-filter-replace-regexp url
        "<span class=\"reputation-score[^>]*>"
        "[rep:" p1 p2)
      (w3m-filter-replace-regexp url
        "<span class=\"badge1\">"
        "] [gold:" p1 p2)
      (w3m-filter-replace-regexp url
        "<span class=\"badge2\">"
        "] [silver:" p1 p2)
      (w3m-filter-replace-regexp url
        "<span class=\"badge3\">"
        "] [bronze:" p1 p2)
      ;; Must run last: the class-specific span rewrites above need the
      ;; span tags to still be present.
      (w3m-filter-replace-regexp url "</?span[^>]*>" "" p1 p2)
      ;; Resume the outer search after the region just processed, then
      ;; detach the markers so they stop tracking buffer edits.
      (goto-char p2)
      (set-marker p1 nil)
      (set-marker p2 nil)))

  (w3m-filter-replace-regexp url
    "<td" "<td valign=top")

  (w3m-filter-delete-regions url
    "<div id=\"tabs\">"
    "<a name" nil t)

  ;; Separate answers with a horizontal rule.
  (goto-char (point-min))
  (while (search-forward "<div id=\"answer-" nil t)
    (replace-match "</ul><hr>\\&"))

  (w3m-filter-delete-regions url
    "<div id=\"comments-link"
    "</div>")

  ;; "Linked" list: runs from the heading to the next <h4, or to the end
  ;; of the buffer when there is no further heading.
  (goto-char (point-min))
  (when (search-forward "<h4 id=\"h-linked\">Linked</h4>" nil t)
    (replace-match "<p><b>Linked</b><br>")
    (let ((p1 (copy-marker (match-end 0)))
          (p2 (copy-marker (if (search-forward "<h4" nil t)
                               (match-beginning 0)
                             (point-max)))))
      (w3m-filter-replace-regexp url "^\t</a>" "" p1 p2)
      (w3m-filter-replace-regexp url "</a>" "</a><br>" p1 p2)
      (w3m-filter-replace-regexp url "</div>" " " p1 p2)
      ;; NOTE(review): this call has never been limited to p1..p2 and so
      ;; applies to the whole buffer -- confirm whether that is intended.
      (w3m-filter-delete-regions url
        "<div class=\"spacer\">"
        "<div class=\"answer-votes answered-accepted [^>]+>"
        nil nil t)
      (set-marker p1 nil)
      (set-marker p2 nil)))

  ;; qinfo table: drop the table markup, one row per line.  Nothing is
  ;; rewritten when the closing </table> cannot be found.
  (goto-char (point-min))
  (when (search-forward "<table id=\"qinfo\">" nil t)
    (replace-match "")
    (let ((p1 (copy-marker (point)))
          (p2 (when (search-forward "</table>" nil t)
                (replace-match "")
                (copy-marker (point)))))
      (when p2
        (w3m-filter-replace-regexp url "<tr>" "" p1 p2)
        (w3m-filter-replace-regexp url "</tr>" "<br>" p1 p2)
        (w3m-filter-replace-regexp url "</?td[^>]*>" "" p1 p2)
        (w3m-filter-replace-regexp url "<b>" "" p1 p2)
        (w3m-filter-replace-regexp url "<a[^>]+>" "" p1 p2)
        (w3m-filter-replace-regexp url "</?p[^>]*>" "" p1 p2)
        (set-marker p2 nil))
      (set-marker p1 nil))))


; ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
; ┃ youtube filter                                             ┃
; ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛
(defun w3m-filter-youtube (url)
  "Filter cruft from youtube result listings.
URL is handed unchanged to the `w3m-filter-delete-regions' and
`w3m-filter-replace-regexp' helpers.  The HTML in the current
buffer is edited in place.

The filter keeps only the title inside the head, drops everything
between the body tag and the num-results paragraph as well as the
footer container, flattens the lists before the first <ol and the
per-item meta-info and badge lists, and makes images that carry a
data-thumb attribute use it as their src."
  (w3m-filter-delete-regions url "<head>" "<title>" t t)
  (w3m-filter-delete-regions url "</title>" "</head>" t t)
  ;; Insert a minimal body tag at point, then delete the original body
  ;; tag and everything up to the result count.  (point) must be the
  ;; start-position argument so that the search begins after the tag
  ;; just inserted; one slot earlier it is taken as the use-regex flag
  ;; and the inserted tag itself gets deleted.
  ;; NOTE(review): assumes the preceding call leaves point just after
  ;; </head> -- confirm against `w3m-filter-delete-regions'.
  (insert "<body>")
  (w3m-filter-delete-regions url
    "<body" "<p class=\"num-results" nil t nil (point))
  (w3m-filter-delete-regions url
    "<div id=\"footer-container\"" "</body>" nil t)
  (w3m-filter-replace-regexp url "</?h4[^>]*>" "")
  (goto-char (point-min))
  ;; ol-pos marks the first "<ol"; it is nil when there is none, in
  ;; which case the passes below are not bounded by it.  A marker keeps
  ;; it accurate while the text before it shrinks.
  (let* ((top (point))
         (found (search-forward "<ol" nil t))
         (ol-pos (and found (copy-marker found))))
    (w3m-filter-replace-regexp url "<li>" " | " top ol-pos)
    (w3m-filter-replace-regexp url "<ul>" "" top ol-pos)
    (w3m-filter-replace-regexp url
      "<li><div class=\"yt-lockup[^>]*>" "<p><li>" ol-pos)
    (when ol-pos (set-marker ol-pos nil))
    ;; Non-greedy, so that two buttons on one line are removed one by
    ;; one and the markup between them survives.
    (w3m-filter-replace-regexp url "<button.*?</button>" "")
    (w3m-filter-replace-regexp url "<a aria-hidden[^>]*>" "")
    (w3m-filter-replace-regexp url "</?h3[^>]*>" "")

    ;; Per-item meta-info lists: drop the list markup, keep the text.
    (goto-char (point-min))
    (while (search-forward "<ul class=\"yt-lockup-meta-info\">" nil t)
      (delete-region (match-beginning 0) (match-end 0))
      (let* ((beg (point))
             (close (search-forward "</ul>" nil t))
             (end (and close (copy-marker close))))
        (w3m-filter-replace-regexp url "</?li>" " " beg end)
        (w3m-filter-replace-regexp url "</ul>" "" beg nil 1)
        (when end (set-marker end nil))))

    ;; Badge lists: same treatment, plus a separating space after them.
    (goto-char (point-min))
    (while (search-forward "<ul class=\"yt-badge-list \">" nil t)
      (replace-match "")
      (let* ((beg (point))
             (close (search-forward "</ul>" nil t))
             (end (and close (copy-marker close))))
        (insert " ")
        (w3m-filter-replace-regexp url "</?li[^>]*>" " " beg end)
        (w3m-filter-replace-regexp url "</ul>" "" beg nil 1)
        (when end (set-marker end nil))))

    (w3m-filter-replace-regexp url
      "</div><div class=\"yt-lockup-meta \">" "")

    ;; Images: when a tag carries data-thumb, drop any existing src
    ;; attribute and rename data-thumb to src.  The rename happens even
    ;; when the tag had no src attribute to begin with.
    (goto-char (point-min))
    (while (search-forward "<img" nil t)
      (let* ((tag-beg (point))
             (close (search-forward ">" nil t))
             (tag-end (and close (copy-marker close))))
        (goto-char tag-beg)
        (when (search-forward "data-thumb=" tag-end t)
          (goto-char tag-beg)
          (when (re-search-forward "src=\"[^\"]*\"" tag-end t)
            (replace-match ""))
          (goto-char tag-beg)
          (when (search-forward "data-thumb=" tag-end t)
            (replace-match "src=")))
        (when tag-end (set-marker tag-end nil))))))