Ad blocking one website at a time

Alec Jacobson

June 07, 2010

weblog/

I wrote a small piece of AppleScript code that allows me to zap ads from web pages that I browse using Safari. It uses JavaScript to remove the ads, or the objects containing them, from the page directly. It doesn't use intelligent ad recognition in any way. Rather, my idea was much simpler. Most people spend 90% of their time on the web looking at a small number of websites: Facebook, the New York Times, Twitter, Google, Wikipedia, Gmail, YouTube, etc. (See the Pareto principle.) If I can block 100% of the ads on 90% of the pages I look at, then my ad blocker is effectively 90% efficient. The ads on these big sites are usually in the same place on the page, making them very easy to remove from the HTML with JavaScript. I organized my AppleScript so that adding a new site on which to block ads is very simple (just a line or two) and blocking a certain type of ad on that site is just another line. I use the magnifying glass tool in Safari's developer tool, the Web Inspector, to select the ad object on the page. Inevitably the website has used a class or id, which I then feed into my script. Often the containing object is named something as simple as "ad" or "bigAd". [Screenshot: the Web Inspector selecting an ad.] When I have my blocker running in the background, it usually takes up 2% of the CPU. If I crank down the delay in the script so that the ads disappear even faster, then it can get as high as 10% or so, but it's not really worth it. There is no reason why you couldn't easily port this style of ad blocker to Greasemonkey, Opera, or anything else that runs client-side JavaScript. Here is the AppleScript. It creates a few useful JavaScript methods and sets up the blocker to run in the background (with a little error checking).
-- Safari AdBlocker Applescript
-- Author: Alec Jacobson http://alecjacobson.com
--


-- getElementsByClass holds JavaScript source that is injected into the page with
-- "do JavaScript". It defines getElementsByClassName(className, tag, elm), which
-- returns an array of the elements below elm (default: document) that carry
-- every class listed in className, optionally restricted to a tag name.
--
-- This might not be necessary, because Safari already has a native
-- document.getElementsByClassName, but perhaps not with the tag/container
-- filtering, and perhaps not in older versions of Safari.
--
-- Escaping note: this is JavaScript inside an AppleScript string literal, so a
-- backslash that must reach a JavaScript string literal as an escaped backslash
-- has to be written as four backslashes here. The regex escapes for a word
-- boundary and for whitespace below rely on that; with only two backslashes
-- JavaScript would see a backspace character and a plain letter s instead.
set getElementsByClass to "
/*
  Developed by Robert Nyman, http://www.robertnyman.com
  Code/licensing: http://code.google.com/p/getelementsbyclassname/
*/
var getElementsByClassName = function (className, tag, elm){
  /* The first call picks the best available strategy, replaces this function
     with it, and then delegates to the replacement. */
  if (document.getElementsByClassName) {
    getElementsByClassName = function (className, tag, elm) {
      elm = elm || document;
      var elements = elm.getElementsByClassName(className),
        nodeName = (tag)? new RegExp(\"\\\\b\" + tag + \"\\\\b\", \"i\") : null,
        returnElements = [],
        current;
      for(var i=0, il=elements.length; i<il; i+=1){
        current = elements[i];
        if(!nodeName || nodeName.test(current.nodeName)) {
          returnElements.push(current);
        }
      }
      return returnElements;
    };
  }
  else if (document.evaluate) {
    getElementsByClassName = function (className, tag, elm) {
      tag = tag || \"*\";
      elm = elm || document;
      var classes = className.split(\" \"),
        classesToCheck = \"\",
        xhtmlNamespace = \"http://www.w3.org/1999/xhtml\",
        namespaceResolver = (document.documentElement.namespaceURI === xhtmlNamespace)? xhtmlNamespace : null,
        returnElements = [],
        elements,
        node;
      for(var j=0, jl=classes.length; j<jl; j+=1){
        classesToCheck += \"[contains(concat(' ', @class, ' '), ' \" + classes[j] + \" ')]\";
      }
      try  {
        elements = document.evaluate(\".//\" + tag + classesToCheck, elm, namespaceResolver, 0, null);
      }
      catch (e) {
        elements = document.evaluate(\".//\" + tag + classesToCheck, elm, null, 0, null);
      }
      while ((node = elements.iterateNext())) {
        returnElements.push(node);
      }
      return returnElements;
    };
  }
  else {
    getElementsByClassName = function (className, tag, elm) {
      tag = tag || \"*\";
      elm = elm || document;
      var classes = className.split(\" \"),
        classesToCheck = [],
        elements = (tag === \"*\" && elm.all)? elm.all : elm.getElementsByTagName(tag),
        current,
        returnElements = [],
        match;
      for(var k=0, kl=classes.length; k<kl; k+=1){
        classesToCheck.push(new RegExp(\"(^|\\\\s)\" + classes[k] + \"(\\\\s|$)\"));
      }
      for(var l=0, ll=elements.length; l<ll; l+=1){
        current = elements[l];
        match = false;
        for(var m=0, ml=classesToCheck.length; m<ml; m+=1){
          match = classesToCheck[m].test(current.className);
          if (!match) {
            break;
          }
        }
        if (match) {
          returnElements.push(current);
        }
      }
      return returnElements;
    };
  }
  return getElementsByClassName(className, tag, elm);
};"
-- setElementsToEmpty holds JavaScript source that is injected into the page with
-- "do JavaScript". It defines setElementsToEmpty(a), which walks the array-like
-- a by index and sets the innerHTML of each entry to the empty string, leaving
-- the element itself in place but removing everything inside it.
set setElementsToEmpty to "function setElementsToEmpty(a){
  for ( var i=0, len=a.length; i<len; ++i ){
    a[i].innerHTML = '';
  }
};
"

-- Main polling loop.
--
-- Outer repeat: runs forever, checking every 5 seconds whether Safari is running.
-- Inner repeat: while Safari is running, looks at the front document, injects
-- the two JavaScript helpers, and applies the zaps registered for the current
-- URL, then sleeps briefly before the next pass.
--
-- Zaps by id are wrapped in a null check: on a page where the ad container is
-- absent, getElementById returns null and an unguarded ".innerHTML = ''" would
-- throw a TypeError on every pass.
set this_url to ""
repeat
  try
    repeat while appIsRunning("Safari")
      tell application "Safari"
        
        try
          set doc to front document
          set this_url to URL of doc
          -- The helpers are re-injected on every pass; presumably the front
          -- document may have changed or reloaded since the previous pass.
          do JavaScript getElementsByClass in doc
          do JavaScript setElementsToEmpty in doc
          if this_url starts with "http://www.google.com/search?" then
            -- Regular google search
            -------------------------------------------------------------
            --
            -- ZAP CONTAINING DIV BY ITS ID (skipped when the id is not on the page)
            --
            -------------------------------------------------------------
            do JavaScript "(function(){var e=document.getElementById('rhsline');if(e){e.innerHTML='';}})()" in doc
            do JavaScript "(function(){var e=document.getElementById('tads');if(e){e.innerHTML='';}})()" in doc
          else if this_url starts with "http://search.yahoo.com/search" then
            -- Regular yahoo search
            do JavaScript "(function(){var e=document.getElementById('east');if(e){e.innerHTML='';}})()" in doc
            -------------------------------------------------------------
            --
            -- ZAP DIVS OF CERTAIN CLASS WITHIN  CERTAIN CONTAINING DIV (COULD BE NULL)
            --
            -------------------------------------------------------------
            -- A null container is fine here: the helper falls back to document.
            do JavaScript "setElementsToEmpty(getElementsByClassName('ads horiz',null,document.getElementById('main')))" in doc
          -------------------------------------------------------------
          --
          -- ADD OTHER SITES HERE
          --
          -------------------------------------------------------------
          end if
          -- set delay amount accordingly to manage how much CPU to devote to blocking ads
          -- recommended between 1.0 and 0.001 seconds
          delay 0.1
        on error errText number errNum
          -- if anything but doc changing before ads removed or safari open but no windows
          -- pause so that CPU isn't stolen
          delay 2
        end try
      end tell
    end repeat
  on error errText number errNum
    if errNum is equal to -128 or errNum is equal to -609 then
      -- safari no longer open: stay quiet and let the outer loop poll again
    else
      -- anything else is unexpected, so surface it to the user
      display dialog errText & " " & errNum
    end if
    
  end try
  -- wait before checking again whether Safari is running
  delay 5
end repeat

-- from http://codesnippets.joyent.com/posts/show/1124
-- Report whether a process with the given name is currently running.
--   appName: the process name to look for, e.g. "Safari"
--   returns: true when System Events lists a process with that name, else false
on appIsRunning(appName)
  tell application "System Events"
    set runningProcessNames to name of every process
  end tell
  return runningProcessNames contains appName
end appIsRunning
Download the ad blocker with all the sites I've blocked ads on. Imagine how strong this ad blocker could be if an army of users were updating the site-specific zaps. Imagine the cold war it would start. Note: Some sites inject their ads deeper into the content of the site, like YouTube does with its Flash videos. I haven't come up with a way to single this one out yet...