Find missing or empty alt tags in images via regex
New here? Learn about Bountify and follow @bountify to get notified of new bounties! x

I need a regex script (it can be 1 or 2 scripts) that will help us scan a ton of HTML files and find images with bad alts. We have the following cases:

-missing alt tag altogether
-alt tag here but empty alt=""
-alt tag empty but has spaces alt=" "
-alt tag with no quotes alt=

Not sure it matters but we will be running this in node.js as a part of a worker process.

awarded to slang
Tags
regex

Crowdsource coding tasks.

1 Solution

Winning solution

Please don't use a regex; use JSDOM, an HTML parser, or whatever... just don't try to process a structured language with a syntax as loose as HTML's via regex. Off the top of my head, it would look something like this:

// Fetches a page and warns about every <img> whose alt attribute is missing,
// empty, or whitespace-only.
var request = require('request-promise');
var jsdom = require('jsdom');
var nodefn = require('when/node');

request.get({
  uri: 'http://carrot.is'
}).then(function(response) {
  // nodefn.call adapts the callback-style jsdom.env to a promise.
  return nodefn.call(jsdom.env, response);
}).then(function(window) {
  var images = window.document.querySelectorAll('img');
  for (var i = 0; i < images.length; i++) {
    var img = images[i];
    // getAttribute returns null when the attribute is absent altogether;
    // `== null` covers that case, trim() covers alt="" and alt="  ".
    var altAttr = img.getAttribute('alt');
    if (altAttr == null || altAttr.trim().length === 0) {
      console.warn(img.outerHTML, 'has no alt');
    }
  }
}).catch(function(err) {
  // Surface fetch/parse failures instead of leaving the rejection unhandled.
  console.error('alt check failed:', err);
});

or in CoffeeScript:

# Fetches a page and warns about every <img> whose alt attribute is missing,
# empty, or whitespace-only.
request = require 'request-promise'
jsdom = require 'jsdom'
nodefn = require 'when/node'

request.get(
  uri: 'http://carrot.is'
).then((response) ->
  # nodefn.call adapts the callback-style jsdom.env to a promise.
  nodefn.call(jsdom.env, response)
).then((window) ->
  for img in window.document.querySelectorAll('img')
    # getAttribute returns null when the attribute is absent altogether;
    # the existence check covers that, trim() covers alt="" and alt="  ".
    altAttr = img.getAttribute('alt')
    if not altAttr? or altAttr.trim().length is 0
      console.warn(img.outerHTML, 'has no alt')
  return
).catch((err) ->
  # Surface fetch/parse failures instead of leaving the rejection unhandled.
  console.error('alt check failed:', err)
  return
)

Edit: I've now actually tested the above code and it seems to work.

Looks good; let me run it by my team.
Qdev over 4 years ago