#! /bin/sed -nf # Join lines if we have tags that span multiple lines :join /<[^>]*$/ { N; s/[ *]\n[ *]/ /; b join; } # Do some selection to speed the thing up /<[ ]*\([aA]\|[iI][mM][gG]\)/!b # Remove extra spaces before/after the tag name, change img/area to a s/<[ ]*\([aA]\|[iI][mM][gG]|[aA][rR][eE][aA]\)[ ]\+/]*\)[ ][hH][rR][eE][fF]=/]*\)[ ][aA][lL][tT]=/]\+\)/href="\1"/g s/alt=\([^" >]\+\)/alt="\1"/g # Move the alt tag after href, remove attributes between them s/\( alt="[^"]*"\)[^>]*\( href="[^"]*"\)/\2\1/g # Remove attributes between ]* href="/]* alt="\([^"]*"\)/\1|\2/g t loop # Print an URL, remove it, and loop :loop h s/.*