#! /bin/sed -nf

# Join lines if we have tags that span multiple lines
:join
/<[^>]*$/ {
    N
    s/[ 	*]\n[ 	*]/ /
    b join
}  

# Do some selection to speed the thing up
/<[ 	]*\([aA]\|[iI][mM][gG]\)/ !b 

# Remove extra spaces before/after the tag name, change img/area to a
s/<[ 	]*\([aA]\|[iI][mM][gG]|[aA][rR][eE][aA]\)[ 	]\+/<a /g

# To simplify the regexps that follow, change href/alt to lowercase
# and replace whitespace before them with a single space
s/<a\([^>]*\)[ 	][hH][rR][eE][fF]=/<a\1 href=/g
s/<a\([^>]*\)[ 	][aA][lL][tT]=/<a\1 alt=/g

# To simplify the regexps that follow, quote the arguments to href and alt
s/href=\([^" 	>]\+\)/href="\1"/g
s/alt=\([^" 	>]\+\)/alt="\1"/g

# Move the alt tag after href, remove attributes between them
s/\( alt="[^"]*"\)[^>]*\( href="[^"]*"\)/\2\1/g

# Remove attributes between <a and href
s/<a[^>]* href="/<a href="/g

# Change href="xxx" ... alt="yyy" to href="xxx|yyy"
s/\(<a href="[^"]*\)"[^>]* alt="\([^"]*"\)/\1|\2/g

t loop

# Print an URL, remove it, and loop
:loop
h  
s/.*<a href="\([^"]*\)".*$/\1/p
g  
s/\(.*\)<a href="\([^"]*\)".*$/\1/
t loop

### colorized by sedsed, a SED script debugger/indenter/tokenizer/HTMLizer
### original script: http://sed.sf.net/grabbag/scripts/list_urls.sed