#! /bin/ksh
# Check web links for the genealogy web site
# written by John Guest, June 20, 1997
#
# Phase 1: scan every *.html file in the current directory and collect the
#          telnet/gopher/ftp/http links it contains into html.list
#          (one link per line).
# Phase 2: fetch every collected link with "lynx -dump", hand the dump to
#          the Perl helper href.p, and append one record per link to href.log.
#
# Files used (all in the current directory):
#   html.list     links extracted in phase 1 (left in place after the run;
#                 its presence at start-up marks that an earlier run happened)
#   href.log      results of the current run
#   href.old      href.log of the previous run
#   href.log.old  the "processing <file> ..." progress lines of phase 1
#   href.$$       scratch copy of the most recent lynx dump (removed at exit)
#
# NOTE(review): this file reached review with its line breaks collapsed and
# with every "<...>"-shaped span stripped out.  The spots that had to be
# reconstructed are marked with NOTE(review) below -- please confirm them
# against a pristine copy if one exists.

# A leftover html.list means an earlier run happened: keep its log as
# href.old.  The log is only moved when it actually exists.
if [[ -f "html.list" ]]
then
    if [[ -f "href.log" ]]
    then
        mv href.log href.old
    fi
fi

# Start from an empty link list, so it also exists when no link is found.
: >html.list

# ---------------------------------------------------------------------------
# Phase 1: extract the links
# ---------------------------------------------------------------------------
#for file in "index.html"
for file in *.html
do
    # With no *.html files the pattern stays unexpanded; skip it.
    [[ -f "$file" ]] || continue

    print -r -- "processing $file ..."
    print -r -- "processing $file ..." >>href.log

    # Fields are split on double quotes, so a quoted attribute value such as
    # href="http://..." becomes a field of its own.  For every scheme named
    # on the line, print the first word of each field that starts with that
    # scheme.  Schemes are tried in the fixed order telnet, gopher, ftp, http.
    #
    # NOTE(review): the original rejected fields containing three specific
    # strings that were lost (most likely HTML tags).  The reconstruction
    # rejects any field containing an angle bracket, which a URL cannot
    # legally contain unescaped.
    awk '
    BEGIN {
        FS = "\""
        nschemes = split("telnet gopher ftp http", scheme, " ")
    }
    {
        for (s = 1; s <= nschemes; s++) {
            # Only look at lines that mention "<scheme>:" somewhere.
            if (index($0, scheme[s] ":") == 0)
                continue
            len = length(scheme[s])
            for (i = 1; i <= NF; i++) {
                if (substr($i, 1, len) != scheme[s])
                    continue
                # Quoted text with markup in it is not a link.
                if (index($i, "<") > 0 || index($i, ">") > 0)
                    continue
                split($i, word, " ")
                print word[1]
            }
        }
    }' <"$file" >>html.list
done

# ---------------------------------------------------------------------------
# Phase 2: check the links
# ---------------------------------------------------------------------------
integer i
i=$(wc -l < html.list | awk '{printf("%d",$1)}')
print "Ready to check $i web links!"

# Set the phase 1 progress lines aside so href.log holds only link results.
if [[ -f "href.log" ]]
then
    mv href.log href.log.old
fi

integer j
integer k
(( j = 0 ))

# html.list is read on descriptor 3 so that lynx and perl cannot consume the
# list through stdin; reading line by line also avoids word splitting and
# file-name globbing on URLs containing ? or *.
while read -r href <&3
do
    (( k = i - j ))
    datestamp=$(date '+%H:%M:%S')
    print -r -- "<$datestamp> $k links to check. Now checking $href ..."

    # Size in bytes of the rendered page; the dump itself is kept in href.$$.
    chars=$(lynx -dump "$href" </dev/null | tee href.$$ | wc -c | awk '{printf("%d",$1)}')

    # NOTE(review): the input redirection of href.p and the statement that
    # wrote to href.log were lost.  Feeding the dump to href.p and logging
    # "<url> <size> <href.p output>" is a reconstruction -- confirm the
    # record format expected by whoever reads href.log.
    mask=$(perl href.p <href.$$)
    print -r -- "$href $chars $mask" >>href.log

    (( j = j + 1 ))
done 3<html.list

# Remove the scratch dump.  The original also named "href.list" here, a file
# this script never creates; html.list is deliberately kept because the
# start-up check above depends on it.
rm -f href.$$