#! /bin/ksh
# Check web links for the genealogy web site
# written by John Guest, June 20, 1997
# Leftovers from an earlier run: when a link list is still lying around,
# discard it and keep the previous log under the name href.old.
# The log is only renamed when html.list was present.
if [[ -f html.list ]]; then
  rm html.list
  mv href.log href.old
fi
# Collect the names of every *.html file in the current directory.
# The list is later word-split by the for loop, so names containing
# whitespace would be broken into several words.
files=`ls *.html`
# extract http links
#for file in "index.html"
# Loop over each HTML file: announce it on stdout and append the same
# message to href.log, then run the awk extractor below on it.
for file in $files
do
print "processing $file ..."
print "processing $file ..." >>href.log
# awk extractor (reads $file on stdin, appends results to html.list):
#   - fields are separated by the double-quote character (FS="\"");
#   - for each input line containing "telnet:", "gopher:", "ftp:" or
#     "http:", every field is examined; a line containing several of these
#     strings runs several of the actions;
#   - n is the sum of three index() lookups on the field; a field is
#     printed only when it starts with the scheme name and n is 0;
#   - the field is split on a space and only its first piece is printed.
# NOTE(review): the search strings passed to index() are empty or contain a
# raw line break as written here; they look like they lost characters
# (possibly markup delimiters) -- confirm against a pristine copy before
# relying on this filter.
awk '
BEGIN{
FS="\""
}
/telnet:/ {
for (i=1;i<=NF;i++) {
n = index($i,"")
n += index($i,"")
n += index($i,"
")
if ( (substr($i,1,6) == "telnet") && n == 0) {
split($i,x," ")
print x[1]
}
}
}
/gopher:/ {
for (i=1;i<=NF;i++) {
n = index($i,"")
n += index($i,"")
n += index($i,"
")
if ( substr($i,1,6) == "gopher" && n == 0 ) {
split($i,o," ")
print o[1]
}
}
}
/ftp:/ {
for (i=1;i<=NF;i++) {
n = index($i,"")
n += index($i,"")
n += index($i,"
")
if ( substr($i,1,3) == "ftp" && n == 0 ) {
split($i,o," ")
print o[1]
}
}
}
/http:/ {
for (i=1;i<=NF;i++) {
n = index($i,"")
n += index($i,"")
n += index($i,"
")
if ( substr($i,1,4) == "http" && n == 0 ) {
split($i,o," ")
print o[1]
}
}
}' <$file >>html.list
# NOTE(review): the "done" below is commented out, so the "for file" loop is
# NOT closed here; it runs on to the final "done" at the end of the script.
# Everything that follows therefore executes once per HTML file, while
# html.list keeps growing because it is appended to. Confirm whether that
# is intended.
#done
# i = number of lines currently in html.list (one extracted link per line).
integer i
i=`wc -l < html.list | awk '{printf("%d",$1)}'`
print "Ready to check $i web links!"
# Rename any existing href.log to href.log.old. This includes the
# "processing ..." line appended to href.log a few lines above.
if [[ -f "href.log" ]]
then
mv href.log href.log.old
fi
# Load the whole link list; the for loop below iterates over its
# whitespace-separated words.
hrefs=`cat html.list`
# j counts the links handled so far in this pass.
integer j
(( j = 0 ))
for href in $hrefs
do
# k = links still to be checked, shown in the progress message.
(( k = i - j ))
datestamp=`date '+%H:%M:%S'`
print "<$datestamp> $k links to check. Now checking $href ..."
# Fetch the link with lynx -dump, keep a copy of the output in href.$$
# ($$ is this shell's process id) and store the byte count in chars.
# chars is not referenced again in the visible part of the script.
chars=`lynx -dump $href | tee href.$$ | wc -c | awk '{printf("%d",$1)}'`
# NOTE(review): the next line opens a backtick that is never closed, so the
# script cannot parse as written; text appears to be missing between
# "href.p" and ">href.log". The intended input to href.p and the intended
# log output cannot be determined from here -- restore from a good copy.
# NOTE(review): the final "rm" names href.list, which nothing visible here
# creates (the link list is html.list) -- possibly a typo; confirm.
mask=`perl href.p >href.log
(( j = j + 1 ))
done
rm *.$$ href.list
done