Changeset 1123 for Validate External Links/validate_external_links.sh
- Timestamp: Mar 21, 2020, 11:08:35 PM (5 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
Validate External Links/validate_external_links.sh
r1122 r1123 456 456 valPrint r "\b1 Summary \b0 ($ELAPSED)" 457 457 valPrint hn "<h3><span id=\"summary\">Summary ($ELAPSED)</span></h3>" 458 valPrint ctrh "I finished processing $LINKS_PROCESSED of $LINK_COUNT $(pluralCheckNoun link $LINK_COUNT) (there were$FILE_LINKS file $(pluralCheckNoun link $FILE_LINKS) and $PAGE_LINKS page $(pluralCheckNoun link $PAGE_LINKS))."458 valPrint ctrh "I finished processing $LINKS_PROCESSED of $LINK_COUNT $(pluralCheckNoun link $LINK_COUNT) (there $(pluralCheckWas $FILE_LINKS) $FILE_LINKS file $(pluralCheckNoun link $FILE_LINKS) and $PAGE_LINKS page $(pluralCheckNoun link $PAGE_LINKS))." 459 459 460 460 # Print processed link totals 461 461 if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi 462 462 if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi 463 if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS processed $(pluralCheckNoun link $LINK_PROBLEMS) had issues"; fi464 if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr h "(excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi465 if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) wereOK"; fi466 if [ $TRIVIAL_RDS -gt 0 ]; then valPrint ctr h "(counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; fi463 if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS processed $(pluralCheckNoun link $LINK_PROBLEMS) had $(pluralCheckAn $LINK_PROBLEMS)$(pluralCheckNoun issue $LINK_PROBLEMS)"; fi 464 if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr " (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; valPrint h "nbsp;nbsp;(excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi 465 if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link 
$OK_LINKS) $(pluralCheckWas $OK_LINKS) OK"; fi 466 if [ $TRIVIAL_RDS -gt 0 ]; then valPrint ctr " (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; valPrint h " (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; fi 467 467 468 468 # Print excepted link totals … … 482 482 483 483 # Print checked link totals 484 if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "$LINK_PROBLEMS link $(pluralCheckNoun issue s $LINKS_CHECKED):"; fi484 if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "$LINK_PROBLEMS link $(pluralCheckNoun issue $LINK_PROBLEMS):"; fi 485 485 if [ $NG_LINKS -gt 0 ]; then valPrint ctrh "- $NG_LINKS NG $(pluralCheckNoun link $NG_LINKS)"; fi 486 486 if [ $RD_LINKS -gt 0 ]; then valPrint ctrh "- $RD_LINKS $(pluralCheckNoun redirection $RD_LINKS)"; fi … … 641 641 if [ "$NS_NAME" == "" ]; then 642 642 if [ $NS_ID == "NULL" ]; then 643 valPrint trs "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki."643 valPrint trs "Skipping line $LINK_NUM ('$LINE') because the namespace (and probably the page too) is 'NULL'. Probably the link is no longer in existence on the wiki." 644 644 else 645 valPrint trs "Skipping URL on line $LINK_NUM found on page $PAGE_NAMEbecause I could not find a name for namespace ID $NS_ID."645 valPrint trs "Skipping line $LINK_NUM ('$LINE') because I could not find a name for namespace ID $NS_ID." 646 646 fi 647 647 let SKIP_UNK_NS+=1 … … 658 658 PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//') 659 659 if [ $PAGE_NAME_SUFFIX == "js" ]; then 660 valPrint trs "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME."660 valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$PAGE_NAME'." 
661 661 let SKIP_JS_PAGE+=1 662 662 continue … … 679 679 # Scan for illegal characters 680 680 if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then 681 valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because it contains characters illegal in a URL."681 valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because it contains characters illegal in a URL." 682 682 let SKIP_BAD_URL+=1 683 683 continue … … 696 696 # 'sed' cannot handle Unicode in my Bash shell, so skip this URL and make user check it 697 697 if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then 698 valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I cannot handle non-ASCII characters."698 valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I cannot handle non-ASCII characters." 699 699 let SKIP_NON_ASCII+=1 700 700 continue … … 759 759 STR_TYPE="" 760 760 if [ $IS_FILE -eq -1 ]; then 761 valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown URL ending $POST_DOT. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES."761 valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown URL ending '$POST_DOT'. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES." 762 762 let SKIP_UNK_SUFFIX+=1 763 763 continue … … 772 772 # Get response code using 'curl' to see if this link is valid; the --insecure option avoids an 773 773 # issue with sites that require HTTPS 774 CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent ' "$AGENT"' --max-time 10 --write-out '%{http_code}\n' $URL)774 CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent '$AGENT' --max-time 10 --write-out '%{http_code}\n' $URL) 775 775 CURL_ERR=$(echo $?) 
776 776 CURL_RESULT=$CURL_CODE … … 822 822 if [[ $CODE == $CURL_CODE ]]; then 823 823 # Get URL header again in order to retrieve the URL we are being redirected to 824 NEW_URL=$(curl -o /dev/null --silent --insecure --head --user-agent ' "$AGENT"' --max-time 10 --write-out '%{redirect_url}\n' $URL)824 NEW_URL=$(curl -o /dev/null --silent --insecure --head --user-agent '$AGENT' --max-time 10 --write-out '%{redirect_url}\n' $URL) 825 825 826 826 # Adjust the old and new URLs to both use HTTP for comparison purposes, so we can filter … … 842 842 # wants those to be reported) 843 843 if [ $SHOW_HTTPS -eq 0 ] && [ $URL_HTTP == $NEW_URL_HTTP ]; then 844 valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because we have not been asked to show http->https upgrades, and we were redirected to $NEW_URL."844 valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show http->https upgrades, and I was redirected to '$NEW_URL'." 845 845 STATUS="OK" 846 846 let OK_LINKS+=1 … … 849 849 # those to be reported) 850 850 elif [ $SHOW_SLASH -eq 0 ] && [ $URL_HTTP == $NEW_URL_NO_SLASH ]; then 851 valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because we have not been asked to show added trailing slashes, and we were redirected to $NEW_URL."851 valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show added trailing slashes, and I was redirected to '$NEW_URL'." 852 852 STATUS="OK" 853 853 let OK_LINKS+=1 … … 875 875 # If we didn't match a known status code, advise the reader 876 876 if [ $STATUS == "??" ]; then 877 valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown return code $CURL_CODE."877 valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown return code $CURL_CODE." 
878 878 let SKIP_UNK_CODE+=1 879 879 continue … … 896 896 EXCEPT_CODE=${GREP_RESULT%%,*} 897 897 if [ "$EXCEPT_CODE" == "$EXPECT_CODE" ]; then 898 valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because its expected result, $EXPECT_CODE, is listed in the exceptions file."898 valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because its expected result, $EXPECT_CODE, is listed in the exceptions file." 899 899 if [ $STATUS == "EI" ]; then 900 900 let SKIP_EXPECT_EI+=1 … … 926 926 valPrint hn "<tr><td colspan=\"2\" align=\"right\">linked from</td><td><a href=\"$FULL_PAGE_PATH\" target=\"_blank\">$LOCAL_PAGE_PATH</a></td></tr>" 927 927 928 # Place vertical space here since we won't be printing anything more about this link 929 if [ $STATUS == "OK" ]; then valPrint trh ""; fi 930 928 931 # Record redirect URL if one was given by a 3xx response page 929 932 if [ $STATUS == "RD" ]; then … … 990 993 fi 991 994 else 992 valPrint trhs "Skipping screenshot of URL $URL because $SHOT_FILEalready exists."995 valPrint trhs "Skipping screenshot of URL '$URL' because file '$SHOT_FILE' already exists." 993 996 fi 994 997 fi
Note: See TracChangeset for help on using the changeset viewer.