Changeset 1158 for Validate External Links
- Timestamp:
- Jun 13, 2021, 10:50:43 PM (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
Validate External Links/validate_external_links.sh
--- r1157
+++ r1158
@@ -39,5 +39,5 @@
 SUGGEST_SNAPSHOTS_NG=0 # query the Internet Archive for a possible snapshot URL for each NG page
 SUGGEST_SNAPSHOTS_OK=0 # query the Internet Archive for an existing snapshot of each OK page
-CHECK_ARCHIVE_LINKS=0 # check URLs under the archive.org domain
+CHECK_ARCHIVE_LINKS=0 # check URLs on archive.org and archive.is
 TAKE_PAGE_SHOT=0 # take a screenshot of each OK page
 TIMEOUT=10 # time to wait for a response when querying a site
@@ -111,5 +111,5 @@
 SKIP_SLASH_ADD=0
 SKIP_YOUTU_BE=0
-SKIP_ARCHIVE_ORG=0
+SKIP_ARCHIVES=0
 FILE_LINKS=0
 PAGE_LINKS=0
@@ -186,6 +186,7 @@
 --record-ok-links argument.
 --check-archive-links Check links that are already pointing to a page
-on the Internet Archive. In theory these links
-should be totally stable and not need validation.
+on the Internet Archive or archive.is (AKA
+archive.today). In theory these links should be
+totally stable and not need validation.
 --take-screenshots FILE Call the Google Chrome binary at this path to
 take screenshots of each "OK" page.
@@ -532,5 +533,5 @@
 if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi
 if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi
-if [ $SKIP_ARCHIVE_ORG -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVE_ORG Archive.org $(pluralCheckNoun link $SKIP_ARCHIVE_ORG) were not checked"; fi
+if [ $SKIP_ARCHIVES -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVES archive.org/archive.is $(pluralCheckNoun link $SKIP_ARCHIVES) were not checked"; fi
 if [ $LINK_PROBLEMS_TOTAL -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_TOTAL processed $(pluralCheckNoun link $LINK_PROBLEMS_TOTAL) had $(pluralCheckAn $LINK_PROBLEMS_TOTAL)$(pluralCheckNoun issue $LINK_PROBLEMS_TOTAL)"; fi
 if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr " (excepted $LINKS_EXCEPTED link $(pluralCheckNoun issue $LINKS_EXCEPTED) from report)"; valPrint h " (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi
@@ -677,5 +678,5 @@
 if [ $SHOW_YT_RD -eq 1 ]; then valPrint ctrh "No"; else valPrint ctrh "Yes"; fi
 
-valPrint ctrhn "Check archive.org links: "
+valPrint ctrhn "Check archive.org and archive.is links: "
 if [ $CHECK_ARCHIVE_LINKS -eq 1 ]; then valPrint ctrh "Yes"; else valPrint ctrh "No"; fi
 
@@ -810,8 +811,8 @@
 fi
 
-# If we're skipping Archive.org links, see if this is one
-if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ $URL == *web.archive.org* ]]; then
-valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check Wayback Machine links."
-let SKIP_ARCHIVE_ORG+=1
+# If we're skipping archive links, see if this is one
+if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ ( $URL == *web.archive.org* || $URL == *archive.is* ) ]]; then
+valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check archive links."
+let SKIP_ARCHIVES+=1
 let PAGE_LINKS+=1
 continue
@@ -917,5 +918,5 @@
 # Get response code using 'curl' to see if this link is valid; the --insecure option avoids an
 # issue with sites that require HTTPS
-CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent '$AGENT' --max-time $TIMEOUT --retry 2 --write-out '%{http_code}\n' $URL)
+CURL_CODE=$(curl -o /dev/null --silent --insecure --compressed --head --user-agent '$AGENT' --max-time $TIMEOUT --retry 2 --write-out '%{http_code}\n' $URL)
 CURL_ERR=$(echo $?)
 CURL_RESULT=$CURL_CODE
Note:
See TracChangeset
for help on using the changeset viewer.