Ignore:
Timestamp:
Jan 23, 2023, 2:51:32 AM (2 years ago)
Author:
iritscen
Message:

ValExtLinks now supports wildcards in the URL too, not just the containing page. Added Cloudflare's error code 520 to the list of recognized errors.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • Validate External Links/validate_external_links.sh

    r1177 r1178  
    4848
    4949# Fixed strings -- see the occurrences of these variables to learn their purpose
    50 AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36"
     50AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36"
    5151ARCHIVE_API="http://archive.org/wayback/available"
    5252ARCHIVE_GENERIC="https://web.archive.org/web/*"
     
    7777declare -a OK_CODES=(200 401 405 406 418 501)
    7878declare -a RD_CODES=(301 302 303 307 308)
    79 declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 504 530)
     79declare -a NG_CODES=(000 400 403 404 410 429 500 502 503 504 520 530)
    8080
    8181# Characters not allowed in a URL. Curly braces are sometimes used on the wiki to build a link using
     
    124124### HELP OUTPUT ###
    125125# A pseudo-man page. Here is the 80-character rule for the page text:
    126 # 234567890123456789012345678901234567890123456789012345678901234567890123456789
     126# 345678901234567890123456789012345678901234567890123456789012345678901234567890
    127127function printHelp()
    128128{
     
    535535   if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi
    536536   if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi
    537    if [ $SKIP_ARCHIVES -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVES archive.org/archive.is $(pluralCheckNoun link $SKIP_ARCHIVES) were not checked"; fi
     537   if [ $SKIP_ARCHIVES -gt 0 ]; then valPrint ctrh "- $SKIP_ARCHIVES archive.org/archive.is $(pluralCheckNoun link $SKIP_ARCHIVES) $(pluralCheckWas $SKIP_ARCHIVES) not checked"; fi
    538538   if [ $LINK_PROBLEMS_TOTAL -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_TOTAL processed $(pluralCheckNoun link $LINK_PROBLEMS_TOTAL) had $(pluralCheckAn $LINK_PROBLEMS_TOTAL)$(pluralCheckNoun issue $LINK_PROBLEMS_TOTAL)"; fi
    539539   if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr "  (excepted $LINKS_EXCEPTED link $(pluralCheckNoun issue $LINKS_EXCEPTED) from report)"; valPrint h "  (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi
     
    11371137         EXCEPT_URL="${EXCEPT_LINE#*,}"
    11381138         EXCEPT_URL="${EXCEPT_URL%,*}"
    1139          if [ "$EXCEPT_URL" != "$URL" ]; then
    1140             continue
     1139         if [[ "$EXCEPT_URL" =~ \* ]]; then # if this exception URL contains the '*' wildcard, use pattern-matching with it
     1140            if [[ "$URL" =~ "$EXCEPT_URL" ]]; then
     1141               continue
     1142            fi
     1143         else
     1144            if [ "$EXCEPT_URL" != "$URL" ]; then # otherwise just use a straight string comparison
     1145               continue
     1146            fi
    11411147         fi
    11421148
Note: See TracChangeset for help on using the changeset viewer.