Ignore:
Timestamp:
Mar 21, 2020, 11:08:35 PM (5 years ago)
Author:
iritscen
Message:

Fixed bug in Val that was causing dozens of 403 errors to be returned unnecessarily. Polished report wording and messages a little.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • Validate External Links/validate_external_links.sh

    r1122 r1123  
    456456   valPrint r "\b1 Summary \b0 ($ELAPSED)"
    457457   valPrint hn "<h3><span id=\"summary\">Summary ($ELAPSED)</span></h3>"
    458    valPrint ctrh "I finished processing $LINKS_PROCESSED of $LINK_COUNT $(pluralCheckNoun link $LINK_COUNT) (there were $FILE_LINKS file $(pluralCheckNoun link $FILE_LINKS) and $PAGE_LINKS page $(pluralCheckNoun link $PAGE_LINKS))."
     458   valPrint ctrh "I finished processing $LINKS_PROCESSED of $LINK_COUNT $(pluralCheckNoun link $LINK_COUNT) (there $(pluralCheckWas $FILE_LINKS) $FILE_LINKS file $(pluralCheckNoun link $FILE_LINKS) and $PAGE_LINKS page $(pluralCheckNoun link $PAGE_LINKS))."
    459459
    460460   # Print processed link totals
    461461   if [ $LINKS_PROCESSED -gt 0 ]; then valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link $LINKS_PROCESSED):"; fi
    462462   if [ $LINK_ERRORS -gt 0 ]; then valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link $LINK_ERRORS) could not be processed"; fi
    463    if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS processed $(pluralCheckNoun link $LINK_PROBLEMS) had issues"; fi
    464    if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctrh "  (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi
    465    if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) were OK"; fi
    466    if [ $TRIVIAL_RDS -gt 0 ]; then valPrint ctrh "  (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; fi
     463   if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS processed $(pluralCheckNoun link $LINK_PROBLEMS) had $(pluralCheckAn $LINK_PROBLEMS)$(pluralCheckNoun issue $LINK_PROBLEMS)"; fi
     464   if [ $LINKS_EXCEPTED -gt 0 ]; then valPrint ctr "  (excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; valPrint h "&nbsp;&nbsp;(excepted $LINKS_EXCEPTED $(pluralCheckNoun link $LINKS_EXCEPTED) from report)"; fi
     465   if [ $OK_LINKS -gt 0 ]; then valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link $OK_LINKS) $(pluralCheckWas $OK_LINKS) OK"; fi
     466   if [ $TRIVIAL_RDS -gt 0 ]; then valPrint ctr "  (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; valPrint h "&nbsp;&nbsp;(counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection $TRIVIAL_RDS) as OK)"; fi
    467467
    468468   # Print excepted link totals
     
    482482
    483483   # Print checked link totals
    484    if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "$LINK_PROBLEMS link $(pluralCheckNoun issues $LINKS_CHECKED):"; fi
     484   if [ $LINK_PROBLEMS -gt 0 ]; then valPrint ctrh "$LINK_PROBLEMS link $(pluralCheckNoun issue $LINK_PROBLEMS):"; fi
    485485   if [ $NG_LINKS -gt 0 ]; then valPrint ctrh "- $NG_LINKS NG $(pluralCheckNoun link $NG_LINKS)"; fi
    486486   if [ $RD_LINKS -gt 0 ]; then valPrint ctrh "- $RD_LINKS $(pluralCheckNoun redirection $RD_LINKS)"; fi
     
    641641   if [ "$NS_NAME" == "" ]; then
    642642      if [ $NS_ID == "NULL" ]; then
    643          valPrint trs "Skipping URL on line $LINK_NUM because the namespace (and probably the page too) is \"NULL\". Probably the link is no longer in existence on the wiki."
     643         valPrint trs "Skipping line $LINK_NUM ('$LINE') because the namespace (and probably the page too) is 'NULL'. Probably the link is no longer in existence on the wiki."
    644644      else
    645          valPrint trs "Skipping URL on line $LINK_NUM found on page $PAGE_NAME because I could not find a name for namespace ID $NS_ID."
     645         valPrint trs "Skipping line $LINK_NUM ('$LINE') because I could not find a name for namespace ID $NS_ID."
    646646      fi
    647647      let SKIP_UNK_NS+=1
     
    658658   PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//')
    659659   if [ $PAGE_NAME_SUFFIX == "js" ]; then
    660       valPrint trs "Skipping URL on line $LINK_NUM because it was found on JavaScript page $PAGE_NAME."
     660      valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$PAGE_NAME'."
    661661      let SKIP_JS_PAGE+=1
    662662      continue
     
    679679   # Scan for illegal characters
    680680   if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then
    681       valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because it contains characters illegal in a URL."
     681      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because it contains characters illegal in a URL."
    682682      let SKIP_BAD_URL+=1
    683683      continue
     
    696696   # 'sed' cannot handle Unicode in my Bash shell, so skip this URL and make user check it
    697697   if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then
    698       valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I cannot handle non-ASCII characters."
     698      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I cannot handle non-ASCII characters."
    699699      let SKIP_NON_ASCII+=1
    700700      continue
     
    759759   STR_TYPE=""
    760760   if [ $IS_FILE -eq -1 ]; then
    761       valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown URL ending $POST_DOT. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES."
     761      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown URL ending '$POST_DOT'. Please add this ending to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES."
    762762      let SKIP_UNK_SUFFIX+=1
    763763      continue
     
    772772   # Get response code using 'curl' to see if this link is valid; the --insecure option avoids an
    773773   # issue with sites that require HTTPS
    774    CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent '"$AGENT"' --max-time 10 --write-out '%{http_code}\n' $URL)
     774   CURL_CODE=$(curl -o /dev/null --silent --insecure --head --user-agent '$AGENT' --max-time 10 --write-out '%{http_code}\n' $URL)
    775775   CURL_ERR=$(echo $?)
    776776   CURL_RESULT=$CURL_CODE
     
    822822         if [[ $CODE == $CURL_CODE ]]; then
    823823            # Get URL header again in order to retrieve the URL we are being redirected to
    824             NEW_URL=$(curl -o /dev/null --silent --insecure --head --user-agent '"$AGENT"' --max-time 10 --write-out '%{redirect_url}\n' $URL)
     824            NEW_URL=$(curl -o /dev/null --silent --insecure --head --user-agent '$AGENT' --max-time 10 --write-out '%{redirect_url}\n' $URL)
    825825
    826826            # Adjust the old and new URLs to both use HTTP for comparison purposes, so we can filter
     
    842842            # wants those to be reported)
    843843            if [ $SHOW_HTTPS -eq 0 ] && [ $URL_HTTP == $NEW_URL_HTTP ]; then
    844                valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because we have not been asked to show http->https upgrades, and we were redirected to $NEW_URL."
     844               valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show http->https upgrades, and I was redirected to '$NEW_URL'."
    845845               STATUS="OK"
    846846               let OK_LINKS+=1
     
    849849            # those to be reported)
    850850            elif [ $SHOW_SLASH -eq 0 ] && [ $URL_HTTP == $NEW_URL_NO_SLASH ]; then
    851                valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because we have not been asked to show added trailing slashes, and we were redirected to $NEW_URL."
     851               valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I have not been asked to show added trailing slashes, and I was redirected to '$NEW_URL'."
    852852               STATUS="OK"
    853853               let OK_LINKS+=1
     
    875875   # If we didn't match a known status code, advise the reader
    876876   if [ $STATUS == "??" ]; then
    877       valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because I encountered the unknown return code $CURL_CODE."
     877      valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because I encountered the unknown return code $CURL_CODE."
    878878      let SKIP_UNK_CODE+=1
    879879      continue
     
    896896         EXCEPT_CODE=${GREP_RESULT%%,*}
    897897         if [ "$EXCEPT_CODE" == "$EXPECT_CODE" ]; then
    898             valPrint trs "Skipping URL $URL (found on page $PAGE_NAME) because its expected result, $EXPECT_CODE, is listed in the exceptions file."
     898            valPrint trs "Skipping URL '$URL' (found on page '$PAGE_NAME') because its expected result, $EXPECT_CODE, is listed in the exceptions file."
    899899            if [ $STATUS == "EI" ]; then
    900900               let SKIP_EXPECT_EI+=1
     
    926926      valPrint hn "<tr><td colspan=\"2\" align=\"right\">linked from</td><td><a href=\"$FULL_PAGE_PATH\" target=\"_blank\">$LOCAL_PAGE_PATH</a></td></tr>"
    927927
     928      # Place vertical space here since we won't be printing anything more about this link
     929      if [ $STATUS == "OK" ]; then valPrint trh ""; fi
     930
    928931      # Record redirect URL if one was given by a 3xx response page
    929932      if [ $STATUS == "RD" ]; then
     
    990993         fi
    991994      else
    992          valPrint trhs "Skipping screenshot of URL $URL because $SHOT_FILE already exists."
     995         valPrint trhs "Skipping screenshot of URL '$URL' because file '$SHOT_FILE' already exists."
    993996      fi
    994997   fi
Note: See TracChangeset for help on using the changeset viewer.