Changeset 1177 for Validate External Links
- Timestamp:
- Jan 13, 2023, 11:26:56 PM (2 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
Validate External Links/validate_external_links.sh
r1175 r1177
    1   1  #!/bin/bash
    2   2
-   3      # Validate External Links by Iritscen
+       3  # Validate External Links by Iritscen (iritscen@yahoo.com)
    4   4  #
    5   5  # Validates a list of external links in CSV format. The resulting logs are produced in three formats:
    …   …
   32  32  LINKS_URL="" # download external link CSV from this location (can use "file://" protocol)
   33  33  EXCEPT_URL="" # location of wiki page with a list of exceptions for NG results
-  34      OUTPUT_DIR="" # place reports and all other output in a folder inside this existing folder
+      34  OUTPUT_DIR="" # place reports and all other output in a folder inside this existing folder
   35  35  RECORD_OK_LINKS=0 # record response code to the log even when it's a value in OK_CODES
   36  36  SHOW_SLASH=0 # record issue when a slash is added to the end of a URL
    …   …
   98  98  RD_LINKS=0
   99  99  NG_LINKS=0
+     100  SKIP_PARSE_FAIL=0
+     101  SKIP_UNK_PROT=0
  100 102  SKIP_UNK_NS=0
  101 103  SKIP_JS_PAGE=0
    …   …
  506 508  LINKS_PROCESSED=$((LINK_NUM-URL_START+1))
  507 509  TRIVIAL_RDS=$((SKIP_SLASH_ADD+SKIP_HTTPS_UP+SKIP_YOUTU_BE))
- 508      LINK_ERRORS=$((SKIP_UNK_NS+SKIP_JS_PAGE+SKIP_BAD_URL+SKIP_NON_ASCII+SKIP_UNK_SUFFIX+SKIP_UNK_CODE))
+     510  LINK_ERRORS=$((SKIP_PARSE_FAIL+SKIP_UNK_PROT+SKIP_UNK_NS+SKIP_JS_PAGE+SKIP_BAD_URL+SKIP_NON_ASCII+SKIP_UNK_SUFFIX+SKIP_UNK_CODE))
  509 511  LINKS_EXCEPTED=$((SKIP_EXPECT_NG+SKIP_EXPECT_RD+SKIP_EXPECT_EI+SKIP_EXPECT_IW))
  510 512  LINK_PROBLEMS_TOTAL=$((NG_LINKS+RD_LINKS+EI_LINKS+IW_LINKS))
    …   …
  545 547  valPrint rt "$LINK_ERRORS link $(pluralCheckNoun error $LINK_ERRORS):"
  546 548  fi
+     549  if [ $SKIP_PARSE_FAIL -gt 0 ]; then valPrint ctrh "- $SKIP_PARSE_FAIL line-parsing $(pluralCheckNoun failure $SKIP_PARSE_FAIL)"; fi
+     550  if [ $SKIP_UNK_PROT -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_PROT unknown $(pluralCheckNoun protocol $SKIP_UNK_PROT)"; fi
  547 551  if [ $SKIP_UNK_NS -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_NS missing/unknown $(pluralCheckNoun namespace $SKIP_UNK_NS)"; fi
  548 552  if [ $SKIP_JS_PAGE -gt 0 ]; then valPrint ctrh "- $SKIP_JS_PAGE $(pluralCheckNoun link $SKIP_JS_PAGE) on $(pluralCheckA $SKIP_JS_PAGE)JavaScript $(pluralCheckNoun page $SKIP_JS_PAGE)"; fi
    …   …
  782 786  wrapupAndExit
  783 787  fi
+     788
+     789  # Parse line into namespace ID number, containing wiki page, and external link URL
+     790  NS_ID=${LINE%%,*}
+     791  PAGE_NAME=${LINE#$NS_ID,}
+     792  PAGE_NAME=${PAGE_NAME%%,*} # a comma in the page name will break this
+     793  URL=${LINE#$NS_ID,$PAGE_NAME,} # commas can be in this
+     794  if [ -z "$NS_ID" ] || [ -z "$PAGE_NAME" ] || [ -z "$URL" ]; then
+     795  valPrint trs "Skipping line $LINK_NUM ('$LINE') because the namespace, wiki page or link URL could not be read."
+     796  let SKIP_PARSE_FAIL+=1
+     797  continue
+     798  fi
+     799
+     800  # Skip any link that isn't "http://" or "https://"
+     801  if [[ ! $URL =~ ^http* ]]; then
+     802  valPrint trs "Skipping line $LINK_NUM ('$LINE') because the protocol isn't 'http://' or 'https://'."
+     803  let SKIP_UNK_PROT+=1
+     804  continue
+     805  fi
  784 806
  785 807  # Print progress to screen
    …   …
  788 810  fi
  789 811  valPrint cn "Evaluating URL $LINK_NUM/$LINK_COUNT..."
- 790
- 791      # The number of the namespace is the element before the first comma on the line
- 792      NS_ID=${LINE%%,*}
  793 812
  794 813  # Find namespace number in NS_IDS and use it to look up namespace's name in NS_NAMES
    …   …
  815 834  fi
  816 835
- 817      # The name of the page is everything between the namespace ID and the next comma on the line (commas
- 818      # in page names will break this)
- 819      PAGE_NAME=${LINE#$NS_ID,}
- 820      PAGE_NAME=${PAGE_NAME%%,*}
- 821
  822 836  # Build longer wiki page URLs from namespace and page names
  823 837  FULL_PAGE_PATH=https://$WIKI_PATH/$NS_NAME:$PAGE_NAME
    …   …
  839 853  continue
  840 854  fi
- 841
- 842      # The URL being linked to is everything after the previous two fields (this allows commas to be in
- 843      # the URLs, but a comma in the previous field, the page name, will break this)
- 844      URL=${LINE#$NS_ID,$PAGE_NAME,}
  845 855
  846 856  # Scan for illegal characters
Note:
See TracChangeset
for help on using the changeset viewer.