Changeset 1157


Ignore:
Timestamp:
May 9, 2021, 11:53:48 PM (3 years ago)
Author:
iritscen
Message:

ValExtLinks: Make sure that bad YT links count as NG. Various tweaks to project organization.

Location:
Validate External Links
Files:
4 edited
1 moved

Legend:

Unmodified
Added
Removed
  • Validate External Links/validate_external_links.command

    r1150 r1157  
    22 22   #bash "$VALEXTLINKS" --links "$LINKS_ONLINE" --exceptions "$EXCEPT_ONLINE" --output "$REPORT_DIR" --record-ok-links --suggest-snapshots-ng --end-url 70
    23 23
    24    - # Run with start/end URLs, record OK codes, and don't upload
    25    - #bash "$VALEXTLINKS" --links "$LINKS_ONLINE" --exceptions "$EXCEPT_ONLINE" --output "$REPORT_DIR" --record-ok-links --suggest-snapshots-ng --start-url 2588 --end-url 2594
       24 + # Run with start/end URLs, record OK codes, suggest OK snapshots, and don't upload
       25 + #bash "$VALEXTLINKS" --links "$LINKS_ONLINE" --exceptions "$EXCEPT_ONLINE" --output "$REPORT_DIR" --record-ok-links --suggest-snapshots-ng --suggest-snapshots-ok --start-url 2836 --end-url 3300
    26 26
    27 27   # Run with local extlinks and exceptions, start/end URLs, record OK codes, and don't upload
  • Validate External Links/validate_external_links.sh

    r1149 r1157  
    87 87
    88 88   # These are parallel arrays giving the prefixes that can be used in place of normal external links to
    89    - # some wikis and other sites
       89 + # some wikis and other sites; based on https://wiki.oni2.net/Special:Interwiki
    90 90   declare -a INTERWIKI_PREFIXES=(commons metawikimedia mw wikibooks wikidata wikimedia wikinews wikiquote wikisource wikispecies wikiversity wikivoyage wikt wp)
    91 91   declare -a INTERWIKI_DOMAINS=(commons.wikimedia.org meta.wikimedia.org mediawiki.org wikibooks.org wikidata.org wikimediafoundation.org wikinews.org wikiquote.org wikisource.org species.wikimedia.org wikiversity.org wikivoyage.org wiktionary.org wikipedia.org)
     
    234 234         --end-url )              URL_LIMIT=$2;                       shift 2;;
    235 235         --upload )               UPLOAD_INFO=$2;                     shift 2;;
    236     -       * )                      echo "Invalid argument $1 detected. Aborting."; exit 1;;
        236 +       * )                      echo "Invalid argument '$1' detected. Aborting."; exit 1;;
    237 237     esac
    238 238   done
     
    960 960
    961 961               # If this is a YouTube link, we have to look at the actual page source to know if the video
    962     -             # is good or not
        962 +             # is good or not; override the link's info if it's actually NG
    963 963               if [[ $URL == *www.youtube.com* ]]; then
    964 964                  PAGE_TEXT=$(curl --silent --insecure --user-agent '$AGENT' --max-time $TIMEOUT $URL | grep "\"simpleText\":\"Video unavailable\"")
    965 965                  if [ ! -z "$PAGE_TEXT" ]; then
    966 966                     STATUS="NG"
        967 +                   CURL_RESULT=404
    967 968                     let OK_LINKS-=1
    968 969                     let NG_LINKS+=1
Note: See TracChangeset for help on using the changeset viewer.