source: Validate External Links/validate_external_links.sh @ 1177

Last change on this file since 1177 was 1177, checked in by iritscen, 5 months ago

ValExtLinks now skips URLs that aren't HTTP(S) protocol. Added some error-checking on line parsing. Added my email address.

File size: 57.5 KB
Line 
#!/bin/bash

# Validate External Links by Iritscen (iritscen@yahoo.com)
#
# Validates a list of external links in CSV format. The resulting logs are produced in three formats:
# - TXT (for easy diffing with an earlier log)
# - RTF (for reading as a local file with clickable links)
# - HTML (for reading as a web page)
# Call script with "--help" argument for documentation. Also see Read Me.rtf for critical notes.
#
# Recommended rule:
# |----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----- ----|
#
# Table of contents (sections of script in order of appearance, not execution):
# • Globals
# • Help Output
# • Setup
# • Utility Functions
# • Summary Output
# • Initialization
#   • Data Sourcing
#   • Config Output
#   • Legend Output
# • Main Loop

# Split words on newlines only, so that a value containing spaces stays in one piece when expanded
IFS=$'\n'
29
### GLOBALS ###
# Settings -- these will be changed from their defaults by the arguments passed in to the script

# download external link CSV from this location (can use "file://" protocol)
LINKS_URL=""
# location of wiki page with a list of exceptions for NG results
EXCEPT_URL=""
# place reports and all other output in a folder inside this existing folder
OUTPUT_DIR=""
# record response code to the log even when it's a value in OK_CODES
RECORD_OK_LINKS=0
# record issue when a slash is added to the end of a URL
SHOW_SLASH=0
# record issue when "http" is upgraded to "https"
SHOW_HTTPS=0
# record redirection for a youtu.be URL expanding to the full URL
SHOW_YT_RD=0
# query the Internet Archive for a possible snapshot URL for each NG page
SUGGEST_SNAPSHOTS_NG=0
# query the Internet Archive for an existing snapshot of each OK page
SUGGEST_SNAPSHOTS_OK=0
# check URLs on archive.org and archive.is
CHECK_ARCHIVE_LINKS=0
# take a screenshot of each OK page
TAKE_PAGE_SHOT=0
# time to wait for a response when querying a site
TIMEOUT=10
# path to a copy of Google Chrome that has the command-line screenshot feature
CHROME_PATH=""
# start at this URL in LINKS_FILE
URL_START=1
# if non-zero, stop at this URL in LINKS_FILE
URL_LIMIT=0
# path to a file on your hard drive with the login info needed to upload a report
UPLOAD_INFO=""
48
# Fixed strings -- see the occurrences of these variables to learn their purpose
AGENT="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.134 Safari/537.36"
ARCHIVE_API="http://archive.org/wayback/available"
ARCHIVE_GENERIC="https://web.archive.org/web/*"
ARCHIVE_OK_CODES="statuscodes=200&statuscodes=203&statuscodes=206"
CHROME_SCREENSHOT="screenshot.png"
EXCEPT_FILE_NAME="exceptions.txt"
EXPECT_SCRIPT_NAME="val_expect_sftp.txt"
WIKI_CURL="https://wiki.oni2.net/Validate_External_Links/Curl_codes"
WIKI_HTTP="https://wiki.oni2.net/Validate_External_Links/HTTP_codes"
WIKI_MAIN="https://wiki.oni2.net/Validate_External_Links"
WIKI_ME="http://iritscen.oni2.net"
# Directory holding this script. The expansions are quoted so that a path containing glob characters
# is taken literally, and 'pwd' only runs if the 'cd' worked.
THIS_DIR=$(cd "$(dirname "$0")" && pwd)
# Directory the script was launched from
WORKING_DIR=$(pwd)
WIKI_PATH="wiki.oni2.net"
64
# Parallel arrays: the ID of each of OniGalore's current namespaces, and the name at the same index
NS_IDS=(
   -2 -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
   100 101 102 103 104 105 108 109 110 111
)
NS_NAMES=(
   "Media" "Special" "Main" "Talk" "User" "User_talk" "OniGalore" "OniGalore_talk" "File"
   "File_talk" "MediaWiki" "MediaWiki_talk" "Template" "Template_talk" "Help" "Help_talk"
   "Category" "Category_talk" "BSL" "BSL_talk" "OBD" "OBD_talk" "AE" "AE_talk" "Oni2" "Oni2_talk"
   "XML" "XML_talk"
)

# URL suffixes that mark a link as a file versus a page. A page can be screenshotted, whereas for a
# file the script just gets its HTTP code.
HTTP_FILES=(
   3ds 7z avi BINA blend bsl cpp csv dae dll dmg doc east exe fbx first63 flv gamedata gif jpg
   last32 log m4a mhm mov mp3 mp4 oni ONWC pdf png psd py rar tga TRMA txt vbs wav wmv xaf xcf
   xlsx xml zip
)
HTTP_TLDS_AND_PAGES=(
   abstract action ars asp aspx cfm cgi com css de do full htm html it js jsp net org pgi php
   php3 phtml pl ru shtml stm uk x
)

# HTTP response codes sorted into OK (good), RD (redirections), and NG (no good). Pages that return
# OK codes will be screenshotted. Remember to update http_codes.txt if you add a new code.
OK_CODES=(200 401 405 406 418 501)
RD_CODES=(301 302 303 307 308)
NG_CODES=(000 400 403 404 410 429 500 502 503 504 530)

# Characters not allowed in a URL. Curly braces are sometimes used on the wiki to build a link using
# transcluded text, and if the transclusion fails, then the braces show up in the URL
ILLEGAL_CHARS="{ }"

# The shortest URL possible, used for sanity-checking some URLs: http://a.co
MIN_URL_LENGTH=11

# Parallel arrays: the prefixes that can be used in place of normal external links to some wikis and
# other sites, and the domain each prefix stands for; based on https://wiki.oni2.net/Special:Interwiki
INTERWIKI_PREFIXES=(
   commons metawikimedia mw wikibooks wikidata wikimedia wikinews wikiquote wikisource
   wikispecies wikiversity wikivoyage wikt wp
)
INTERWIKI_DOMAINS=(
   commons.wikimedia.org meta.wikimedia.org mediawiki.org wikibooks.org wikidata.org
   wikimediafoundation.org wikinews.org wikiquote.org wikisource.org species.wikimedia.org
   wikiversity.org wikivoyage.org wiktionary.org wikipedia.org
)
92
# Variables for keeping track of main loop progress and findings

# Position in the links list
LINK_NUM=0

# Links tallied by result type (EI, IW, OK, RD, NG)
EI_LINKS=0 IW_LINKS=0 OK_LINKS=0 RD_LINKS=0 NG_LINKS=0

# Links that could not be processed; the summary adds these together as "link errors"
SKIP_PARSE_FAIL=0 SKIP_UNK_PROT=0 SKIP_UNK_NS=0 SKIP_JS_PAGE=0
SKIP_BAD_URL=0 SKIP_NON_ASCII=0 SKIP_UNK_SUFFIX=0 SKIP_UNK_CODE=0

# Link issues left out of the report; the summary adds these together as "excepted"
SKIP_EXPECT_NG=0 SKIP_EXPECT_RD=0 SKIP_EXPECT_EI=0 SKIP_EXPECT_IW=0

# Redirections that the summary counts as trivial
SKIP_HTTPS_UP=0 SKIP_SLASH_ADD=0 SKIP_YOUTU_BE=0

# archive.org/archive.is links that were not checked
SKIP_ARCHIVES=0

# Links tallied by whether they point to a file or a page
FILE_LINKS=0 PAGE_LINKS=0

# Run state
SKIPPED_HEADER_ROW=0
FINISHED_LIST="no"
START_RUN=0 END_RUN=0
122
123
### HELP OUTPUT ###
# A pseudo-man page, written to stdout. The SYNOPSIS names the two snapshot options exactly as the
# argument parser accepts them (--suggest-snapshots-ng and --suggest-snapshots-ok). The here-doc
# delimiter is quoted so that the page text is always printed literally. Here is the 80-character
# rule for the page text:
# 234567890123456789012345678901234567890123456789012345678901234567890123456789
function printHelp()
{
   cat << 'EOF'

NAME
       Validate External Links

SYNOPSIS
       validate_external_links.sh --help
       validate_external_links.sh --links URL --output DIR [--exceptions URL]
          [--record-ok-links] [--show-added-slashes] [--show-https-upgrades]
          [--show-yt-redirects] [--suggest-snapshots-ng]
          [--suggest-snapshots-ok] [--check-archive-links]
          [--take-screenshots FILE] [--timeout NUM] [--start-url NUM]
          [--end-url NUM] [--upload FILE]

DESCRIPTION
       This script parses a list of external links found in the OniGalore wiki
       (which is dumped by the Oni2.net server periodically in a particular
       format), validates them using the Unix tool 'curl', and produces a report
       of which links were "OK" (responded positively to an HTTP query), which
       were "RD" (responded with a 3xx redirect code), which could be "IW"
       (interwiki) links, which are "EI" (external internal) links and could be
       intrawiki links, and which were "NG" (no good; a negative response to the
       query). This report can then be automatically uploaded to the location of
       your choice. The script can also suggest Internet Archive snapshots for
       "NG" links, and take screenshots of "OK" links for visual verification by
       the reader that the page in question is the one intended to be displayed.

       You must pass this script the URL at which the list of links is found
       (--links) and the path where the directory of logs should be outputted
       (--output). All other arguments are optional.

OPTIONS
       --help                  Show this page.
       --links URL             (required) URL from which to download the CSV
                               file with external links. Note that this URL can
                               be a local file if you supply a file:// path.
       --output DIR            (required) Unix path to directory in which Val
                               should place its reports.
       --exceptions URL        In order to remove links from the report which
                               Val finds an issue with but which you regard as
                               OK, list those desired exceptions on a wiki page.
                               See the sample file "exceptions.pdf" for the
                               required format of the page. Note that this URL
                               can point to a local file if you supply a path
                               beginning with "file://".
       --record-ok-links       Log a link in the report even if its response
                               code is "OK".
       --show-added-slashes    Report on redirects that simply add a '/' to the
                               end of the URL.
       --show-https-upgrades   Report on redirects that simply upgrade a
                               "http://" URL to a "https://" URL.
       --show-yt-redirects     Report on redirects that expand a youtu.be URL.
       --suggest-snapshots-ng  Query the Internet Archive for a possible
                               snapshot URL for each "NG" page.
       --suggest-snapshots-ok  Query the Internet Archive for a snapshot of each
                               "OK" page just to make sure it's available. Note
                               that this will add a tremendous amount of time to
                               the script execution because there is a rate
                               limit to the Archive API. Note that this option
                               does nothing unless you also use the
                               --record-ok-links argument.
       --check-archive-links   Check links that are already pointing to a page
                               on the Internet Archive or archive.is (AKA
                               archive.today). In theory these links should be
                               totally stable and not need validation.
       --take-screenshots FILE Call the Google Chrome binary at this path to
                               take screenshots of each "OK" page.
       --timeout NUM           Wait this many seconds for a site to respond. The
                               default is 10. Important note: Val will attempt
                               to reach each URL three times, so the time taken
                               to ping an unresponsive site will be three times
                               this setting.
       --start-url NUM         Start at this link in the links CSV file.
       --end-url NUM           Stop at this link in the links CSV file.
       --upload FILE           Upload report using the credentials and path
                               given in this local text file. See sftp_login.txt
                               for template.

BUGS
       The script cannot properly parse any line in the external links file
       which contains a comma in the name of the wiki page containing a link.
       Commas in the link itself are not an issue.
EOF
}
212
213
### SETUP ###
# Page through the help text and quit if the script was run with no arguments at all or with "--help"
# as the first argument
if [[ $# -eq 0 || "$1" == "--help" ]]; then
   printHelp | less
   exit 0
fi
220
# Parses the command-line arguments passed to it into the settings globals. Exits with code 1 if it
# meets an unknown argument, an option whose required value is missing, or a non-numeric value for
# an option that takes a number.
function parseArguments()
{
   while (( $# )); do
      # A value-taking option at the very end of the argument list has nothing to consume. Without
      # this check, "shift 2" would fail without shifting anything and the loop would never end.
      case "$1" in
         --links | --exceptions | --output | --take-screenshots | --timeout | --start-url | --end-url | --upload )
            if (( $# < 2 )); then
               echo "Argument '$1' requires a value. Aborting."
               exit 1
            fi;;
      esac

      # These values are used in integer comparisons and arithmetic later on
      case "$1" in
         --timeout | --start-url | --end-url )
            if [[ ! "$2" =~ ^[0-9]+$ ]]; then
               echo "Argument '$1' requires a whole number but received '$2'. Aborting."
               exit 1
            fi;;
      esac

      case "$1" in
         --links )                LINKS_URL="$2";                     shift 2;;
         --exceptions )           EXCEPT_URL="$2";                    shift 2;;
         --output )               OUTPUT_DIR="$2";                    shift 2;;
         --record-ok-links )      RECORD_OK_LINKS=1;                  shift;;
         --show-added-slashes )   SHOW_SLASH=1;                       shift;;
         --show-https-upgrades )  SHOW_HTTPS=1;                       shift;;
         --show-yt-redirects )    SHOW_YT_RD=1;                       shift;;
         --suggest-snapshots-ng ) SUGGEST_SNAPSHOTS_NG=1;             shift;;
         --suggest-snapshots-ok ) SUGGEST_SNAPSHOTS_OK=1;             shift;;
         --check-archive-links )  CHECK_ARCHIVE_LINKS=1;              shift;;
         --take-screenshots )     TAKE_PAGE_SHOT=1; CHROME_PATH="$2"; shift 2;;
         --timeout )              TIMEOUT="$2";                       shift 2;;
         --start-url )            URL_START="$2";                     shift 2;;
         --end-url )              URL_LIMIT="$2";                     shift 2;;
         --upload )               UPLOAD_INFO="$2";                   shift 2;;
         * )                      echo "Invalid argument '$1' detected. Aborting."; exit 1;;
      esac
   done
}

# Parse the script's arguments, then discard them so the positional parameters end up empty
parseArguments "$@"
shift "$#"
242
# If the required arguments were not supplied, point to the help page and quit. All operands below are
# quoted so that a URL or path containing glob characters ('?', '*', '[') is tested as-is.
if [ -z "$LINKS_URL" ] || [ -z "$OUTPUT_DIR" ]; then
   echo "Error: I did not receive one or both required arguments. Run me with the \"--help\" argument for documentation."
   exit 2
fi

# If user wants screenshots, make sure path to Chrome was passed in and is valid
if [ "$TAKE_PAGE_SHOT" -eq 1 ] && [ ! -f "$CHROME_PATH" ]; then
   echo "Error: You need to supply a path to the Google Chrome application in order to take screenshots."
   exit 3
fi

# Check that UPLOAD_INFO exists, if this argument was supplied
if [ -n "$UPLOAD_INFO" ] && [ ! -f "$UPLOAD_INFO" ]; then
   echo "Error: The file $UPLOAD_INFO supplied by the --upload argument does not appear to exist. Aborting."
   exit 4
fi

# Check that OUTPUT_DIR is a directory
if [ ! -d "$OUTPUT_DIR" ]; then
   echo "Error: The path $OUTPUT_DIR supplied by the --output argument does not appear to be a directory. Aborting."
   exit 5
fi
268
# Make timestamped folder inside OUTPUT_DIR for this session's log and screenshots
SAFE_TIME=$(date "+%Y-%m-%d--%H-%M-%S") # for use in path names
# NICE_TIME follows the layout of an HTTP date (e.g. "Wed, 21 Oct 2015 07:28:00 GMT"), which is the
# format of LINKS_DATE; the hour is zero-padded (%H) as in that format
NICE_TIME=$(TZ=":UTC" date "+%a, %d %b %Y %H:%M:%S GMT")
OUTPUT_FOLDER="ValExtLinks ($SAFE_TIME)"
OUTPUT_PATH="$OUTPUT_DIR/$OUTPUT_FOLDER"
SHOT_PATH="$OUTPUT_PATH/Screenshots"
LOG_NAME="ValExtLinks report"
LOG_NAME_TXT="$LOG_NAME.txt"
LOG_NAME_RTF="$LOG_NAME.rtf"
LOG_NAME_HTM="$LOG_NAME.htm"
LOG_PATH="$OUTPUT_PATH/$LOG_NAME"
LOG_PATH_TXT="$LOG_PATH.txt"
LOG_PATH_RTF="$LOG_PATH.rtf"
LOG_PATH_HTM="$LOG_PATH.htm"
mkdir "$OUTPUT_PATH"

# Check that 'mkdir' succeeded before trying to create anything inside the new folder
if [ ! -d "$OUTPUT_PATH" ]; then
   echo "Error: I could not create the folder \"$OUTPUT_FOLDER\" inside the directory $OUTPUT_DIR. Aborting."
   exit 6
fi

if [ "$TAKE_PAGE_SHOT" -eq 1 ]; then
   mkdir "$SHOT_PATH"
fi
293
# Get date on the file at LINKS_URL, for printing to the log. The header name is matched without
# regard to case because it arrives in lowercase over HTTP/2, and the carriage return that ends each
# header line is removed so that it doesn't leak into the reports.
LINKS_DATE=$(curl --silent --head "$LINKS_URL" | grep -i -m 1 "^Last-Modified:" | tr -d '\r')
if [ -z "$LINKS_DATE" ]; then
   echo "Error: I could not find the external links file at the path \"$LINKS_URL\" supplied by the --links argument. Aborting."
   exit 7
fi

# Strip the header name, then the space that normally follows the colon
LINKS_DATE=${LINKS_DATE#*:}
LINKS_DATE=${LINKS_DATE# }
301
302
### UTILITY FUNCTIONS ###
# Writes the opening lines of the TXT log: report title, generation time, date of the source data, and
# author contact, followed by an empty line
function printTXTheader()
{
   local headerLine
   for headerLine in \
      "Validate External Links report" \
      "generated $NICE_TIME" \
      "from data of $LINKS_DATE" \
      "script by Iritscen (contact: $WIKI_ME)" \
      ""
   do
      valPrint t "$headerLine"
   done
}
313
# Writes the opening of the RTF log in one call: the RTF header, then a centered title block with the
# generation time, source data date and a contact hyperlink, then a paragraph reset that drops the
# centering for the text that follows
function printRTFheader()
{
   # Fixed markup is single-quoted so that its backslashes need no escaping
   local docPreamble='{\rtf1\ansi\ansicpg1252\cocoartf1504\cocoasubrtf820
{\fonttbl\f0\fswiss\fcharset0 Helvetica;}
{\colortbl;\red255\green255\blue255;}
{\*\expandedcolortbl;;}
\margl1440\margr1440\vieww12600\viewh12100\viewkind0
\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\li360\pardirnatural\qc\partightenfactor0
'

   # Each "\\" below comes out as a single backslash at the end of its line
   local titleBlock="
\f0\fs28 \cf0 \b1 Validate External Links report \b0\\
generated $NICE_TIME\\
from data of $LINKS_DATE\\
script by Iritscen ({\field{\*\fldinst{HYPERLINK \"$WIKI_ME\"}}{\fldrslt contact}})
\\
"

   local bodySetup='\pard\tx720\tx1440\tx2160\tx2880\tx3600\tx4320\tx5040\tx5760\tx6480\tx7200\tx7920\tx8640\li360\pardirnatural\partightenfactor0
\cf0 '

   valPrint r "$docPreamble$titleBlock$bodySetup"
}
332
# Ends the RTF log by writing the brace that closes the group opened at the start of the RTF header
function printRTFfooter()
{
   local closingBrace='}'
   valPrint r "$closingBrace"
}
338
# Writes the opening of the HTML log: the document head, then a heading with the report title and a
# subheading with the generation time, source data date and a contact link
function printHTMheader()
{
   local reportTitle="Validate External Links report"
   local contactLink="<a href=\"$WIKI_ME\" target=\"_blank\">contact</a>"

   valPrint h "<html>
<head>
<title>$reportTitle</title>
</head>
<body>
<h2>$reportTitle</h2>
<h3>generated $NICE_TIME<br />
from data of $LINKS_DATE<br />
script by Iritscen ($contactLink)</h3>"
}
352
# Ends the HTML log by closing the body and html elements opened by the HTML header
function printHTMfooter()
{
   local closingTags="</body>
</html>"
   valPrint h "$closingTags"
}
359
# The central logging function.
# Parameter 1: a string of one or more flag characters:
#   'c' = print to the console        't' = append to the TXT log
#   'r' = append to the RTF log       'h' = append to the HTML log
#   'n' = don't end the line          's' = print an extra line break at the end
#   'w' = don't pass console output through 'fmt' ('fmt' fits the output to an 80-column CLI but can
#         break special formatting and the 'n' option)
# Parameter 2: the text to print
# In the RTF and HTML logs a line break is markup ("\line", "<br />"), so there 'n' only omits that
# markup. When flags are combined, the first pattern listed for an output wins; note that the HTML
# log tests 's' before 'n', unlike the other outputs.
function valPrint()
{
   local flags="$1"
   local text="$2"

   # Console
   case "$flags" in *c*)
      case "$flags" in
         *n*) echo -n "$text";;
         *w*) echo "$text";;
         *s*) echo -e "$text\n";;
         *)   echo "$text" | fmt -w 80;;
      esac;;
   esac

   # TXT log
   case "$flags" in *t*)
      case "$flags" in
         *n*) echo -n "$text";;
         *s*) echo -e "$text\n";;
         *)   echo "$text";;
      esac >> "$LOG_PATH_TXT";;
   esac

   # RTF log
   case "$flags" in *r*)
      case "$flags" in
         *n*) echo "$text";;
         *s*) echo "$text\line\line";;
         *)   echo "$text\line";;
      esac >> "$LOG_PATH_RTF";;
   esac

   # HTML log
   case "$flags" in *h*)
      case "$flags" in
         *s*) echo "$text<tr><td>&nbsp;</td></tr>";;
         *n*) echo "$text";;
         *)   echo "$text<br />";;
      esac >> "$LOG_PATH_HTM";;
   esac
}
406
# Prints the noun in parameter 1, pluralized if the number in parameter 2 is not 1. A noun ending in
# 'x' gets "es" and any other noun gets "s". If parameter 2 is not a usable number, the noun is
# printed unchanged. The output is quoted so that the noun is always printed literally rather than
# being treated as a file name pattern.
function pluralCheckNoun()
{
   local ending="s"
   if [[ $1 =~ x$ ]]; then
      ending="es"
   fi

   # Drop the ending for a count of exactly 1 (or when the comparison itself fails)
   [ "$2" -ne 1 ] || ending=""

   echo "$1$ending"
}
420
# Prints "is" when parameter 1 is the number 1 and "are" for any other number
function pluralCheckIs()
{
   local verb="are"
   [ "$1" -ne 1 ] || verb="is"
   echo "$verb"
}
430
# Prints "was" when parameter 1 is the number 1 and "were" for any other number
function pluralCheckWas()
{
   local verb="were"
   [ "$1" -ne 1 ] || verb="was"
   echo "$verb"
}
440
# Prints the article "a " (with its trailing space) when parameter 1 is the number 1; prints nothing
# for any other number
function pluralCheckA()
{
   [ "$1" -eq 1 ] || return 0
   echo "a "
}
448
# Prints the article "an " (with its trailing space) when parameter 1 is the number 1; prints nothing
# for any other number
function pluralCheckAn()
{
   [ "$1" -eq 1 ] || return 0
   echo "an "
}
456
# Upload the reports using info specified in the --upload argument. ONLY USE "valPrint c" here, as the
# reports being saved to disk have already been closed.
#
# The file at UPLOAD_INFO is searched for lines beginning with "user:", "pw:", "port:" and "path:";
# whatever follows each marker is handed, together with the local and remote file names, to the
# 'expect' script found next to this script. Each of the three report formats is uploaded in turn
# and the outcome is printed to the console.
# NOTE(review): the password is passed to 'expect' as a command-line argument, which other users on
# the machine may be able to see in the process list while the upload runs.
function uploadReport()
{
   valPrint c "Uploading reports..."

   local scriptPath="$THIS_DIR/$EXPECT_SCRIPT_NAME"
   local sftpUserName sftpPassword sftpPort sftpPath

   # Markers are anchored to the start of the line so that one field's value cannot be mistaken for
   # another field's marker; only the first matching line is used
   sftpUserName=$(grep -m 1 "^user:" "$UPLOAD_INFO")
   sftpUserName=${sftpUserName#user:}
   sftpPassword=$(grep -m 1 "^pw:" "$UPLOAD_INFO")
   sftpPassword=${sftpPassword#pw:}
   sftpPort=$(grep -m 1 "^port:" "$UPLOAD_INFO")
   sftpPort=${sftpPort#port:}
   sftpPath=$(grep -m 1 "^path:" "$UPLOAD_INFO")
   sftpPath=${sftpPath#path:}

   local suffix uploadStatus
   for suffix in htm rtf txt; do
      expect "$scriptPath" "$LOG_PATH.$suffix" "$sftpUserName" "$sftpPassword" "$sftpPort" "$sftpPath" "$LOG_NAME.$suffix"

      # Save the status right away; "$?" is overwritten by the very next command, including '['
      uploadStatus=$?
      if [ "$uploadStatus" -ne 0 ]; then
         valPrint c "Error $uploadStatus occurred when attempting to upload $LOG_NAME.$suffix!"
      else
         valPrint c "Report in $(echo "$suffix" | tr '[:lower:]' '[:upper:]') format was uploaded."
      fi
   done
}
487
# Prints the session summary to the console and the logs, closes the logs' markup, uploads the
# reports if that was requested and the session was not canceled, and then exits the script with
# code 0. This is also the handler for an interrupt (Ctrl-C); see the 'trap' below the function.
function wrapupAndExit()
{
   # Links to the sibling reports, used in several of the HTML-only lines below
   local reportLinks="<a href=\"$LOG_NAME_RTF\" target=\"_blank\">RTF</a> or <a href=\"$LOG_NAME_TXT\" target=\"_blank\">TXT</a>"

   # LINK_COUNT has not been set yet if we are called before the links file was counted
   local linkCount=${LINK_COUNT:-0}

   # Get off progress line on console, drop down a line from last link in log, and close HTML table
   valPrint ctr ""
   valPrint h "</table><br />"

   # If we didn't finish processing the last URL, then the iterator is one too high
   if [ "$FINISHED_LIST" != "yes" ]; then
      LINK_NUM=$((LINK_NUM-1))
      if [ "$FINISHED_LIST" == "no" ]; then
         valPrint ctrh "The session was canceled by the user."
      fi
   fi

   # Generate string with elapsed time. START_RUN still holds its initial 0 if we are called during
   # initialization (e.g. after a failed download), in which case no time is reported as elapsed
   # rather than the time since the epoch.
   END_RUN=$(date +%s)
   if [ "$START_RUN" -eq 0 ]; then
      START_RUN=$END_RUN
   fi
   local elapsedSecs=$((END_RUN - START_RUN))
   ELAPSED="$((elapsedSecs / 60)) min. $((elapsedSecs % 60)) sec. elapsed"

   # Do some math on results of session
   LINKS_PROCESSED=$((LINK_NUM-URL_START+1))
   if [ "$LINKS_PROCESSED" -lt 0 ]; then
      LINKS_PROCESSED=0 # nothing was processed at all
   fi
   TRIVIAL_RDS=$((SKIP_SLASH_ADD+SKIP_HTTPS_UP+SKIP_YOUTU_BE))
   LINK_ERRORS=$((SKIP_PARSE_FAIL+SKIP_UNK_PROT+SKIP_UNK_NS+SKIP_JS_PAGE+SKIP_BAD_URL+SKIP_NON_ASCII+SKIP_UNK_SUFFIX+SKIP_UNK_CODE))
   LINKS_EXCEPTED=$((SKIP_EXPECT_NG+SKIP_EXPECT_RD+SKIP_EXPECT_EI+SKIP_EXPECT_IW))
   LINK_PROBLEMS_TOTAL=$((NG_LINKS+RD_LINKS+EI_LINKS+IW_LINKS))
   LINK_PROBLEMS_NG=$((NG_LINKS-SKIP_EXPECT_NG))
   LINK_PROBLEMS_RD=$((RD_LINKS-SKIP_EXPECT_RD))
   LINK_PROBLEMS_EI=$((EI_LINKS-SKIP_EXPECT_EI))
   LINK_PROBLEMS_IW=$((IW_LINKS-SKIP_EXPECT_IW))
   LINK_PROBLEMS_NET=$((LINK_PROBLEMS_NG+LINK_PROBLEMS_RD+LINK_PROBLEMS_EI+LINK_PROBLEMS_IW))

   # Print something in the Links section if no link issues were printed
   if [ "$LINK_PROBLEMS_NET" -eq 0 ]; then
      valPrint h "<i>No link problems to report! See the $reportLinks report for a list of links with issues that were not reported.</i>"
   fi
   if [ "$LINK_PROBLEMS_TOTAL" -eq 0 ]; then
      valPrint t "No link problems to report!"
      valPrint r "\i1 No link problems to report! \i0"
   fi

   ## SUMMARY OUTPUT ##
   valPrint ct "Summary ($ELAPSED):"
   valPrint r "\b1 Summary \b0 ($ELAPSED)"
   valPrint hn "<h3><span id=\"summary\">Summary ($ELAPSED)</span></h3>"
   valPrint ctrh "I finished processing $LINKS_PROCESSED of $linkCount $(pluralCheckNoun link "$linkCount") (there $(pluralCheckWas "$FILE_LINKS") $FILE_LINKS file $(pluralCheckNoun link "$FILE_LINKS") and $PAGE_LINKS page $(pluralCheckNoun link "$PAGE_LINKS"))."

   # Print processed link totals
   if [ "$LINKS_PROCESSED" -gt 0 ]; then
      valPrint ctrh "$LINKS_PROCESSED processed $(pluralCheckNoun link "$LINKS_PROCESSED"):"
   fi
   if [ "$LINK_ERRORS" -gt 0 ]; then
      valPrint ctrh "- $LINK_ERRORS $(pluralCheckNoun link "$LINK_ERRORS") could not be processed"
   fi
   if [ "$SKIP_ARCHIVES" -gt 0 ]; then
      valPrint ctrh "- $SKIP_ARCHIVES archive.org/archive.is $(pluralCheckNoun link "$SKIP_ARCHIVES") $(pluralCheckWas "$SKIP_ARCHIVES") not checked"
   fi
   if [ "$LINK_PROBLEMS_TOTAL" -gt 0 ]; then
      valPrint ctrh "- $LINK_PROBLEMS_TOTAL processed $(pluralCheckNoun link "$LINK_PROBLEMS_TOTAL") had $(pluralCheckAn "$LINK_PROBLEMS_TOTAL")$(pluralCheckNoun issue "$LINK_PROBLEMS_TOTAL")"
   fi
   if [ "$LINKS_EXCEPTED" -gt 0 ]; then
      # Same wording in every format; only the indentation markup differs for HTML
      valPrint ctr "  (excepted $LINKS_EXCEPTED link $(pluralCheckNoun issue "$LINKS_EXCEPTED") from report)"
      valPrint h "&nbsp;&nbsp;(excepted $LINKS_EXCEPTED link $(pluralCheckNoun issue "$LINKS_EXCEPTED") from report)"
   fi
   if [ "$OK_LINKS" -gt 0 ]; then
      valPrint ctrh "- $OK_LINKS processed $(pluralCheckNoun link "$OK_LINKS") $(pluralCheckWas "$OK_LINKS") OK"
   fi
   if [ "$TRIVIAL_RDS" -gt 0 ]; then
      valPrint ctr "  (counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection "$TRIVIAL_RDS") as OK)"
      valPrint h "&nbsp;&nbsp;(counted $TRIVIAL_RDS trivial $(pluralCheckNoun redirection "$TRIVIAL_RDS") as OK)"
   fi

   # Print errored link totals
   if [ "$LINK_ERRORS" -gt 0 ]; then
      valPrint c "$LINK_ERRORS link $(pluralCheckNoun error "$LINK_ERRORS") (see RTF or TXT report for specific links):"
      valPrint h "$LINK_ERRORS link $(pluralCheckNoun error "$LINK_ERRORS") (see $reportLinks report for specific links):"
      valPrint rt "$LINK_ERRORS link $(pluralCheckNoun error "$LINK_ERRORS"):"
   fi
   if [ "$SKIP_PARSE_FAIL" -gt 0 ]; then valPrint ctrh "- $SKIP_PARSE_FAIL line-parsing $(pluralCheckNoun failure "$SKIP_PARSE_FAIL")"; fi
   if [ "$SKIP_UNK_PROT" -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_PROT unknown $(pluralCheckNoun protocol "$SKIP_UNK_PROT")"; fi
   if [ "$SKIP_UNK_NS" -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_NS missing/unknown $(pluralCheckNoun namespace "$SKIP_UNK_NS")"; fi
   if [ "$SKIP_JS_PAGE" -gt 0 ]; then valPrint ctrh "- $SKIP_JS_PAGE $(pluralCheckNoun link "$SKIP_JS_PAGE") on $(pluralCheckA "$SKIP_JS_PAGE")JavaScript $(pluralCheckNoun page "$SKIP_JS_PAGE")"; fi
   if [ "$SKIP_BAD_URL" -gt 0 ]; then valPrint ctrh "- $SKIP_BAD_URL illegal $(pluralCheckNoun URL "$SKIP_BAD_URL")"; fi
   if [ "$SKIP_NON_ASCII" -gt 0 ]; then valPrint ctrh "- $SKIP_NON_ASCII non-ASCII $(pluralCheckNoun URL "$SKIP_NON_ASCII")"; fi
   if [ "$SKIP_UNK_SUFFIX" -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_SUFFIX unknown URL $(pluralCheckNoun suffix "$SKIP_UNK_SUFFIX")"; fi
   if [ "$SKIP_UNK_CODE" -gt 0 ]; then valPrint ctrh "- $SKIP_UNK_CODE unknown response $(pluralCheckNoun code "$SKIP_UNK_CODE")"; fi

   # Print excepted link totals
   if [ "$LINKS_EXCEPTED" -gt 0 ]; then
      valPrint c "$LINKS_EXCEPTED link $(pluralCheckNoun problem "$LINKS_EXCEPTED") excepted (see RTF or TXT report for specific links):"
      valPrint h "$LINKS_EXCEPTED link $(pluralCheckNoun problem "$LINKS_EXCEPTED") excepted (see $reportLinks report for specific links):"
      valPrint rt "$LINKS_EXCEPTED link $(pluralCheckNoun problem "$LINKS_EXCEPTED") excepted:"
   fi
   if [ "$SKIP_EXPECT_NG" -gt 0 ]; then valPrint ctrh "- $SKIP_EXPECT_NG/$NG_LINKS NG $(pluralCheckNoun link "$NG_LINKS")"; fi
   if [ "$SKIP_EXPECT_RD" -gt 0 ]; then valPrint ctrh "- $SKIP_EXPECT_RD/$RD_LINKS $(pluralCheckNoun redirection "$RD_LINKS")"; fi
   if [ "$SKIP_EXPECT_EI" -gt 0 ]; then valPrint ctrh "- $SKIP_EXPECT_EI/$EI_LINKS external internal $(pluralCheckNoun link "$EI_LINKS")"; fi
   # IW links are the ones that could be interwiki links (the EI links are the intrawiki candidates)
   if [ "$SKIP_EXPECT_IW" -gt 0 ]; then valPrint ctrh "- $SKIP_EXPECT_IW/$IW_LINKS potential interwiki $(pluralCheckNoun link "$IW_LINKS")"; fi

   # Perform exceptions audit: report each exception whose link was never matched, or whose link was
   # matched but never with the listed code. Locals are used so that the EXCEPT_URL setting is not
   # overwritten.
   EXCEPTION_ISSUES=0
   valPrint ctrh "Exceptions list audit:"
   local i exceptLine exceptLink exceptPage exceptCode
   for ((i = 0; i < ${#EXCEPT_ARRAY[@]}; ++i)); do
      exceptLine="${EXCEPT_ARRAY[$i]}"
      exceptLine=$(echo "$exceptLine" | sed 's/\&amp;/\&/g') # copied from exception-matching code

      # Each line is taken apart as "code,link,page": the link is what lies between the first and
      # the last comma
      exceptLink="${exceptLine#*,}"
      exceptLink="${exceptLink%,*}"

      if [ "${EXCEPT_FOUND[$i]}" -eq 0 ]; then
         exceptPage="${exceptLine##*,}"
         exceptPage="${exceptPage%% *}"
         if [ "$exceptPage" == "*" ]; then
            valPrint tr "- The link '$exceptLink' did not return an error or is not on any page."
         else
            valPrint tr "- The link '$exceptLink' did not return an error or is not on page '$exceptPage'."
         fi
         EXCEPTION_ISSUES=$((EXCEPTION_ISSUES+1))
      elif [ "${EXCEPT_USED[$i]}" -eq 0 ]; then
         exceptCode=${exceptLine%%,*}
         valPrint tr "- The link '$exceptLink' did not return error code $exceptCode."
         EXCEPTION_ISSUES=$((EXCEPTION_ISSUES+1))
      fi
   done
   if [ "$EXCEPTION_ISSUES" -eq 0 ]; then
      valPrint ctrh "- No issues found."
   else
      valPrint c "$EXCEPTION_ISSUES exception list $(pluralCheckNoun issue "$EXCEPTION_ISSUES") detected (see RTF or TXT report for details)."
      valPrint h "$EXCEPTION_ISSUES exception list $(pluralCheckNoun issue "$EXCEPTION_ISSUES") detected (see $reportLinks report for details)."
   fi

   # Print checked link totals
   if [ "$LINK_PROBLEMS_NET" -gt 0 ]; then valPrint ctrh "$LINK_PROBLEMS_NET link $(pluralCheckNoun issue "$LINK_PROBLEMS_NET"):"; fi
   if [ "$LINK_PROBLEMS_NG" -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_NG NG $(pluralCheckNoun link "$LINK_PROBLEMS_NG")"; fi
   if [ "$LINK_PROBLEMS_RD" -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_RD $(pluralCheckNoun redirection "$LINK_PROBLEMS_RD")"; fi
   if [ "$LINK_PROBLEMS_EI" -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_EI $(pluralCheckNoun link "$LINK_PROBLEMS_EI") that could be intrawiki"; fi
   if [ "$LINK_PROBLEMS_IW" -gt 0 ]; then valPrint ctrh "- $LINK_PROBLEMS_IW $(pluralCheckNoun link "$LINK_PROBLEMS_IW") that could be interwiki"; fi

   # Close the log files' markup
   valPrint trh "ValExtLinks says goodbye."
   printRTFfooter
   printHTMfooter

   # Upload report if this was requested
   if [ -n "$UPLOAD_INFO" ] && [ "$FINISHED_LIST" != "no" ]; then
      uploadReport
   fi

   # Really quit now
   valPrint c "ValExtLinks says goodbye."
   exit 0
}
trap wrapupAndExit INT
624
625
### INITIALIZATION ###
# Announce the script on the console in bold, without 'fmt' wrapping so that the terminal control
# codes survive intact
valPrint cw "$(tput bold; printf '%s' "--Validate External Links--"; tput sgr0)"

# Start each of the three log files with its header
printTXTheader; printRTFheader; printHTMheader

## DATA SOURCING ##
# Section heading, in the markup of each log
valPrint t "Startup:"
valPrint r "\b1 Startup \b0"
valPrint hn "<h3>Startup</h3>"
637
# Attempt to download file at LINKS_URL into the session's output folder, then check that it
# succeeded. The file is named after the last path component of the URL. '--fail' makes curl report
# an HTTP error through its exit status, so that a server's error page is not saved and then mistaken
# for the list of links.
valPrint cwtrhn "Downloading list of external links from $LINKS_URL..."
LINKS_FILE_NAME="${LINKS_URL##*/}"
LINKS_FILE="$OUTPUT_PATH/$LINKS_FILE_NAME"
if curl --silent --fail -o "$LINKS_FILE" "$LINKS_URL" && [ -f "$LINKS_FILE" ]; then
   valPrint ctrh " success."
else
   printf '\n%s\n' "The download of $LINKS_URL appears to have failed. Aborting."
   wrapupAndExit
fi
649
# Fills EXCEPT_ARRAY with the non-empty lines of the text in parameter 1, and sets up EXCEPT_USED and
# EXCEPT_FOUND as parallel arrays of zeroes for marking which exceptions get used later. Each line is
# stored exactly as written; exception lines can contain '*' and '?', which must not be treated as
# file name patterns.
function loadExceptArray()
{
   EXCEPT_ARRAY=()
   EXCEPT_USED=()
   EXCEPT_FOUND=()

   local exceptLine
   while IFS= read -r exceptLine; do
      if [ -n "$exceptLine" ]; then
         EXCEPT_ARRAY+=("$exceptLine")
         EXCEPT_USED+=(0)
         EXCEPT_FOUND+=(0)
      fi
   done <<< "$1"
}

# Attempt to download file at EXCEPT_URL, then check that it succeeded
if [ -n "$EXCEPT_URL" ]; then
   valPrint cwtrhn "Downloading list of reporting exceptions from $EXCEPT_URL..."
   EXCEPT_DATA=$(curl --silent "$EXCEPT_URL")
   if [ -z "$EXCEPT_DATA" ]; then
      printf '\n%s\n' "The download of the exceptions data from '$EXCEPT_URL' appears to have failed. Aborting."
      wrapupAndExit
   else
      valPrint ctrh " success."
   fi

   # Keep only what lies between the "BEGIN LIST" and "END LIST" markers on the page
   EXCEPT_DATA=${EXCEPT_DATA%END LIST*}
   EXCEPT_DATA=${EXCEPT_DATA#*BEGIN LIST}
   EXCEPT_FILE="$OUTPUT_PATH/$EXCEPT_FILE_NAME"

   # Store on disk for debugging purposes
   echo "$EXCEPT_DATA" > "$EXCEPT_FILE"

   # Transfer to array for easy searching later
   loadExceptArray "$EXCEPT_DATA"
fi
678
# Count the lines in LINKS_FILE; feeding the file to 'wc' through stdin keeps 'wc' from printing the
# file name after the line count
LINK_COUNT_STRING=$(wc -l < "$LINKS_FILE")

# Strip the padding that 'wc' may emit, then subtract one because the first line of the CSV is the
# column header row rather than a link
LINK_COUNT=${LINK_COUNT_STRING//[[:space:]]/}
LINK_COUNT=$((LINK_COUNT - 1))
valPrint ctrh "Found $LINK_COUNT links to process."
valPrint trh ""
687
## CONFIG OUTPUT ##
valPrint t "Config:"
valPrint r "\b1 Config \b0"
valPrint hn "<h3>Config</h3>"

# Report how many links fall within the window requested through URL_START and URL_LIMIT
valPrint ctrhn "Links to consider: "
if [ "$URL_LIMIT" -ne 0 ] && [ "$URL_LIMIT" -lt "$LINK_COUNT" ]; then
   valPrint ctrh "$((URL_LIMIT-URL_START+1)), from link $URL_START to $URL_LIMIT"
elif [ "$URL_START" -ne 1 ]; then
   valPrint ctrh "$((LINK_COUNT-URL_START+1)), from link $URL_START to $LINK_COUNT"
else
   valPrint ctrh "$LINK_COUNT"
fi

valPrint ctrh "Site query timeout: $TIMEOUT seconds"

# Each entry below is "SETTING|answer when setting is 1|answer otherwise|label". The SHOW_* settings
# are phrased as "Ignore..." questions in the report, so their answers are inverted
for CONFIG_TOGGLE in \
   "RECORD_OK_LINKS|Yes|No|Show OK links: " \
   "TAKE_PAGE_SHOT|Yes|No|Take screenshots: " \
   "SUGGEST_SNAPSHOTS_NG|Yes|No|Suggest archive.org snapshots for NG pages: " \
   "SUGGEST_SNAPSHOTS_OK|Yes|No|Suggest archive.org snapshots for OK pages: " \
   "SHOW_SLASH|No|Yes|Ignore slash-adding redirects: " \
   "SHOW_HTTPS|No|Yes|Ignore HTTPS-upgrading redirects: " \
   "SHOW_YT_RD|No|Yes|Ignore youtu.be redirects: " \
   "CHECK_ARCHIVE_LINKS|Yes|No|Check archive.org and archive.is links: "
do
   # IFS is changed for this one 'read' only; the script-wide newline separator is untouched
   IFS='|' read -r TOGGLE_VAR TOGGLE_ON TOGGLE_OFF TOGGLE_LABEL <<< "$CONFIG_TOGGLE"
   valPrint ctrhn "$TOGGLE_LABEL"
   if [ "${!TOGGLE_VAR}" -eq 1 ]; then
      valPrint ctrh "$TOGGLE_ON"
   else
      valPrint ctrh "$TOGGLE_OFF"
   fi
done

valPrint tr "A summary of my findings will be found at the bottom of the report."
valPrint h "A summary of my findings will be found at the <a href=\"#summary\">bottom</a> of the report."
valPrint trh ""
731
## LEGEND OUTPUT ##
valPrint t "Legend:"
valPrint r "\b1 Legend \b0"
valPrint hn "<h3>Legend</h3>"

# Point the reader to the guidance page, and to the exceptions list only when one was actually
# supplied; otherwise the report would contain a sentence and hyperlink with an empty address
if [ -n "$EXCEPT_URL" ]; then
   valPrint t "(For guidance in fixing these links, see $WIKI_MAIN. The exceptions list is at $EXCEPT_URL.)"
   valPrint r "(For guidance in fixing these links, see {\field{\*\fldinst{HYPERLINK \"$WIKI_MAIN\"}}{\fldrslt here}}. The exceptions list is {\field{\*\fldinst{HYPERLINK \"$EXCEPT_URL\"}}{\fldrslt here}}.)"
   valPrint h "(For guidance in fixing these links, see <a href=\"$WIKI_MAIN\" target=\"_blank\">here</a>. The exceptions list is <a href=\"$EXCEPT_URL\" target=\"_blank\">here</a>.)"
else
   valPrint t "(For guidance in fixing these links, see $WIKI_MAIN.)"
   valPrint r "(For guidance in fixing these links, see {\field{\*\fldinst{HYPERLINK \"$WIKI_MAIN\"}}{\fldrslt here}}.)"
   valPrint h "(For guidance in fixing these links, see <a href=\"$WIKI_MAIN\" target=\"_blank\">here</a>.)"
fi
valPrint trh "OK = URL seems to be working"
valPrint trh "NG = URL no longer seems to work"
valPrint trh "RD = URL is redirecting to this new URL"
valPrint trh "EI = URL is an external link to an internal page and should be converted to an intrawiki link using the suggested markup"
valPrint trh "IW = URL is an external link to a fellow wiki and should be converted to an interwiki link using the suggested markup"
valPrint t "(xxx) = Unix tool 'curl' obtained this HTTP response status code (see here for code reference: $WIKI_HTTP)"
valPrint r "(xxx) = Unix tool 'curl' obtained this HTTP response status code (see {\field{\*\fldinst{HYPERLINK \"$WIKI_HTTP\"}}{\fldrslt here}} for code reference)"
valPrint h "(xxx) = Unix tool 'curl' obtained this HTTP response status code (see <a href=\"$WIKI_HTTP\" target=\"_blank\">here</a> for code reference)"
valPrint t "(000-xx) = 'curl' did not get an HTTP response code, but returned this exit code (see here for code reference: $WIKI_CURL)"
valPrint r "(000-xx) = 'curl' did not get an HTTP response code, but returned this exit code (see {\field{\*\fldinst{HYPERLINK \"$WIKI_CURL\"}}{\fldrslt here}} for code reference)"
valPrint h "(000-xx) = 'curl' did not get an HTTP response code, but returned this exit code (see <a href=\"$WIKI_CURL\" target=\"_blank\">here</a> for code reference)"
valPrint trh "IA suggests = Last available snapshot returned by the Internet Archive"
valPrint trh "Try browsing = The Archive failed to return a snapshot URL, so check for a snapshot manually using this link"
valPrint trh ""


### MAIN LOOP ###
valPrint t "Links:"
valPrint r "\b1 Links \b0"
valPrint hn "<h3>Links</h3>"
# Remember when the run began (seconds since the epoch)
START_RUN=$(date +%s)
760# Process each line of the .csv in LINKS_FILE
761for LINE in `cat "$LINKS_FILE"`; do
762   START_LINK=$(date +%s)
763   let LINK_NUM+=1
764
765   # First line is the column header row for the CSV, so let's verify that the format hasn't changed
766   if [ $SKIPPED_HEADER_ROW -eq 0 ]; then
767      if [ $LINE == "namespace,title,target" ]; then
768         SKIPPED_HEADER_ROW=1
769         LINK_NUM=0 # this line is not a link, so reset the link counter
770         valPrint hn "<table>"
771         continue
772      else
773         valPrint ctrh "Did not find expected header row in $LINKS_FILE. Aborting."
774         wrapupAndExit
775      fi
776   fi
777
778   # Skip this link if we are not at URL_START yet
779   if [ $LINK_NUM -lt $URL_START ]; then
780      continue
781   fi
782
783   # Stop if we are at the limit declared for testing purposes
784   if [ $URL_LIMIT -gt 0 ] && [ $LINK_NUM -gt $URL_LIMIT ]; then
785      FINISHED_LIST="limit"
786      wrapupAndExit
787   fi
788   
789   # Parse line into namespace ID number, containing wiki page, and external link URL
790   NS_ID=${LINE%%,*}
791   PAGE_NAME=${LINE#$NS_ID,}
792   PAGE_NAME=${PAGE_NAME%%,*} # a comma in the page name will break this
793   URL=${LINE#$NS_ID,$PAGE_NAME,} # commas can be in this
794   if [ -z "$NS_ID" ] || [ -z "$PAGE_NAME" ] || [ -z "$URL" ]; then
795      valPrint trs "Skipping line $LINK_NUM ('$LINE') because the namespace, wiki page or link URL could not be read."
796      let SKIP_PARSE_FAIL+=1
797      continue
798   fi
799   
800   # Skip any link that isn't "http://" or "https://"
801   if [[ ! $URL =~ ^http* ]]; then
802      valPrint trs "Skipping line $LINK_NUM ('$LINE') because the protocol isn't 'http://' or 'https://'."
803      let SKIP_UNK_PROT+=1
804      continue
805   fi
806
807   # Print progress to screen
808   if [ $LINK_NUM -gt 1 ]; then
809      printf "\e[1A\n" # erase previous progress message so that new one appears in its place
810   fi
811   valPrint cn "Evaluating URL $LINK_NUM/$LINK_COUNT..."
812
813   # Find namespace number in NS_IDS and use it to look up namespace's name in NS_NAMES
814   NS_NAME=""
815   a=0
816   while [ "x${NS_IDS[$a]}" != "x" ]; do # once this evaluates to "x", the array is done
817      if [ $NS_ID == "NULL" ]; then
818         break
819      elif [ $NS_ID -eq ${NS_IDS[$a]} ]; then
820         NS_NAME="${NS_NAMES[$a]}"
821         break
822      fi
823      let a+=1
824   done
825   if [ "$NS_NAME" == "" ]; then
826      if [ $NS_ID == "NULL" ]; then
827         valPrint trs "Skipping line $LINK_NUM ('$LINE') because the namespace (and probably the page too) is 'NULL'. Probably the link is no longer in existence on the wiki."
828      else
829         valPrint trs "Skipping line $LINK_NUM ('$LINE') because I could not find a name for namespace ID $NS_ID."
830      fi
831      let SKIP_UNK_NS+=1
832      let PAGE_LINKS+=1
833      continue
834   fi
835
836   # Build longer wiki page URLs from namespace and page names
837   FULL_PAGE_PATH=https://$WIKI_PATH/$NS_NAME:$PAGE_NAME
838   LOCAL_PAGE_PATH=$NS_NAME:$PAGE_NAME
839   # Namespace "Main:" cannot be a part of the path; it's an implicit namespace, and naming it
840   # explicitly breaks the link
841   if [ $NS_ID -eq 0 ]; then
842      FULL_PAGE_PATH=https://$WIKI_PATH/$PAGE_NAME
843      LOCAL_PAGE_PATH=$PAGE_NAME
844   fi
845
846   # We don't want to consider wiki pages ending in .js, as the MW parser cannot reliably isolate URLS
847   # in JavaScript code, so it returns erroneous links
848   PAGE_NAME_SUFFIX=$(echo $PAGE_NAME | sed 's/.*\.//')
849   if [ $PAGE_NAME_SUFFIX == "js" ]; then
850      valPrint trs "Skipping URL '${LINE#$NS_ID,$PAGE_NAME,}' on line $LINK_NUM because it was found on JavaScript page '$LOCAL_PAGE_PATH'."
851      let SKIP_JS_PAGE+=1
852      let PAGE_LINKS+=1
853      continue
854   fi
855
856   # Scan for illegal characters
857   if [[ $URL == *[$ILLEGAL_CHARS]* ]]; then
858      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because it contains characters illegal in a URL."
859      let SKIP_BAD_URL+=1
860      let PAGE_LINKS+=1
861      continue
862   fi
863
864   # If we're skipping archive links, see if this is one
865   if [ $CHECK_ARCHIVE_LINKS -eq 0 ] && [[ ( $URL == *web.archive.org* || $URL == *archive.is* ) ]]; then
866      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to check archive links."
867      let SKIP_ARCHIVES+=1
868      let PAGE_LINKS+=1
869      continue
870   fi
871
872   # Now we need to know if the URL is for a file or a web page. First step is to determine if the
873   # URL ends in a suffix
874   HAS_SUFFIX=0
875
876   # If the URL ends in something like ".php?foo=bar", strip everything from the '?' onward
877   CLEAN_URL=${URL%%\?*}
878
879   # If the URL ends in something like "#section_15", strip everything from the '#' onward
880   CLEAN_URL=${CLEAN_URL%%\#*}
881
882   # 'sed' cannot handle Unicode in my Bash shell, so skip non-ASCII URL and make reader check it
883   if [[ $CLEAN_URL == *[![:ascii:]]* ]]; then
884      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I cannot handle non-ASCII characters."
885      let SKIP_NON_ASCII+=1
886      let PAGE_LINKS+=1
887      continue
888   fi
889
890   # Isolate the characters after the last period and after the last slash
891   POST_DOT=$(echo "$CLEAN_URL" | sed 's/.*\.//')
892   POST_SLASH=$(echo "$CLEAN_URL" | sed 's/.*\///')
893
894   # If the last period comes after the last slash, then the URL ends in a suffix
895   POST_DOT_LENGTH=$(echo | awk -v input=$POST_DOT '{print length(input)}')
896   POST_SLASH_LENGTH=$(echo | awk -v input=$POST_SLASH '{print length(input)}')
897   if [ $POST_DOT_LENGTH -lt $POST_SLASH_LENGTH ]; then
898      HAS_SUFFIX=1
899   else
900      HAS_SUFFIX=0
901   fi
902
903   # Now we need to know if the URL is for a file. If the URL ends in a suffix, compare it against the
904   # known URL endings supplied in HTTP_FILES and HTTP_TLDS_AND_PAGES
905   IS_FILE=-1
906   if [ $HAS_SUFFIX -eq 0 ]; then
907      IS_FILE=0
908   else
909      # Turn off case sensitivity while we compare suffixes
910      shopt -s nocasematch
911
912      # Special case: URLs ending in something like "productID.304297400" are pages, not files, so if
913      # the URL's suffix is all numbers, we are looking at the end of a web page URL
914      if [[ $POST_DOT =~ ^-?[0-9]+$ ]]; then
915         IS_FILE=0
916      fi
917
918      # Special case: URLs ending in a parens, e.g. "ms537113(v=vs.85)", are pages
919      if [[ $POST_DOT =~ ^.*[\(\)]$ ]]; then
920         IS_FILE=0
921      fi
922
923      # Special case: URLs containing a '%', e.g. "10.1007%2FBF00329055", are pages
924      if [[ $POST_DOT == *%* ]]; then
925         IS_FILE=0
926      fi
927     
928      # If we did not identify this URL as a web page above, we need to compare the suffix against known
929      # file extensions
930      if [ $IS_FILE -eq -1 ]; then
931         for EXTENSION in "${HTTP_FILES[@]}"; do
932            if [[ $EXTENSION == $POST_DOT ]]; then # double brackets respect the nocasematch setting
933               IS_FILE=1
934               break
935            fi
936         done
937      fi
938
939      # If we did not identify this URL as a file above, we need to compare the suffix against known
940      # pages and TLDs in order to rule out that this is an unknown file suffix or TLD that the user
941      # needs to add to the HTTP_FILES or HTTP_TLDS_AND_PAGES array
942      if [ $IS_FILE -eq -1 ]; then
943         for PAGE_OR_TLD in "${HTTP_TLDS_AND_PAGES[@]}"; do
944            if [[ $PAGE_OR_TLD == $POST_DOT ]]; then
945               IS_FILE=0
946               break
947            fi
948         done
949      fi
950
951      # Turn case sensitivity back on in Bash
952      shopt -u nocasematch
953   fi
954
955   # If this suffix escaped identification as either a file, page or TLD, inform the reader
956   STR_TYPE=""
957   if [ $IS_FILE -eq -1 ]; then
958      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I encountered the unknown URL suffix '$POST_DOT'. Please add this suffix to the appropriate array in this script, HTTP_FILES or HTTP_TLDS_AND_PAGES."
959      let SKIP_UNK_SUFFIX+=1
960      continue
961   elif [ $IS_FILE -eq 1 ]; then
962      STR_TYPE="file"
963      let FILE_LINKS+=1
964   else
965      STR_TYPE="page"
966      let PAGE_LINKS+=1
967   fi
968
969   # Get response code using 'curl' to see if this link is valid; the --insecure option avoids an
970   # issue with sites that require HTTPS
971   CURL_CODE=$(curl -o /dev/null --silent --insecure --compressed --head --user-agent '$AGENT' --max-time $TIMEOUT --retry 2 --write-out '%{http_code}\n' $URL)
972   CURL_ERR=$(echo $?)
973   CURL_RESULT=$CURL_CODE
974
975   # Tack 'curl' exit code onto result string if 'curl' returned "000" (no HTTP response)
976   if [ $CURL_CODE == "000" ]; then
977      CURL_RESULT="$CURL_RESULT-$CURL_ERR"
978   fi
979
980   # Begin to determine our status code for this URL (EI, IW, OK, RD, or NG)
981   STATUS="??"
982   NEW_URL=""
983   INTERWIKI_INDEX=-1
984
985   # First make sure that this isn't an "external internal" link to our own wiki that can be replaced
986   # by "[[page_name]]". If it uses a special access URL beginning with "/w/", let it pass, as it
987   # probably cannot be replaced by "[[ ]]" markup
988   if [[ $URL == *$WIKI_PATH* ]] && [[ $URL != *$WIKI_PATH/w/* ]]; then
989      STATUS="EI"
990      let EI_LINKS+=1
991   fi
992
993   # If it's not, check if this is a link to a domain that we have an interwiki prefix for (also make
994   # sure that it's not an archive.org link to a page from an interwiki domain)
995   if [ $STATUS == "??" ] && [[ $URL != *web.archive.org* ]]; then
996      for ((i = 0; i < ${#INTERWIKI_DOMAINS[@]}; ++i)); do
997         if [[ $URL == *${INTERWIKI_DOMAINS[$i]}* ]] && [[ $URL != *${INTERWIKI_DOMAINS[$i]}/w/* ]]; then
998            STATUS="IW"
999            let IW_LINKS+=1
1000            INTERWIKI_INDEX=$i
1001            break
1002         fi
1003      done
1004   fi
1005
1006   # If we didn't match an interwiki domain, see if the status code is in our "OK" codes list
1007   if [ $STATUS == "??" ]; then
1008      for CODE in "${OK_CODES[@]}"; do
1009         if [[ $CODE == $CURL_CODE ]]; then
1010            STATUS="OK"
1011            let OK_LINKS+=1
1012
1013            # If this is a YouTube link, we have to look at the actual page source to know if the video
1014            # is good or not; override the link's info if it's actually NG
1015            if [[ $URL == *www.youtube.com* ]]; then
1016               PAGE_TEXT=$(curl --silent --insecure --user-agent '$AGENT' --max-time $TIMEOUT $URL | grep "\"simpleText\":\"Video unavailable\"")
1017               if [ ! -z "$PAGE_TEXT" ]; then
1018                  STATUS="NG"
1019                  CURL_RESULT=404
1020                  let OK_LINKS-=1
1021                  let NG_LINKS+=1
1022               fi
1023            fi
1024            break
1025         fi
1026      done
1027   fi
1028
1029   # If we didn't get a match with the "OK" codes, check it against the "RD" codes
1030   if [ $STATUS == "??" ]; then
1031      for CODE in "${RD_CODES[@]}"; do
1032         if [[ $CODE == $CURL_CODE ]]; then
1033            # Get URL header again in order to retrieve the URL we are being redirected to
1034            NEW_URL=$(curl -o /dev/null --silent --insecure --head --user-agent '$AGENT' --max-time $TIMEOUT --write-out '%{redirect_url}\n' $URL)
1035
1036            # Adjust the old and new URLs to both use HTTP for comparison purposes, so we can filter
1037            # those changes out if the user didn't ask for them
1038            URL_HTTP=$(echo $URL | sed -E 's/^https:/http:/')
1039            NEW_URL_HTTP=$(echo $NEW_URL | sed -E 's/^https:/http:/')
1040
1041            # Sometimes 'curl' fails to get the redirect_url due to time-out or bad web site config
1042            NEW_URL_LENGTH=$(echo | awk -v input=$NEW_URL_HTTP '{print length(input)}')
1043            if [ $NEW_URL_LENGTH -lt $MIN_URL_LENGTH ]; then
1044               NEW_URL_HTTP="[new URL not retrieved]"
1045            fi
1046
1047            # Remove slash at end of new URL, if present, so we can filter out the redirects that
1048            # merely add an ending slash if the user didn't ask for them
1049            NEW_URL_NO_SLASH=$(echo $NEW_URL_HTTP | sed -E 's:/$::')
1050
1051            # Detect if this is a youtu.be link simply being expanded by YouTube to the full
1052            # youtube.com address
1053            YOUTU_BE=0
1054            if [[ $URL_HTTP == http://youtu.be* ]] && [[ $NEW_URL_HTTP == http://www.youtube.com* ]]; then
1055               YOUTU_BE=1
1056            fi
1057
1058            # If the URLs match besides HTTP being upgraded to HTTPS, then the link is OK (unless user
1059            # wants those to be reported)
1060            if [ $SHOW_HTTPS -eq 0 ] && [ $URL_HTTP == $NEW_URL_HTTP ]; then
1061               valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to show http->https upgrades, and I was redirected to '$NEW_URL'."
1062               STATUS="OK"
1063               let OK_LINKS+=1
1064               let SKIP_HTTPS_UP+=1
1065            # If the URLs match besides an added ending slash, then the link is OK (unless user wants
1066            # those to be reported)
1067            elif [ $SHOW_SLASH -eq 0 ] && [ $URL_HTTP == $NEW_URL_NO_SLASH ]; then
1068               valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to show added trailing slashes, and I was redirected to '$NEW_URL'."
1069               STATUS="OK"
1070               let OK_LINKS+=1
1071               let SKIP_SLASH_ADD+=1
1072            elif [ $YOUTU_BE -eq 1 ]; then
1073               # We have to look at the actual page source to know if a YouTube video is good or not
1074               PAGE_TEXT=$(curl --silent --insecure --user-agent '$AGENT' --max-time $TIMEOUT $NEW_URL | grep "\"simpleText\":\"Video unavailable\"")
1075               if [ ! -z "$PAGE_TEXT" ]; then
1076                  STATUS="NG"
1077                  let NG_LINKS+=1
1078               else
1079                  if [ $SHOW_YT_RD -eq 0 ]; then
1080                     valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I have not been asked to show redirects for youtu.be links, and I was redirected to '$NEW_URL'."
1081                     STATUS="OK"
1082                     let OK_LINKS+=1
1083                     let SKIP_YOUTU_BE+=1
1084                  else
1085                     STATUS="RD"
1086                     let RD_LINKS+=1
1087                  fi
1088               fi
1089            else
1090               STATUS="RD"
1091               let RD_LINKS+=1
1092            fi
1093            break
1094         fi
1095      done
1096   fi
1097
1098   # If we didn't get a match with the "RD" codes, check it against the "NG" codes
1099   if [ $STATUS == "??" ]; then
1100      for CODE in "${NG_CODES[@]}"; do
1101         if [[ $CODE == $CURL_CODE ]]; then
1102            STATUS="NG"
1103            let NG_LINKS+=1
1104            break
1105         fi
1106      done
1107   fi
1108
1109   # If we didn't match a known status code, advise the reader
1110   if [ $STATUS == "??" ]; then
1111      valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because I encountered the unknown response code $CURL_CODE."
1112      let SKIP_UNK_CODE+=1
1113      continue
1114   fi
1115
1116   # Check problem links against exceptions list before proceeding
1117   FOUND_EXCEPT=0
1118   if [ $STATUS != "OK" ] && [ ! -z "$EXCEPT_URL" ]; then
1119      # The code we expect to find in the exceptions file is either the 'curl' result or "EI"/"IW"
1120      EXPECT_CODE="$CURL_RESULT"
1121      if [ $STATUS == "EI" ]; then
1122         EXPECT_CODE="EI"
1123      elif [ $STATUS == "IW" ]; then
1124         EXPECT_CODE="IW"
1125      fi
1126
1127      # Look for link in exceptions list and make sure the listed result code and wiki page also match
1128      for ((i = 0; i < ${#EXCEPT_ARRAY[@]}; ++i)); do
1129      {
1130         EXCEPT_LINE="${EXCEPT_ARRAY[$i]}"
1131
1132         # Undo any HTML-encoding from the wiki page; for now we just worry about the ampersand, as most
1133         # other HTML-encoded characters are not found in URLs
1134         EXCEPT_LINE=$(echo "$EXCEPT_LINE" | sed 's/\&amp;/\&/g')
1135
1136         # Check for URL match
1137         EXCEPT_URL="${EXCEPT_LINE#*,}"
1138         EXCEPT_URL="${EXCEPT_URL%,*}"
1139         if [ "$EXCEPT_URL" != "$URL" ]; then
1140            continue
1141         fi
1142
1143         # Check for page name match
1144         EXCEPT_PAGE="${EXCEPT_LINE##*,}"
1145         EXCEPT_PAGE="${EXCEPT_PAGE%% *}"
1146         if [ "$EXCEPT_PAGE" == "*" ] || [ "$EXCEPT_PAGE" == "$LOCAL_PAGE_PATH" ]; then
1147            let EXCEPT_FOUND[$i]+=1
1148            valPrint trs "Found exception '$URL' on page '$LOCAL_PAGE_PATH'."
1149
1150            # Check for result code match
1151            EXCEPT_CODE=${EXCEPT_LINE%%,*}
1152            if [ "$EXCEPT_CODE" == "$EXPECT_CODE" ]; then
1153               FOUND_EXCEPT=1
1154               let EXCEPT_USED[$i]+=1
1155               valPrint trs "Skipping URL '$URL' (found on page '$LOCAL_PAGE_PATH') because its expected result, '$EXPECT_CODE', is in the exceptions list."
1156
1157               if [ $STATUS == "EI" ]; then
1158                  let SKIP_EXPECT_EI+=1
1159               elif [ $STATUS == "IW" ]; then
1160                  let SKIP_EXPECT_IW+=1
1161               elif [ $STATUS == "RD" ]; then
1162                  let SKIP_EXPECT_RD+=1
1163               else
1164                  let SKIP_EXPECT_NG+=1
1165               fi
1166
1167               break
1168            fi
1169         fi
1170      } done
1171   fi
1172   if [ $FOUND_EXCEPT -eq 1 ]; then
1173      continue
1174   fi
1175
1176   # If appropriate, record this link to the log, with clickable URLs when possible
1177   if [ $STATUS != "OK" ] || [ $RECORD_OK_LINKS -eq 1 ]; then
1178      # Prepare 'curl' result in parentheses to print after status code, unless this is an "IW" or "EI"
1179      # link, in which case showing the status code doesn't make sense. Adjust spacing after string to
1180      # ensure TXT and RTF reports have aligned columns of results.
1181      CURL_STR_H=" ($CURL_RESULT)"
1182      CURL_STR_T="$CURL_STR_H"
1183      CURL_STR_R="$CURL_STR_H   "
1184      if [ $STATUS == "IW" ] || [ $STATUS == "EI" ]; then
1185         CURL_STR_H=""
1186         CURL_STR_T="      "
1187         CURL_STR_R="                   "
1188      fi
1189     
1190      # Record link and its wiki page in TXT, RTF, and HTML markup
1191      valPrint t "${STATUS}${CURL_STR_T} $STR_TYPE $URL"
1192      valPrint t "  linked from $FULL_PAGE_PATH"
1193      valPrint r "${STATUS}${CURL_STR_R}${RTF_TABS}$STR_TYPE    {\field{\*\fldinst{HYPERLINK \"$URL\"}}{\fldrslt $URL}}"
1194      valPrint r "              linked from     {\field{\*\fldinst{HYPERLINK \"$FULL_PAGE_PATH\"}}{\fldrslt $LOCAL_PAGE_PATH}}"
1195      valPrint hn "<tr><td style=\"white-space:nowrap\">${STATUS}${CURL_STR_H}</td><td align=\"right\">$STR_TYPE</td><td><a href=\"$URL\" target=\"_blank\">$URL</a></td></tr>"
1196      valPrint hn "<tr><td colspan=\"2\" align=\"right\">linked from</td><td><a href=\"$FULL_PAGE_PATH\" target=\"_blank\">$LOCAL_PAGE_PATH</a></td></tr>"
1197
1198      # Place vertical space here since we won't be printing anything more about this link
1199      if [ $STATUS == "OK" ] && [ $SUGGEST_SNAPSHOTS_OK -eq 0 ]; then valPrint tr ""; valPrint hs ""; fi
1200
1201      # Record redirect URL if one was given by a 3xx response page
1202      if [ $STATUS == "RD" ]; then
1203         valPrint ts "  Server suggests $NEW_URL"
1204         valPrint rs "  Server suggests {\field{\*\fldinst{HYPERLINK \"$NEW_URL\"}}{\fldrslt $NEW_URL}}"
1205         valPrint hs "<tr><td colspan=\"2\" align=\"right\">Server suggests</td><td><a href=\"$NEW_URL\" target=\"_blank\">$NEW_URL</a></td></tr>"
1206      fi
1207
1208      # Notify reader if we can use an intrawiki link for this URL
1209      if [ $STATUS == "EI" ]; then
1210         INTRA_PAGE=${URL#*://*/}
1211         valPrint ts "  Just use [[$INTRA_PAGE]]"
1212         valPrint rs "          Just use [[$INTRA_PAGE]]"
1213         valPrint hs "<tr><td colspan=\"2\" align=\"right\">Just use</td><td>[[$INTRA_PAGE]]</td></tr>"
1214      fi
1215
1216      # Notify reader if we can use an interwiki prefix for this URL
1217      if [ $STATUS == "IW" ]; then
1218         INTER_PAGE=$(echo "$URL" | sed 's/.*\///')
1219         valPrint ts "  You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]"
1220         valPrint rs "          You can use [[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]"
1221         valPrint hs "<tr><td colspan=\"2\" align=\"right\">You can use</td><td>[[${INTERWIKI_PREFIXES[$INTERWIKI_INDEX]}:$INTER_PAGE]]</td></tr>"
1222      fi
1223
1224      # Query Internet Archive for latest "OK" snapshot for "NG" page
1225      if [[ ( $STATUS == "NG" && $SUGGEST_SNAPSHOTS_NG -eq 1 ) || ( $STATUS == "OK" && $SUGGEST_SNAPSHOTS_OK -eq 1 ) ]]; then
1226
1227         # We need to watch out for the rate limit or we'll get locked out; look at how much time has
1228         # elapsed and then wait the remainder between that and how long of a wait we think is needed
1229         # to avoid the dreaded "Too Many Requests" response. 5 seconds is just a guess.
1230         CUR_TIME=$(date +%s)
1231         WAIT_REMAINDER=$((5 - $CUR_TIME + $START_LINK))
1232         if [ $WAIT_REMAINDER -gt 0 ]; then
1233            valPrint t "Waiting $WAIT_REMAINDER second(s) to conform to Archive.org rate limit."
1234            sleep $WAIT_REMAINDER
1235         fi
1236
1237         # Issue query to the API
1238         ARCHIVE_QUERY=$(curl --silent --max-time $TIMEOUT "$ARCHIVE_API?url=$URL&$ARCHIVE_OK_CODES")
1239
1240         # Notify reader if we hit the rate limit and just keep going
1241         if [[ "$ARCHIVE_QUERY" == "*Too Many Requests*" ]]; then
1242            valPrint t "  IA has rate-limited us!"
1243            valPrint r "                IA has rate-limited us!"
1244            valPrint hs "<tr><td colspan=\"2\" align=\"right\">IA suggests</td><td>(hit the API rate limit!)</td></tr>"
1245         # If a "closest" snapshot was received, inform reader
1246         elif [[ "$ARCHIVE_QUERY" == *\"closest\":* ]]; then
1247            # In case the URL has a shebang in it (like mega.nz links do), escape the '!' to break it
1248            ARCHIVE_QUERY=$(echo "$ARCHIVE_QUERY" | sed 's/#!/#\\!/')
1249
1250            # ...isolate "url" property in the response that follows the "closest" tag
1251            SNAPSHOT_URL=${ARCHIVE_QUERY##*\"closest\":} # everything after '"closest":'
1252            SNAPSHOT_URL=${SNAPSHOT_URL#*\"url\": \"} # everything after '"url": "'
1253            SNAPSHOT_URL=${SNAPSHOT_URL%%\"*} # everything before '"'
1254
1255            # Remove the port 80 part that IA often adds to the URL, as it's superfluous
1256            SNAPSHOT_URL=$(echo $SNAPSHOT_URL | sed 's/:80//')
1257
1258            # Inform the reader of the snapshot URL
1259            valPrint ts "  IA suggests $SNAPSHOT_URL"
1260            valPrint rs "               IA suggests {\field{\*\fldinst{HYPERLINK \"$SNAPSHOT_URL\"}}{\fldrslt $SNAPSHOT_URL}}"
1261            valPrint hs "<tr><td colspan=\"2\" align=\"right\">IA suggests</td><td><a href=\"$SNAPSHOT_URL\" target=\"_blank\">$SNAPSHOT_URL</a></td></tr>"
1262         else # Otherwise give a generic Wayback Machine link for this URL, which might work
1263            valPrint ts " Try browsing $ARCHIVE_GENERIC/$URL"
1264            valPrint rs "               Try browsing {\field{\*\fldinst{HYPERLINK \"$ARCHIVE_GENERIC/$URL\"}}{\fldrslt $ARCHIVE_GENERIC/$URL}}"
1265            valPrint hs "<tr><td colspan=\"2\" align=\"right\">Try browsing</td><td><a href=\"$ARCHIVE_GENERIC/$URL\" target=\"_blank\">$ARCHIVE_GENERIC/$URL</a></td></tr>"
1266         fi
1267      fi
1268   fi
1269   
1270   # If this is a page and it seems to be there, take screenshot of it using headless Google Chrome
1271   if [ $IS_FILE -eq 0 ] && [ $TAKE_PAGE_SHOT -eq 1 ] && [ $STATUS == "OK" ]; then
1272      # Sanitize URL by removing "http(s)://" and converting any other colons or slashes to underscores
1273      SHOT_NAME=$(echo "$URL" | sed 's/https*\:\/\///' | sed 'y/:\//__/')
1274      SHOT_FILE="$SHOT_PATH/$SHOT_NAME.png"
1275
1276      # Don't take screenshot if we already encountered this page and screenshotted it
1277      if [ ! -f "$SHOT_FILE" ]; then
1278         "$CHROME_PATH" --headless --disable-gpu --screenshot --window-size=1500,900 $URL > /dev/null 2>&1
1279         if [ -f "$WORKING_DIR/$CHROME_SCREENSHOT" ]; then
1280            mv -n "$WORKING_DIR/$CHROME_SCREENSHOT" "$SHOT_FILE"
1281         else
1282            valPrint trhs "Screenshot of URL $URL seems to have failed!"
1283         fi
1284      else
1285         valPrint trhs "Skipping screenshot of URL '$URL' because file '$SHOT_FILE' already exists."
1286      fi
1287   fi
1288done
1289FINISHED_LIST="yes"
1290wrapupAndExit
Note: See TracBrowser for help on using the repository browser.