#!/bin/bash
#
# drive_slash_adding.sh
#
# Walks through a ValExtLinks report one line at a time. For every line that
# contains the word "trailing" and does not contain "w/index.php", it extracts
# a page name, the current URL and the replacement URL from the line and runs
# Pywikibot's "replace" script to swap one URL for the other on that page.
#
# Configuration (edit the values below):
#   CORE       directory that contains pwb.py
#   REPORT     path of the ValExtLinks report to read
#   SUMMARY    edit summary passed to pwb.py
#   RATE       minimum number of seconds between the start of two pwb.py runs
#   FIX_START  number of the first matching line to act on (earlier ones are
#              skipped); 0 or 1 starts at the beginning
#   FIX_END    number of the last matching line to act on; 0 means no limit
#
# Exit status: 0 on normal completion (including stopping at FIX_END),
#              1 if Pywikibot or the report cannot be found.

CORE="/path/to/Pywikibot/core"
REPORT="/path/to/ValExtLinks report.txt"
SUMMARY="added ending slash to URL and/or upgrading http to https to satisfy redirect"
RATE=6
FIX_START=0
FIX_END=0

# pwb.py is invoked by relative name, so we must be inside CORE.
if ! cd "$CORE" || [[ ! -f "pwb.py" ]]; then
   echo "drive_slash_adding.sh: Can't launch Pywikibot!" >&2
   exit 1
fi

if [[ ! -r "$REPORT" ]]; then
   echo "drive_slash_adding.sh: Can't read report '$REPORT'!" >&2
   exit 1
fi

echo "drive_slash_adding.sh: Starting at fix $FIX_START..."

FIX_CUR=0    # running count of report lines that qualify for a fix
LAST_RUN=0   # epoch seconds at which pwb.py was last started; 0 = never

# The report is read on file descriptor 3 so that pwb.py keeps the script's
# own stdin (normally the terminal) for any prompts it issues. Reading with
# "read -r" instead of word-splitting the output of cat also prevents lines
# containing ?, * or [ from being glob-expanded into file names.
while IFS= read -r THE_LINE <&3 || [[ -n "$THE_LINE" ]]; do
   #echo "drive_slash_adding.sh: Considering '$THE_LINE'..."

   # Only lines mentioning "trailing" are candidates; w/index.php URLs are
   # excluded.
   if [[ "$THE_LINE" != *trailing* || "$THE_LINE" == *w/index.php* ]]; then
      continue
   fi
   #echo "drive_slash_adding.sh: This URL needs to be fixed."
   FIX_CUR=$((FIX_CUR + 1))

   if (( FIX_CUR < FIX_START )); then
      continue
   fi

   if (( FIX_END > 0 && FIX_CUR > FIX_END )); then
      echo "drive_slash_adding.sh: Stopped after fix $FIX_END."
      exit 0
   fi

   # Page name: text between the first "page '" and the next single quote.
   ON_PAGE=${THE_LINE#*page \'}
   ON_PAGE=${ON_PAGE%%\'*}
   # Current URL: text between the first "URL '" and the next single quote.
   FROM_LINK=${THE_LINE#*URL \'}
   FROM_LINK=${FROM_LINK%%\'*}
   # Replacement URL: the last single-quoted string on the line.
   TO_LINK=${THE_LINE%\'*}
   TO_LINK=${TO_LINK##*\'}

   # An empty field means the line did not parse as expected; running the
   # replacement anyway could target no page or delete the link text.
   if [[ -z "$ON_PAGE" || -z "$FROM_LINK" || -z "$TO_LINK" ]]; then
      echo "drive_slash_adding.sh: Could not parse fix $FIX_CUR, skipping." >&2
      continue
   fi

   #if [[ "$THE_LINE" =~ ${FROM_LINK}[^a-zA-Z/] ]]; then
   #   echo "URL is not isolated, skipping."
   #   continue
   #fi

   # Wait for rate limit to expire if we have run the Python script before
   # in this session.
   if (( LAST_RUN > 0 )); then
      CUR_TIME=$(date +%s)
      WAIT_REMAINDER=$((RATE - CUR_TIME + LAST_RUN))
      if (( WAIT_REMAINDER > 0 )); then
         echo "drive_slash_adding.sh: Waiting $WAIT_REMAINDER second(s)."
         sleep "$WAIT_REMAINDER"
      fi
   fi

   LAST_RUN=$(date +%s)
   printf '%s\n' "pwb.py replace -page:\"$ON_PAGE\" \"$FROM_LINK\" \"$TO_LINK\""
   python pwb.py replace -page:"$ON_PAGE" "$FROM_LINK" "$TO_LINK" -summary:"$SUMMARY" ||
      echo "drive_slash_adding.sh: pwb.py reported an error on fix $FIX_CUR." >&2
done 3< "$REPORT"