Changeset 1207 for ValBot/Python/check_interwiki_links.py
- Timestamp:
- Mar 22, 2026, 12:06:14 AM (4 hours ago)
- File:
-
- 1 edited
-
ValBot/Python/check_interwiki_links.py (modified) (4 diffs)
Legend:
- Unmodified
- Added
- Removed
-
ValBot/Python/check_interwiki_links.py
r1204 r1207 10 10 import re 11 11 import requests # for listing members with dir() when debugging 12 import time 12 13 13 14 from bs4 import BeautifulSoup … … 20 21 class IWLink: 21 22 def __init__(self, iw_prefix, prefix_url, full_url, page_name, page_name_only, page_slug, hosting_page, curl_response): 22 self.iw_prefix = iw_prefix # e.g. "wp" 23 self.iw_prefix = iw_prefix # e.g. "wp" as in [[wp:Marathon (series)#Rampancy]] 23 24 self.prefix_url = prefix_url # e.g. "https://en.wikipedia.org/wiki/" 24 25 self.full_url = full_url # e.g. "https://en.wikipedia.org/wiki/Marathon_(series)#Rampancy" … … 41 42 unintended_redirects_found = 0 42 43 name_printed = 0 44 request_delay = 1.5 45 max_retries = 3 46 backoff_factor = 2 43 47 44 48 # Prints the name of a page on which something occurred, if it has not been printed before … … 116 120 global unintended_redirects_found 117 121 118 the_link.curl_response = fetch(the_link.full_url) 122 # We have to carefully throttle requests because otherwise we will get hit with a 429: Too Many Requests 123 attempt = 0 124 delay = request_delay 125 while True: 126 time.sleep(delay) 127 128 the_link.curl_response = fetch(the_link.full_url) 129 130 if the_link.curl_response.status_code != 429: 131 break 132 133 attempt += 1 134 if attempt > max_retries: 135 pywikibot.stdout(f' ERROR: Maximum retries afer error 429 exceeded for "{the_link.page_slug}". Aborting script.') 136 raise SystemExit(1) 137 138 # Increase rate limit if we got the error 139 delay *= backoff_factor 140 pywikibot.stdout(f' WARNING: Received error 429 for "{the_link.page_slug}". Retrying in {delay:.1f}s...') 119 141 120 142 # One way we tell that a redirect occurred is by checking fetch's history, as it automatically follows redirects. This will catch formal redirects which come from
Note:
See TracChangeset
for help on using the changeset viewer.
