Ignore:
Timestamp:
Mar 22, 2026, 12:06:14 AM (4 hours ago)
Author:
iritscen
Message:

ValBot: Added throttle to check_interwiki_links.py to avoid Wikipedia kicking us out with error 429.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • ValBot/Python/check_interwiki_links.py

    r1204 r1207  
    1010import re
    1111import requests # for listing members with dir() when debugging
     12import time
    1213
    1314from bs4 import BeautifulSoup
     
    2021class IWLink:
    2122   def __init__(self, iw_prefix, prefix_url, full_url, page_name, page_name_only, page_slug, hosting_page, curl_response):
    22       self.iw_prefix = iw_prefix # e.g. "wp"
     23      self.iw_prefix = iw_prefix # e.g. "wp" as in [[wp:Marathon (series)#Rampancy]]
    2324      self.prefix_url = prefix_url # e.g. "https://en.wikipedia.org/wiki/"
    2425      self.full_url = full_url # e.g. "https://en.wikipedia.org/wiki/Marathon_(series)#Rampancy"
     
    4142unintended_redirects_found = 0
    4243name_printed = 0
     44request_delay = 1.5
     45max_retries = 3
     46backoff_factor = 2
    4347
    4448# Prints the name of a page on which something occurred, if it has not been printed before
     
    116120   global unintended_redirects_found
    117121   
    118    the_link.curl_response = fetch(the_link.full_url)
     122   # We have to carefully throttle requests because otherwise we will get hit with a 429: Too Many Requests
     123   attempt = 0
     124   delay = request_delay
     125   while True:
     126       time.sleep(delay)
     127   
     128       the_link.curl_response = fetch(the_link.full_url)
     129   
     130       if the_link.curl_response.status_code != 429:
     131           break
     132   
     133       attempt += 1
     134       if attempt > max_retries:
     135          pywikibot.stdout(f'   ERROR: Maximum retries afer error 429 exceeded for "{the_link.page_slug}". Aborting script.')
     136          raise SystemExit(1)
     137   
     138       # Back off: lengthen the delay (lowering our request rate) if we got the error
     139       delay *= backoff_factor
     140       pywikibot.stdout(f'   WARNING: Received error 429 for "{the_link.page_slug}". Retrying in {delay:.1f}s...')
    119141
    120142   # One way we tell that a redirect occurred is by checking fetch's history, as it automatically follows redirects. This will catch formal redirects which come from
Note: See TracChangeset for help on using the changeset viewer.