Ignore:
Timestamp:
Feb 9, 2026, 4:24:16 AM (7 hours ago)
Author:
iritscen
Message:

ValBot: Revised logic in check_intrawiki_section_links.py, as MediaWiki now apparently returns HTTP response code 301 when the user is redirected by a redirect page.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • ValBot/Python/check_intrawiki_section_links.py

    r1194 r1205  
    1616from pywikibot.bot import QuitKeyboardInterrupt
    1717from pywikibot import pagegenerators
    18 from pywikibot.tools.formatter import color_format
    1918from pywikibot.comms.http import fetch
    2019from pywikibot.specialbots import UploadRobot
     
    7978      if search_terms[-1].startswith('-'):
    8079         search_terms.pop()
    81       # Remake text directive with the terms separated by spaces as they should be in the page text
     80      # Remake text directive with the terms separated by spaces as they should be in the page
     81      # text
    8282      newSep = ' '
    8383      search_string = newSep.join(search_terms)
     
    114114      pywikibot.stdout('   The section "{0}" was found on page "{1}".'.format(anchor_name, target_page_name_human))
    115115
    116 # For a link that redirected us to another page, extract the name of the target page from
    117 # the target page's source
     116# For a link that redirected us to another page, extract the name of the target page from the
     117# target page's source
    118118def find_canonical_link(page_text, page_name, page_slug):
    119119   # Extract link from this markup which contains name of redirected-to page:
     
    129129      canonical_name = canonical_name[:tag_end]
    130130      if len(canonical_name) > 100:
    131          # Certain things can cause the trim to fail; report error and avoid slamming the
    132          # output with massive page source from a failed trim
     131         # Certain things can cause the trim to fail; report error and avoid slamming the output
     132         # with massive page source from a failed trim
    133133         pywikibot.stdout('   ERROR: The link "{}" is a redirect to "{2}…" (string overflow).'.format(page_slug, canonical_name[:100]))
    134134         errors_issued = errors_issued + 1
     
    152152   # automatically follows redirects. This will catch formal redirects which come from pages
    153153   # such as Special:PermanentLink.
    154    if response.history != []:
    155       permalink1 = 'Special:PermanentLink/'.lower()
    156       permalink2 = 'Special:Permalink/'.lower()
    157       page_slug_lower = page_slug.lower()
    158       if page_slug_lower.startswith(permalink1) or page_slug_lower.startswith(permalink2):
    159          if debug:
    160             possibly_print(page_name)
    161             pywikibot.stdout('   Got redirection code "{0}" for permanent revision link "{1}". Checking the target page….'.format(response.history[0], page_slug))
    162          find_canonical_link(response.text, page_name, page_slug)
    163       else:
     154   permalink1 = 'Special:PermanentLink/'.lower()
     155   permalink2 = 'Special:Permalink/'.lower()
     156   page_slug_lower = page_slug.lower()
     157   if response.history != [] and (page_slug_lower.startswith(permalink1) or page_slug_lower.startswith(permalink2)):
     158      if debug:
    164159         possibly_print(page_name)
    165          pywikibot.stdout('   ERROR: Unrecognized type of redirection (code "{0}") for link "{1}". You should check the link manually.'.format(response.history[0], page_slug))
    166          advice_issued += 1
    167    elif response.status_code != 200:
    168       possibly_print(page_name)
    169       pywikibot.stdout('   ERROR: Got response code {0} on URL {1}. The target page may not exist.'.format(response.status_code, iw_url))
    170       errors_issued += 1
    171    # However the usual way that a redirect occurs is that MediaWiki redirects us sneakily
    172    # using JavaScript, while returning code OK 200 as if the link was correct; this happens
    173    # when a redirect page is accessed. We must detect these soft redirects by looking at the
    174    # page source to find the redirect note inserted at the top of the page for the reader.
     160         pywikibot.stdout('   Got redirection code "{0}" for permanent revision link "{1}". Checking the target page….'.format(response.history[0], page_slug))
     161      find_canonical_link(response.text, page_name, page_slug)
     162   # However the usual way that a redirect occurs is that a redirect page is visited and
     163   # MediaWiki sends us to the new page using JavaScript while returning code 301. Formerly it
     164   # used to return 200 as if the link was correct, so rather than looking for code 301 we
     165   # detect these soft redirects by looking at the page source to find the redirect note that
     166   # gets inserted at the top of the page for the reader.
    175167   elif 'Redirected from <a' in response.text:
    176168      if debug:
     
    178170         pywikibot.stdout('   Got silently redirected by link "{}". Checking the target page….'.format(page_slug))
    179171      find_canonical_link(response.text, page_name, page_slug)
     172   # This handles response codes other than 200 and 301 (301 is returned in the above case of a
     173   # silent redirect)
     174   elif response.status_code != 200:
     175      possibly_print(page_name)
     176      pywikibot.stdout('   ERROR: Got response code {0} on URL {1}. The target page may not exist.'.format(response.status_code, iw_url))
     177      errors_issued += 1
    180178   else: # URL is OK, so proceed
    181179      find_section(response.text, page_name, page_slug, False)
Note: See TracChangeset for help on using the changeset viewer.