Changeset 1205 for ValBot/Python/check_intrawiki_section_links.py
- Timestamp:
- Feb 9, 2026, 4:24:16 AM (7 hours ago)
- File:
-
- 1 edited
-
ValBot/Python/check_intrawiki_section_links.py (modified) (6 diffs)
Legend:
- Unmodified
- Added
- Removed
-
ValBot/Python/check_intrawiki_section_links.py
r1194 r1205 16 16 from pywikibot.bot import QuitKeyboardInterrupt 17 17 from pywikibot import pagegenerators 18 from pywikibot.tools.formatter import color_format19 18 from pywikibot.comms.http import fetch 20 19 from pywikibot.specialbots import UploadRobot … … 79 78 if search_terms[-1].startswith('-'): 80 79 search_terms.pop() 81 # Remake text directive with the terms separated by spaces as they should be in the page text 80 # Remake text directive with the terms separated by spaces as they should be in the page 81 # text 82 82 newSep = ' ' 83 83 search_string = newSep.join(search_terms) … … 114 114 pywikibot.stdout(' The section "{0}" was found on page "{1}".'.format(anchor_name, target_page_name_human)) 115 115 116 # For a link that redirected us to another page, extract the name of the target page from 117 # t he target page's source116 # For a link that redirected us to another page, extract the name of the target page from the 117 # target page's source 118 118 def find_canonical_link(page_text, page_name, page_slug): 119 119 # Extract link from this markup which contains name of redirected-to page: … … 129 129 canonical_name = canonical_name[:tag_end] 130 130 if len(canonical_name) > 100: 131 # Certain things can cause the trim to fail; report error and avoid slamming the 132 # outputwith massive page source from a failed trim131 # Certain things can cause the trim to fail; report error and avoid slamming the output 132 # with massive page source from a failed trim 133 133 pywikibot.stdout(' ERROR: The link "{}" is a redirect to "{2}…" (string overflow).'.format(page_slug, canonical_name[:100])) 134 134 errors_issued = errors_issued + 1 … … 152 152 # automatically follows redirects. This will catch formal redirects which come from pages 153 153 # such as Special:PermanentLink. 154 if response.history != []: 155 permalink1 = 'Special:PermanentLink/'.lower() 156 permalink2 = 'Special:Permalink/'.lower() 157 page_slug_lower = page_slug.lower() 158 if page_slug_lower.startswith(permalink1) or page_slug_lower.startswith(permalink2): 159 if debug: 160 possibly_print(page_name) 161 pywikibot.stdout(' Got redirection code "{0}" for permanent revision link "{1}". Checking the target page….'.format(response.history[0], page_slug)) 162 find_canonical_link(response.text, page_name, page_slug) 163 else: 154 permalink1 = 'Special:PermanentLink/'.lower() 155 permalink2 = 'Special:Permalink/'.lower() 156 page_slug_lower = page_slug.lower() 157 if response.history != [] and (page_slug_lower.startswith(permalink1) or page_slug_lower.startswith(permalink2)): 158 if debug: 164 159 possibly_print(page_name) 165 pywikibot.stdout(' ERROR: Unrecognized type of redirection (code "{0}") for link "{1}". You should check the link manually.'.format(response.history[0], page_slug)) 166 advice_issued += 1 167 elif response.status_code != 200: 168 possibly_print(page_name) 169 pywikibot.stdout(' ERROR: Got response code {0} on URL {1}. The target page may not exist.'.format(response.status_code, iw_url)) 170 errors_issued += 1 171 # However the usual way that a redirect occurs is that MediaWiki redirects us sneakily 172 # using JavaScript, while returning code OK 200 as if the link was correct; this happens 173 # when a redirect page is accessed. We must detect these soft redirects by looking at the 174 # page source to find the redirect note inserted at the top of the page for the reader. 160 pywikibot.stdout(' Got redirection code "{0}" for permanent revision link "{1}". Checking the target page….'.format(response.history[0], page_slug)) 161 find_canonical_link(response.text, page_name, page_slug) 162 # However the usual way that a redirect occurs is that a redirect page is visited and 163 # MediaWiki sends us to the new page using JavaScript while returning code 301. Formerly it 164 # used to return 200 as if the link was correct, so rather than looking for code 301 we 165 # detect these soft redirects by looking at the page source to find the redirect note that 166 # gets inserted at the top of the page for the reader. 175 167 elif 'Redirected from <a' in response.text: 176 168 if debug: … … 178 170 pywikibot.stdout(' Got silently redirected by link "{}". Checking the target page….'.format(page_slug)) 179 171 find_canonical_link(response.text, page_name, page_slug) 172 # This handles response codes other than 200 and 301 (301 is returned in the above case of a 173 # silent redirect) 174 elif response.status_code != 200: 175 possibly_print(page_name) 176 pywikibot.stdout(' ERROR: Got response code {0} on URL {1}. The target page may not exist.'.format(response.status_code, iw_url)) 177 errors_issued += 1 180 178 else: # URL is OK, so proceed 181 179 find_section(response.text, page_name, page_slug, False)
Note:
See TracChangeset
for help on using the changeset viewer.
