Changeset 1152
- Timestamp: Mar 31, 2021, 6:29:48 PM (4 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
ValBot/check_interwiki_links.py
r1151 r1152 (columns: old line, new line, marker, content; "+" = added, "-" = removed; original indentation was lost in extraction)
 43  43    # Construct full URL for the particular wiki
 44  44    iw_url = interwiki_urls[cur] + page_title
 45      - pywikibot.output('Found {0} link {1} '.format(prefix, page_title))
     45  + pywikibot.output('Found {0} link {1}.'.format(prefix, page_title))
 46  46    iw_found = iw_found + 1
 47  47
  …   …
 55  55
 56  56    # Test the URL
 57      - #pywikibot.output('Testing URL {} '.format(iw_url))
     57  + #pywikibot.output('Testing URL {}...'.format(iw_url))
 58  58    response = fetch(iw_url)
 59  59
  …   …
 61  61    # way we tell that a redirect occurred is by checking the history
 62  62    if response.history != []:
 63      - pywikibot.output('WARNING: Initially got{}.'.format(response.history))
     63  + pywikibot.output('WARNING: Redirected from {}.'.format(response.history))
 64  64    problems_found = problems_found + 1
 65  65    elif response.status_code != 200:
 66  66    #pywikibot.output('WARNING: Got response code {}.'.format(response.status_code)) # commented out because fetch() already prints such a msg
 67  67    problems_found = problems_found + 1
     68  + elif '#' in page_title:
     69  + # Isolate section link
     70  + pywikibot.output('Detected section link on page {0}.'.format(page_title))
     71  + page_name, anchor_name = page_title.split('#')
     72  +
     73  + # Convert dot-notation hex entities to proper characters
     74  + anchor_name = anchor_name.replace('.22', '"')
     75  + anchor_name = anchor_name.replace('.27', '\'')
     76  + anchor_name = anchor_name.replace('.28', '(')
     77  + anchor_name = anchor_name.replace('.29', ')')
     78  +
     79  + # Read linked page to see if it really has this anchor link
     80  + soup = BeautifulSoup(response.text, 'html.parser')
     81  + found_section = False
     82  + for tag in soup.findAll('a'):
     83  + link = tag.get('href', None)
     84  + if not link:
     85  + #pywikibot.output('It is not a link.')
     86  + continue
     87  + #pywikibot.output('Got link {0}.'.format(link))
     88  + if not link.startswith('#'):
     89  + continue
     90  +
     91  + if link == '#' + anchor_name:
     92  + pywikibot.output('Found section link!')
     93  + found_section = True
     94  + break
     95  + if found_section == False:
     96  + pywikibot.output('Could not find section {0} on page {1}.'.format(anchor_name, page_name))
     97  + problems_found = problems_found + 1
 68  98    cur = cur + 1
 69  99
Note: See TracChangeset for help on using the changeset viewer.