Changeset 1194
- Timestamp: Nov 18, 2024, 5:00:08 AM (5 weeks ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
ValBot/Python/check_intrawiki_section_links.py
r1192 r1194 59 59 def find_section(page_text, page_name, page_slug, print_result): 60 60 global errors_issued 61 62 # Isolate section link 61 found_section = False 62 63 # Isolate section link or text fragment link 63 64 target_page_name, anchor_name = page_slug.split('#', 1) 64 65 target_page_name_human = target_page_name.replace('_', ' ') 65 if debug: pywikibot.stdout(' Searching for section link {} on page.'.format(anchor_name)) 66 67 # Read linked page to see if it really has this anchor link 68 soup = BeautifulSoup(page_text, 'html.parser') 69 found_section = False 70 for span_tag in soup.findAll('span'): 71 span_name = span_tag.get('id', None) 72 if span_name == anchor_name: 73 if debug and not print_result: pywikibot.stdout(' Found section in a span!') 66 67 # First check if this is a text fragment directive, and look for it if so 68 if anchor_name.startswith(':~:text='): 69 if debug: pywikibot.stdout(' Found text fragment directive {} from URL {}.'.format(anchor_name, page_slug)) 70 anchor_name = anchor_name[8:] 71 # We're only checking the first text directive, so strip add'l ones if present 72 addl_fragment = anchor_name.find('&text=') 73 if addl_fragment != -1: 74 anchor_name = anchor_name[:addl_fragment] 75 search_terms = anchor_name.split(',') 76 # Delete prefix and suffix terms because they aren't needed 77 if search_terms[0].endswith('-'): 78 search_terms.pop(0) 79 if search_terms[-1].startswith('-'): 80 search_terms.pop() 81 # Remake text directive with the terms separated by spaces as they should be in the page text 82 newSep = ' ' 83 search_string = newSep.join(search_terms) 84 if debug: pywikibot.stdout(' Converted text fragment to string "{}".'.format(search_string)) 85 if search_string in page_text: 74 86 found_section = True 75 break 87 if debug and not print_result: pywikibot.stdout(' Found text fragment!') 88 89 # If we're still here, it's a section link; read linked page to see if it really has this 90 # anchor link 91 if found_section == 
False: 92 if debug: pywikibot.stdout(' Searching for section link {} on page.'.format(anchor_name)) 93 soup = BeautifulSoup(page_text, 'html.parser') 94 # Search for a span with this ID 95 for span_tag in soup.findAll('span'): 96 span_name = span_tag.get('id', None) 97 if span_name == anchor_name: 98 if debug and not print_result: pywikibot.stdout(' Found section in a span!') 99 found_section = True 100 break 76 101 if found_section == False: 77 102 # Search for a div with this ID … … 128 153 # such as Special:PermanentLink. 129 154 if response.history != []: 130 131 155 permalink1 = 'Special:PermanentLink/'.lower() 132 156 permalink2 = 'Special:Permalink/'.lower()
Note: See TracChangeset for help on using the changeset viewer.