Timestamp: Nov 18, 2024, 5:00:08 AM (3 days ago)
Author: iritscen
Message: ValBot: check_intrawiki_section_links.py now understands text fragment directives.

File: 1 edited

  • ValBot/Python/check_intrawiki_section_links.py

--- ValBot/Python/check_intrawiki_section_links.py (r1192)
+++ ValBot/Python/check_intrawiki_section_links.py (r1194)
@@ -59,19 +59,44 @@
 def find_section(page_text, page_name, page_slug, print_result):
    global errors_issued
-
-   # Isolate section link
+   found_section = False
+
+   # Isolate section link or text fragment link
    target_page_name, anchor_name = page_slug.split('#', 1)
    target_page_name_human = target_page_name.replace('_', ' ')
-   if debug: pywikibot.stdout('         Searching for section link {} on page.'.format(anchor_name))
-
-   # Read linked page to see if it really has this anchor link
-   soup = BeautifulSoup(page_text, 'html.parser')
-   found_section = False
-   for span_tag in soup.findAll('span'):
-      span_name = span_tag.get('id', None)
-      if span_name == anchor_name:
-         if debug and not print_result: pywikibot.stdout('         Found section in a span!')
+
+   # First check if this is a text fragment directive, and look for it if so
+   if anchor_name.startswith(':~:text='):
+      if debug: pywikibot.stdout('         Found text fragment directive {} from URL {}.'.format(anchor_name, page_slug))
+      anchor_name = anchor_name[8:]
+      # We're only checking the first text directive, so strip add'l ones if present
+      addl_fragment = anchor_name.find('&text=')
+      if addl_fragment != -1:
+         anchor_name = anchor_name[:addl_fragment]
+      search_terms = anchor_name.split(',')
+      # Delete prefix and suffix terms because they aren't needed
+      if search_terms[0].endswith('-'):
+         search_terms.pop(0)
+      if search_terms[-1].startswith('-'):
+         search_terms.pop()
+      # Remake text directive with the terms separated by spaces as they should be in the page text
+      newSep = ' '
+      search_string = newSep.join(search_terms)
+      if debug: pywikibot.stdout('         Converted text fragment to string "{}".'.format(search_string))
+      if search_string in page_text:
          found_section = True
-         break
+         if debug and not print_result: pywikibot.stdout('         Found text fragment!')
+
+   # If we're still here, it's a section link; read linked page to see if it really has this
+   # anchor link
+   if found_section == False:
+      if debug: pywikibot.stdout('         Searching for section link {} on page.'.format(anchor_name))
+      soup = BeautifulSoup(page_text, 'html.parser')
+      # Search for a span with this ID
+      for span_tag in soup.findAll('span'):
+         span_name = span_tag.get('id', None)
+         if span_name == anchor_name:
+            if debug and not print_result: pywikibot.stdout('         Found section in a span!')
+            found_section = True
+            break
    if found_section == False:
       # Search for a div with this ID
@@ -128,5 +153,4 @@
    # such as Special:PermanentLink.
    if response.history != []:
-
       permalink1 = 'Special:PermanentLink/'.lower()
       permalink2 = 'Special:Permalink/'.lower()
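For context on the change: a text fragment directive is the '#:~:text=[prefix-,]textStart[,textEnd][,-suffix]' syntax used by browsers for scroll-to-text links. The new branch reduces such a directive to a plain string and checks whether it occurs in the page text. Below is a rough standalone sketch of that conversion; the function name is invented and is not part of the script, and percent-encoded fragments are not decoded here, matching the hunk above.

# Hypothetical standalone sketch (not part of check_intrawiki_section_links.py)
# mirroring the conversion logic added in r1194.
def text_fragment_to_search_string(anchor_name):
   # 'anchor_name' is everything after the '#' in the link,
   # e.g. ':~:text=the-,Chapter,One,-section'
   if not anchor_name.startswith(':~:text='):
      return None
   anchor_name = anchor_name[len(':~:text='):]
   # Only the first text directive is checked; strip any additional '&text=' directives
   addl_fragment = anchor_name.find('&text=')
   if addl_fragment != -1:
      anchor_name = anchor_name[:addl_fragment]
   # A directive has the form [prefix-,]textStart[,textEnd][,-suffix]; the prefix and
   # suffix terms are only context hints, so they are dropped
   search_terms = anchor_name.split(',')
   if search_terms[0].endswith('-'):
      search_terms.pop(0)
   if search_terms and search_terms[-1].startswith('-'):
      search_terms.pop()
   # Rejoin the remaining terms with spaces, as they should appear in the page text
   return ' '.join(search_terms)

# Example: text_fragment_to_search_string(':~:text=the-,Chapter,One,-section')
# returns 'Chapter One', which find_section() can then look for directly in page_text.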