Changeset 1191


Ignore:
Timestamp:
Jul 7, 2024, 12:01:44 AM (5 months ago)
Author:
iritscen
Message:

ValBot: Adjusted check_interwiki_links for new layout code in German Wikipedia that places the section name in an h2, h3, etc. instead of a span or div.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • ValBot/Python/check_interwiki_links.py

    r1185 r1191  
    63 63   soup = BeautifulSoup(page_text, 'html.parser')
    64 64   found_section = False
    65    for span_tag in soup.findAll('span'): # search for span with ID matching the section name
    66       span_name = span_tag.get('id', None)
    67       if span_name == anchor_name:
     65   for the_tag in soup.findAll('span'): # search for span with ID matching the section name
     66      tag_name = the_tag.get('id', None)
     67      if tag_name == anchor_name:
    68 68         found_section = True
    69 69         break
    70 70   if found_section == False:
    71       for span_tag in soup.findAll('div'): # search for div with ID matching the section name
    72          span_name = span_tag.get('id', None)
    73          if span_name == anchor_name:
     71      for the_tag in soup.findAll('div'): # search for div with ID matching the section name
     72         tag_name = the_tag.get('id', None)
     73         if tag_name == anchor_name:
     74            found_section = True
     75            break
     76   if found_section == False:
     77      for the_tag in soup.findAll('h2'): # search for h2 with ID matching the section name
     78         tag_name = the_tag.get('id', None)
     79         if tag_name == anchor_name:
     80            found_section = True
     81            break
     82   if found_section == False:
     83      for the_tag in soup.findAll('h3'): # search for h3 with ID matching the section name
     84         tag_name = the_tag.get('id', None)
     85         if tag_name == anchor_name:
     86            found_section = True
     87            break
     88   if found_section == False:
     89      for the_tag in soup.findAll('h4'): # search for h4 with ID matching the section name
     90         tag_name = the_tag.get('id', None)
     91         if tag_name == anchor_name:
    74 92            found_section = True
    75 93            break
Note: See TracChangeset for help on using the changeset viewer.