I’m attempting to scrape image URLs from a webpage using BeautifulSoup and requests in Python. The webpage I’m targeting is the Minecraft wiki page for blocks. However, I’m encountering an “IndentationError: unexpected indent” at line 35, which is for img_tag in soup.find_all(‘img’, class_=’mw-file-element’):. I’m not sure what’s causing this issue. Can anyone help me identify and resolve the problem?
from bs4 import BeautifulSoup
import requests
import re
# URL of the website containing the block names and images
url = 'https://minecraft.wiki/w/Block'
# Send a GET request to the URL
response = requests.get(url)
# Parse the HTML content of the page
soup = BeautifulSoup(response.text, 'html.parser')
# Find all elements containing block names and images
blocks = []
for li in soup.find_all('li'):
name_element = li.find('a', class_='mw-redirect')
if name_element:
name = name_element.text.strip()
url = 'https://minecraft.wiki/w/' + re.sub(r's+', '_', name)
blocks.append({'name': name, 'url': url})
for block in blocks:
print(block['name'])
print(block['url'])
images = []
for block in blocks:
block_url = block['url']
response = requests.get(block_url)
soup = BeautifulSoup(response.text, 'html.parser')
for i in soup.find_all('img'):
image_element = i.find('img', class_='mw-file-element')
if image_element:
srcset = image_element.get('srcset', '')
original_image_url = re.search(r'^(.+.png)', srcset)
if original_image_url:
image_url = 'https://minecraft.wiki' + original_image_url.group(1)
images.append({'image:' : image_url})
for image in images:
print(image['image'])
New contributor
giacomo abb is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.