Thiết kế website giá rẻ

Question

I am trying to scrape articles from several hyperlinks, get the title of each article, create a summary from the contents of its first 4 paragraphs, and save the output in a .png file. But it gives an error – UnicodeEncodeError: 'latin-1' codec can't encode character '\u2019' in position 4: ordinal not in range(256)

import requests
from bs4 import BeautifulSoup
from PIL import Image, ImageDraw, ImageFont

def extract_info(url):
    """Fetch a web page and return its title plus a short summary.

    The summary is built from the first four ``<p>`` elements of the page:
    their stripped text is kept when non-empty and joined with blank lines.

    Args:
        url: Address of the page to download.

    Returns:
        A ``(title, summary)`` tuple of strings. The title is
        ``'No Title Found'`` when the page has no usable ``<title>``; the
        summary is ``'No Summary Found'`` when no paragraph text is found.
        If anything fails, ``('Error fetching details', <error message>)``
        is returned instead of raising.
    """
    try:
        # A timeout keeps one unresponsive site from stalling the whole run.
        response = requests.get(url, timeout=10)
        response.encoding = 'utf-8'  # Ensure UTF-8 encoding
        soup = BeautifulSoup(response.text, 'html.parser')

        # Extract title
        if soup.title and soup.title.string:
            title = soup.title.string.strip()
        else:
            title = 'No Title Found'

        # Extract the first 4 paragraphs as summary, skipping empty ones.
        texts = (p.get_text(strip=True) for p in soup.find_all('p')[:4])
        summary_paragraphs = [text for text in texts if text]
        if summary_paragraphs:
            summary = '\n\n'.join(summary_paragraphs)
        else:
            summary = 'No Summary Found'

        return title, summary
    except Exception as e:
        # Deliberate catch-all boundary: the caller processes a list of URLs
        # and expects a placeholder entry rather than an exception when one
        # page cannot be fetched or parsed.
        return 'Error fetching details', str(e)

# List of URLs to summarise. Each URL must be a single unbroken string
# literal: a line break inside the quotes is a syntax error and would also
# inject whitespace into the address.
urls = [
    "https://www.cisa.gov/news-events/events",
    "https://www.cisa.gov/news-events/cybersecurity-advisories",
    "https://www.cisa.gov/news-events/alerts/2024/08/14/adobe-releases-security-updates-multiple-products",
    "https://www.youtube.com/@cisagov",
    "https://www.dhs.gov",
    "https://www.kaspersky.com/home-security?icid=gl_securelisheader_acq_ona_smm__onl_b2c_securelist_prodmen_______",
]

# Collect all the data as (title, url, summary) tuples
data = []
for url in urls:
    print(f"Processing URL: {url}")
    title, summary = extract_info(url)
    data.append((title, url, summary))

# Determine the image height dynamically
width = 1200
padding = 50  # Padding around text
current_height = padding

# Use a Unicode-compatible font. The DejaVu path below only exists on some
# Linux systems, so a couple of bare file names are tried as well;
# ImageFont.truetype may also search the platform's font directories for them.
font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf"
font_candidates = (font_path, "DejaVuSans.ttf", "Arial.ttf")
for candidate in font_candidates:
    try:
        font_title = ImageFont.truetype(candidate, 24)
        font_url = ImageFont.truetype(candidate, 20)
        font_summary = ImageFont.truetype(candidate, 18)
        break
    except OSError:
        # Font file not found or unreadable: try the next candidate.
        continue
else:
    # Last resort. NOTE(review): the built-in default font may be a bitmap
    # font limited to Latin-1, in which case text containing characters such
    # as U+2019 fails with UnicodeEncodeError - confirm for the installed
    # Pillow version.
    font_title = ImageFont.load_default()
    font_url = ImageFont.load_default()
    font_summary = ImageFont.load_default()

# Create a dummy image to calculate text size
dummy_img = Image.new('RGB', (width, 1000))
draw = ImageDraw.Draw(dummy_img)

def calculate_text_height(draw, text, font):
    """Return the height of the bounding box of ``text`` drawn with ``font``.

    The box is measured by ``draw.multiline_textbbox`` with the text anchored
    at the origin; the result is the box's bottom edge minus its top edge.
    """
    _left, top, _right, bottom = draw.multiline_textbbox((0, 0), text, font=font)
    return bottom - top

# First pass: measure every entry so the canvas is tall enough for all of
# them. All three measurements must sit inside the loop; otherwise only the
# last entry's URL and summary are counted.
for title, url, summary in data:
    # Calculate height for title
    title_height = calculate_text_height(draw, title, font_title)
    current_height += title_height + 10  # 10 pixels spacing

    # Calculate height for url
    url_height = calculate_text_height(draw, url, font_url)
    current_height += url_height + 10

    # Calculate height for summary
    summary_height = calculate_text_height(draw, summary, font_summary)
    current_height += summary_height + 30  # 30 pixels spacing after summary

# Add some bottom padding
current_height += padding

# Create the final image
image = Image.new('RGB', (width, current_height), color=(255, 255, 255))
draw = ImageDraw.Draw(image)

# Second pass: draw each entry, advancing the cursor by the same heights and
# gaps used while measuring so the content fits the canvas exactly.
y_text = padding
for title, url, summary in data:
    # Draw title
    draw.text((padding, y_text), title, font=font_title, fill="black")
    y_text += calculate_text_height(draw, title, font_title) + 10  # 10 pixels spacing

    # Draw URL
    draw.text((padding, y_text), url, font=font_url, fill="blue")
    y_text += calculate_text_height(draw, url, font_url) + 10

    # Draw Summary
    draw.multiline_text((padding, y_text), summary, font=font_summary, fill="black", spacing=4)
    y_text += calculate_text_height(draw, summary, font_summary) + 30  # 30 pixels spacing after summary

# Save the image in the current working directory
image.save('website_info.png')

# Display the image
image.show()

Thiết kế website giá rẻ

Danh mục

Scraping online articles for summary using Python