Problem Details
I am working on a personal project to scrape dynamic data from the Small and Medium Enterprise Agency’s database using Python and Selenium. The webpage displays company information in a “card” format, and clicking a button dynamically loads a table containing fee details.
However, I am encountering the following issues:
The button is detected and clicked successfully (verified with print statements).
The dynamic table data does not appear in driver.page_source, even after waiting for the table to load.
The target page: https://ma-shienkikan.go.jp/search
Below is the code for clicking the button and attempting to retrieve the table data:
def get_fee_details_optimized(card, fee_type, side):
    """Click one fee button inside ``card`` and return the fee table's HTML.

    Args:
        card: Selenium element for a company card; the button is searched
            for beneath it.
        fee_type: ``"FA"`` or ``"仲介"``; selects the button's colour class.
        side: Text that precedes ``側`` in the button label.

    Returns:
        The prettified HTML of the first ``<table>`` in the page source,
        ``"No table found"`` if the page source has no table, or ``"N/A"``
        if anything raised (the error is printed, not propagated).
    """
    try:
        # Colour class that marks the button for each fee type
        color_class = (
            "bg-yellow-500" if fee_type == "FA"
            else "bg-green-500" if fee_type == "仲介"
            else None
        )
        if color_class is None:
            raise ValueError(f"Invalid fee_type: {fee_type}")

        # Locate the anchor for the requested side below this card
        anchor = card.find_element(
            By.XPATH,
            f".//a[contains(@class, '{color_class}') and contains(text(), '{side}側')]",
        )

        # Click through JavaScript rather than a native click
        driver.execute_script("arguments[0].click();", anchor)

        # Block (up to 10 s) until some <table> exists in the DOM
        table_present = EC.presence_of_element_located((By.TAG_NAME, "table"))
        WebDriverWait(driver, 10).until(table_present)

        # Re-read the whole page and take the first table in it
        page_soup = BeautifulSoup(driver.page_source, "html.parser")
        first_table = page_soup.find("table")
        return first_table.prettify() if first_table else "No table found"
    except Exception as e:
        print(f"Error retrieving fee details for {fee_type} {side}側: {e}")
        return "N/A"
Issues
- Clicking the button does not result in the corresponding table data being retrieved.
- The driver.page_source does not contain the dynamically loaded table data as expected.
What I Tried
- Verified that the button is detected and clicked successfully using print statements.
- Manually tested the button click in the browser to ensure the table data loads as expected.
- Used WebDriverWait to allow sufficient time for the table to load.
- Inspected the network activity in the browser’s developer tools to identify related API calls.
Hypotheses
- The page may send additional network requests to fetch the table data, and Selenium may not capture the updated content.
- The button click might not be correctly triggering the required event in the browser session.
Questions
- How can I reliably retrieve the dynamically loaded table data after clicking the button with Selenium?
- If the table data is fetched via an API call, how can I identify and directly retrieve the data?
Full Code
import re
import time

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Listing URL that the scraper paginates over
base_url = "https://ma-shienkikan.go.jp/search"

# Accumulates the scraped rows
data = []

# Location of the ChromeDriver executable
chromedriver_path = "/Users/kazatoy/PLEX/chromedriver"

# Chrome flags; headless mode is enabled here (drop "--headless" to see the browser)
options = Options()
for chrome_flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
    options.add_argument(chrome_flag)

# Start Chrome through a service bound to the driver executable
service = Service(chromedriver_path)
driver = webdriver.Chrome(service=service, options=options)
# Format table data
def format_table_data(table_html):
    """Flatten a two-column fee table into newline-separated text.

    Args:
        table_html: HTML markup of a table (or a fragment containing rows).

    Returns:
        One line per ``<tr>`` that has exactly two ``<td>`` cells, formatted
        as ``"<first cell> <second cell>"`` and joined with newlines. Rows
        with any other number of ``<td>`` cells are skipped. Returns an
        empty string for empty input or when no row qualifies.
    """
    if not table_html:
        return ""

    soup = BeautifulSoup(table_html, "html.parser")

    # Not every table carries an explicit <tbody>; fall back to scanning the
    # whole fragment instead of failing on a None lookup.
    body = soup.find("tbody")
    if body is None:
        body = soup

    formatted_rows = []
    for row in body.find_all("tr"):
        cols = row.find_all("td")
        if len(cols) == 2:
            # Extract range data and fee rate
            amount_range = cols[0].get_text(strip=True)
            rate = cols[1].get_text(strip=True)
            formatted_rows.append(f"{amount_range} {rate}")

    # Join with a real newline; a bare "n" would glue the rows together
    return "\n".join(formatted_rows)
# Function to click button and retrieve table data
def get_fee_details_optimized(card, fee_type, side):
    """Click a fee button in ``card`` and return the formatted fee table.

    Args:
        card: Selenium element for a company card; the button lookup is
            made beneath it.
        fee_type: ``"FA"`` or ``"仲介"``; decides which colour class the
            button is expected to carry.
        side: Text that precedes ``側`` in the button label.

    Returns:
        The result of ``format_table_data`` on the first ``<table>`` of the
        page source, ``"No table found"`` when the page source has no table,
        or ``"N/A"`` when any exception occurs (it is printed, not raised).
    """
    try:
        # Map the fee type onto the colour class of its button
        for known_type, css_class in (("FA", "bg-yellow-500"), ("仲介", "bg-green-500")):
            if fee_type == known_type:
                button_color = css_class
                break
        else:
            raise ValueError(f"Invalid fee_type: {fee_type}")

        # Look up the button for the requested side beneath this card
        button = card.find_element(
            By.XPATH,
            f".//a[contains(@class, '{button_color}') and contains(text(), '{side}側')]",
        )
        print(button)

        # Trigger the click from JavaScript
        driver.execute_script("arguments[0].click();", button)
        print(f"Clicked Button: {fee_type} {side}側 using JavaScript.")

        # Give the page up to 10 s to contain a <table>
        table_locator = (By.TAG_NAME, "table")
        WebDriverWait(driver, 10).until(EC.presence_of_element_located(table_locator))

        # Parse a fresh copy of the page and pick its first table
        table = BeautifulSoup(driver.page_source, "html.parser").find("table")
        if not table:
            print(f"No table found for {fee_type} {side}側")
            return "No table found"
        return format_table_data(table.prettify())
    except Exception as e:
        print(f"Error retrieving fee details for {fee_type} {side}側: {e}")
        return "N/A"
# Walk every listing page, collect one row per company card, and always
# write the collected rows to CSV, even if the run is cut short.
try:
    for page in range(1, 142):  # Adjust page range as needed
        print(f"Processing page {page}")
        try:
            driver.get(f"{base_url}?page={page}")
            # Wait until at least one card is present in the DOM
            WebDriverWait(driver, 10).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "section-card"))
            )
        except WebDriverException as e:
            # A page that fails to load (or times out without cards) must
            # not abort the run and discard the rows gathered so far.
            print(f"Error loading page {page}: {e}")
            continue

        cards = driver.find_elements(By.CLASS_NAME, "section-card")
        for card in cards:
            try:
                card_html = card.get_attribute('outerHTML')
                soup = BeautifulSoup(card_html, "html.parser")

                # Extract company name; cards without the heading are skipped
                heading = soup.find(
                    "h1",
                    class_="text-base sm:text-lg py-4 font-bold bg-main text-white px-4 -mx-4 sm:-mx-8",
                )
                company_name = (
                    heading.find("div", class_="inline-block").get_text(strip=True)
                    if heading else "N/A"
                )
                if company_name == "N/A":
                    continue

                # Extract additional data: the value is the <span> that
                # follows the label span. `string=` is the current name of
                # BeautifulSoup's former `text=` argument.
                label = soup.find("span", string="M&A支援機関の種類")
                support_type = (
                    label.find_next("span").get_text(strip=True) if label else "N/A"
                )

                # Retrieve table data for each button
                mediator_fee_transferee = get_fee_details_optimized(card, "仲介", "譲受")
                mediator_fee_transferor = get_fee_details_optimized(card, "仲介", "譲渡")
                fa_fee_transferee = get_fee_details_optimized(card, "FA", "譲受")
                fa_fee_transferor = get_fee_details_optimized(card, "FA", "譲渡")

                # Save data
                data.append([
                    company_name, support_type, mediator_fee_transferee,
                    mediator_fee_transferor, fa_fee_transferee, fa_fee_transferor,
                ])
            except Exception as e:
                # Best effort: report the failing card and move on
                print(f"Error processing card: {e}")
finally:
    try:
        driver.quit()
    finally:
        # Save data to CSV. Done inside `finally` so that an interrupted or
        # failed run still writes the rows collected up to that point.
        df = pd.DataFrame(data, columns=["Company Name", "Support Type", "Mediator Fee (Transferee)", "Mediator Fee (Transferor)", "FA Fee (Transferee)", "FA Fee (Transferor)"])
        output_file = "scraped_data.csv"
        df.to_csv(output_file, index=False, encoding="utf-8-sig")
        print(f"Data saved to {output_file}")
The table I want to retrieve is the section shown in the attached image.
Kazato Yoshida is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.
You have not handled the case where a card doesn’t contain green or yellow colored buttons.
There are cases where the buttons are disabled and it is grey. Clicking on them would not return a table.
When you get the error “No such element exists,” that button is disabled, and the data you are trying to retrieve doesn’t exist.
To reliably get the data, you have to check if the buttons are disabled or not.
Instead of finding the button elements by background color, find the buttons first and then check whether they are grey. That way you can tell when a button is disabled and there is no table of data to collect.
If it isn’t grey, you can continue with collecting data. Otherwise, continue with the next card.
I have updated the get_fee_details_optimized function in the following code.
The button XPath now selects the button in the row that matches the fee_type and side values, without checking its color.
The function then reads the button's class attribute into button_color and clicks the button only if that value does not contain bg-gray-300.
Try this,
# Function to click button and retrieve table data
def get_fee_details_optimized(card, fee_type, side):
    """Click an enabled fee button in ``card`` and return the fee table.

    The button is located by its row label (``{fee_type}手数料体系``) and its
    own label (``{side}側``) rather than by colour, so that a disabled
    button can be found and recognised instead of raising a lookup error.

    Args:
        card: Selenium element for a company card.
        fee_type: ``"FA"`` or ``"仲介"``.
        side: Text that precedes ``側`` in the button label.

    Returns:
        The result of ``format_table_data`` on the first ``<table>`` of the
        page source; ``"No table found"`` when the page source has no table;
        ``"N/A"`` when the button is disabled (its class contains
        ``bg-gray-300``) or when any exception occurs (printed, not raised).
    """
    try:
        if fee_type not in ("FA", "仲介"):
            raise ValueError(f"Invalid fee_type: {fee_type}")

        # Anchor the XPath at the card. An expression that starts with "//"
        # searches the whole document even when called on an element, which
        # would return the first card's button for every card.
        # descendant-or-self also covers the case where the card itself is
        # the x-ref container.
        button_xpath = (
            "./descendant-or-self::div[@x-ref='container']"
            f"/div[span[contains(.,'{fee_type}手数料体系')]]"
            "/div[@class='flex gap-2']"
            f"/a[contains(.,'{side}側')]"
        )
        button = card.find_element(By.XPATH, button_xpath)

        # get_attribute returns None when the attribute is missing
        button_color = button.get_attribute('class') or ""

        # A grey button is disabled: there is no table to load, so skip the
        # click and the 10 s wait entirely.
        if 'bg-gray-300' in button_color:
            print(f"Button disabled: {fee_type} {side}側; no fee table to collect.")
            return "N/A"

        # Click the button using JavaScript
        driver.execute_script("arguments[0].click();", button)
        print(f"Clicked Button: {fee_type} {side}側 using JavaScript.")

        # Wait for the table to appear
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.TAG_NAME, "table"))
        )

        # Parse the updated page source and take its first table
        updated_soup = BeautifulSoup(driver.page_source, "html.parser")
        table = updated_soup.find("table")
        if table:
            return format_table_data(table.prettify())
        print(f"No table found for {fee_type} {side}側")
        return "No table found"
    except Exception as e:
        print(f"Error retrieving fee details for {fee_type} {side}側: {e}")
        return "N/A"