I’ve developed a web scraping script using Selenium that works perfectly on my local computer. However, when I try to run the same script within a Docker container, it fails. The error message indicates that Selenium is unable to locate specific elements on the page, even though they are present.
When i add code stop working chrome_options.add_argument("--headless")
My code:
<code>chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--window-size=1920x1080")
# chrome_options.binary_location = '/usr/local/bin/chromedriver'
service = Service('/usr/local/bin/chromedriver')
driver = webdriver.Chrome(options=chrome_options,service=service)
# driver = webdriver.Chrome()
driver.get("https://megamarket.ru/catalog/?q=%D0%B8%D0%B3%D1%80%D0%BE%D0%B2%D0%BE%D0%B5%20%D0%BA%D1%80%D0%B5%D1%81%D0%BB%D0%BE")
driver.implicitly_wait(0.2)
catalog_container = driver.find_element(By.CSS_SELECTOR, 'div.catalog-items-list')
product_elements = catalog_container.find_elements(By.CSS_SELECTOR, '[data-test="product-item"]')
selected_products = product_elements[:20]
product_data = []
product_links = []
product_image_urls = []
for product in selected_products:
try:
product_link_element = product.find_element(By.TAG_NAME, 'a')
product_link = product_link_element.get_attribute('href')
product_links.append(product_link)
product_image_element = driver.find_element(By.CSS_SELECTOR, 'img[data-test="product-image"]')
product_image_url = product_image_element.get_attribute('src')
product_image_urls.append(product_image_url)
except Exception as e:
print(f'Ошибка при получении URL: {e}')
for i, product_link in enumerate(product_links):
driver.get(product_link)
try:
product_name = driver.find_element(By.CSS_SELECTOR, 'h1[itemprop="name"]').text
product_price = driver.find_element(By.CSS_SELECTOR, 'span.sales-block-offer-price__price-final').text
product_price = product_price.replace('₽', '').strip()
product_image_url = product_image_urls[i]
try:
product_description = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, 'div[itemprop="description"]'))
).text.strip()
except TimeoutException:
product_description = 'Описание отсутствует.'
product_data.append({
'Product URL': product_link,
'Product Name': product_name,
'Price (RUB)': product_price,
'Image URL': product_image_url,
'Description': product_description,
})
except Exception as e:
print(f'Произошла ошибка {e}')
driver.quit()
</code>
<code>chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--window-size=1920x1080")
# chrome_options.binary_location = '/usr/local/bin/chromedriver'
service = Service('/usr/local/bin/chromedriver')
driver = webdriver.Chrome(options=chrome_options,service=service)
# driver = webdriver.Chrome()
driver.get("https://megamarket.ru/catalog/?q=%D0%B8%D0%B3%D1%80%D0%BE%D0%B2%D0%BE%D0%B5%20%D0%BA%D1%80%D0%B5%D1%81%D0%BB%D0%BE")
driver.implicitly_wait(0.2)
catalog_container = driver.find_element(By.CSS_SELECTOR, 'div.catalog-items-list')
product_elements = catalog_container.find_elements(By.CSS_SELECTOR, '[data-test="product-item"]')
selected_products = product_elements[:20]
product_data = []
product_links = []
product_image_urls = []
for product in selected_products:
try:
product_link_element = product.find_element(By.TAG_NAME, 'a')
product_link = product_link_element.get_attribute('href')
product_links.append(product_link)
product_image_element = driver.find_element(By.CSS_SELECTOR, 'img[data-test="product-image"]')
product_image_url = product_image_element.get_attribute('src')
product_image_urls.append(product_image_url)
except Exception as e:
print(f'Ошибка при получении URL: {e}')
for i, product_link in enumerate(product_links):
driver.get(product_link)
try:
product_name = driver.find_element(By.CSS_SELECTOR, 'h1[itemprop="name"]').text
product_price = driver.find_element(By.CSS_SELECTOR, 'span.sales-block-offer-price__price-final').text
product_price = product_price.replace('₽', '').strip()
product_image_url = product_image_urls[i]
try:
product_description = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, 'div[itemprop="description"]'))
).text.strip()
except TimeoutException:
product_description = 'Описание отсутствует.'
product_data.append({
'Product URL': product_link,
'Product Name': product_name,
'Price (RUB)': product_price,
'Image URL': product_image_url,
'Description': product_description,
})
except Exception as e:
print(f'Произошла ошибка {e}')
driver.quit()
</code>
chrome_options = Options()
chrome_options.add_argument("--headless")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--window-size=1920x1080")
# chrome_options.binary_location = '/usr/local/bin/chromedriver'
service = Service('/usr/local/bin/chromedriver')
driver = webdriver.Chrome(options=chrome_options,service=service)
# driver = webdriver.Chrome()
driver.get("https://megamarket.ru/catalog/?q=%D0%B8%D0%B3%D1%80%D0%BE%D0%B2%D0%BE%D0%B5%20%D0%BA%D1%80%D0%B5%D1%81%D0%BB%D0%BE")
driver.implicitly_wait(0.2)
catalog_container = driver.find_element(By.CSS_SELECTOR, 'div.catalog-items-list')
product_elements = catalog_container.find_elements(By.CSS_SELECTOR, '[data-test="product-item"]')
selected_products = product_elements[:20]
product_data = []
product_links = []
product_image_urls = []
for product in selected_products:
try:
product_link_element = product.find_element(By.TAG_NAME, 'a')
product_link = product_link_element.get_attribute('href')
product_links.append(product_link)
product_image_element = driver.find_element(By.CSS_SELECTOR, 'img[data-test="product-image"]')
product_image_url = product_image_element.get_attribute('src')
product_image_urls.append(product_image_url)
except Exception as e:
print(f'Ошибка при получении URL: {e}')
for i, product_link in enumerate(product_links):
driver.get(product_link)
try:
product_name = driver.find_element(By.CSS_SELECTOR, 'h1[itemprop="name"]').text
product_price = driver.find_element(By.CSS_SELECTOR, 'span.sales-block-offer-price__price-final').text
product_price = product_price.replace('₽', '').strip()
product_image_url = product_image_urls[i]
try:
product_description = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.CSS_SELECTOR, 'div[itemprop="description"]'))
).text.strip()
except TimeoutException:
product_description = 'Описание отсутствует.'
product_data.append({
'Product URL': product_link,
'Product Name': product_name,
'Price (RUB)': product_price,
'Image URL': product_image_url,
'Description': product_description,
})
except Exception as e:
print(f'Произошла ошибка {e}')
driver.quit()
My Dockerfile:
<code>FROM python:3.11.0
RUN apt-get update && apt-get install -y wget unzip chromium &&
wget https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/120.0.6099.109/linux64/chromedriver-linux64.zip
-O /tmp/chromedriver-linux64.zip &&
unzip /tmp/chromedriver-linux64.zip -d /tmp &&
mv /tmp/chromedriver-linux64/chromedriver /usr/local/bin &&
chmod +x /usr/local/bin/chromedriver
WORKDIR /app
COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt
COPY . .
CMD ["python", "main.py"]
</code>
<code>FROM python:3.11.0
RUN apt-get update && apt-get install -y wget unzip chromium &&
wget https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/120.0.6099.109/linux64/chromedriver-linux64.zip
-O /tmp/chromedriver-linux64.zip &&
unzip /tmp/chromedriver-linux64.zip -d /tmp &&
mv /tmp/chromedriver-linux64/chromedriver /usr/local/bin &&
chmod +x /usr/local/bin/chromedriver
WORKDIR /app
COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt
COPY . .
CMD ["python", "main.py"]
</code>
FROM python:3.11.0
RUN apt-get update && apt-get install -y wget unzip chromium &&
wget https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/120.0.6099.109/linux64/chromedriver-linux64.zip
-O /tmp/chromedriver-linux64.zip &&
unzip /tmp/chromedriver-linux64.zip -d /tmp &&
mv /tmp/chromedriver-linux64/chromedriver /usr/local/bin &&
chmod +x /usr/local/bin/chromedriver
WORKDIR /app
COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt
COPY . .
CMD ["python", "main.py"]
My docker-compose:
<code>version: '3'
services:
selenium:
build:
context: .
volumes:
- ./AbeloOutput:/output
</code>
<code>version: '3'
services:
selenium:
build:
context: .
volumes:
- ./AbeloOutput:/output
</code>
version: '3'
services:
selenium:
build:
context: .
volumes:
- ./AbeloOutput:/output
New contributor
asd is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.