I’ve deployed a python screen-scraping App which uses Selenium and chrome-driver (code below) in ECS Fargate, and the App does run. However, I get the following error when chrome-driver is initialized:
[Errno 2] No such file or directory: '/opt/chrome-driver/chromedriver-linux64/chromedriver'
The /opt/chrome-driver
directory is created in the chrome-installer.sh
script, but it’s not found per the error message when the App executes.
I’d appreciate any guidance in tracking this down in my config. Thanks in advance!
Dockerfile
FROM alpine:3.18
#FROM --platform=linux/amd64 alpine:3.18
# ^ Apple Silicon Ref: https://medium.com/block-imperium-games/exec-format-error-or-how-macs-m1s-docker-images-and-aws-ecs-eks-conspired-to-waste-a-weekend-6fcd2ea063d1
RUN apk update
RUN apk add --update py3-pip jq unzip curl
RUN pip install boto3
RUN apk add --no-cache --upgrade bash
# Ref: https://medium.com/@kroeze.wb/running-selenium-in-aws-lambda-806c7e88ec64#:~:text=To%20run%20Selenium%20from%20AWS,Lambda%20image%20for%20Python%203.12)
COPY ./chrome-installer.sh ./chrome-installer.sh
RUN ./chrome-installer.sh
RUN rm ./chrome-installer.sh
RUN pip install selenium
COPY main.py ./
CMD ["python", "main.py"]
chrome-installer.sh
#!/bin/sh
# Script Ref: https://medium.com/@kroeze.wb/running-selenium-in-aws-lambda-806c7e88ec64#:~:text=To%20run%20Selenium%20from%20AWS,Lambda%20image%20for%20Python%203.12)
set -e
latest_stable_json="https://googlechromelabs.github.io/chrome-for-testing/last-known-good-versions-with-downloads.json"
json_data=$(curl -s "$latest_stable_json")
latest_chrome_linux_download_url="$(echo "$json_data" | jq -r ".channels.Stable.downloads.chrome[0].url")"
latest_chrome_driver_linux_download_url="$(echo "$json_data" | jq -r ".channels.Stable.downloads.chromedriver[0].url")"
download_path_chrome_linux="/opt/chrome-headless-shell-linux.zip"
dowload_path_chrome_driver_linux="/opt/chrome-driver-linux.zip"
mkdir -p "/opt/chrome"
curl -Lo $download_path_chrome_linux $latest_chrome_linux_download_url
unzip -q $download_path_chrome_linux -d "/opt/chrome"
rm -rf $download_path_chrome_linux
# [Errno 2] No such file or directory: '/opt/chrome-driver/chromedriver-linux64/chromedriver'
mkdir -p "/opt/chrome-driver"
curl -Lo $dowload_path_chrome_driver_linux $latest_chrome_driver_linux_download_url
unzip -q $dowload_path_chrome_driver_linux -d "/opt/chrome-driver"
rm -rf $dowload_path_chrome_driver_linux
main.py
from tempfile import mkdtemp
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options as ChromeOptions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def init_driver():
chrome_options = ChromeOptions()
chrome_options.add_argument("--headless=new")
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")
chrome_options.add_argument("--disable-gpu")
chrome_options.add_argument("--disable-dev-tools")
chrome_options.add_argument("--no-zygote")
chrome_options.add_argument("--single-process")
chrome_options.add_argument(f"--user-data-dir={mkdtemp()}")
chrome_options.add_argument(f"--data-path={mkdtemp()}")
chrome_options.add_argument(f"--disk-cache-dir={mkdtemp()}")
chrome_options.add_argument("--remote-debugging-pipe")
chrome_options.add_argument("--verbose")
chrome_options.add_argument("--log-path=/tmp")
chrome_options.add_argument("window-size=1920,1080")
chrome_options.binary_location = "/opt/chrome/chrome-linux64/chrome"
# [Errno 2] No such file or directory: '/opt/chrome-driver/chromedriver-linux64/chromedriver'
service = Service(
executable_path="/opt/chrome-driver/chromedriver-linux64/chromedriver",
service_log_path="/tmp/chromedriver.log"
)
driver = webdriver.Chrome(
service=service,
options=chrome_options
)
return driver
try:
print('START...')
driver = init_driver()
url = 'https://www.wikipedia.org/'
driver.get(url)
print(driver.page_source)
print('DONE')
except Exception as e:
print('An Exception was thrown!')
print(e)
# [Errno 2] No such file or directory: '/opt/chrome-driver/chromedriver-linux64/chromedriver'