I am running several browsers in separate threads with the help of Selenium 4.2.0 (Firefox)
and Celery in a Docker container.
The program works fine, but with a significant amount of data I get an error (see the traceback below).
I read about the error here: https://www.pythonanywhere.com/forums/topic/1298/
It looks like it is caused by exceeding an OS limit.
How can I find the cause of the error, and what are the ways to fix it?
<code>Traceback (most recent call last):
celery-1 | File "/project/parser_app/celery_tasks.py", line 221, in check_urls
celery-1 | parser = YandexParser()
celery-1 | File "/project/parser_app/parser/common.py", line 195, in __init__
celery-1 | self.driver = get_firefox_driver(proxy_data=proxy_data)
celery-1 | File "/project/parser_app/parser/common.py", line 79, in get_firefox_driver
celery-1 | driver = webdriver.Firefox(options=options, seleniumwire_options=proxy_options, firefox_profile=firefox_profile)
celery-1 | File "/usr/local/lib/python3.9/site-packages/seleniumwire/webdriver.py", line 179, in __init__
celery-1 | super().__init__(*args, **kwargs)
celery-1 | File "/usr/local/lib/python3.9/site-packages/selenium/webdriver/firefox/webdriver.py", line 172, in __init__
celery-1 | self.service.start()
celery-1 | File "/usr/local/lib/python3.9/site-packages/selenium/webdriver/common/service.py", line 71, in start
celery-1 | self.process = subprocess.Popen(cmd, env=self.env,
celery-1 | File "/usr/local/lib/python3.9/subprocess.py", line 951, in __init__
celery-1 | self._execute_child(args, executable, preexec_fn, close_fds,
celery-1 | File "/usr/local/lib/python3.9/subprocess.py", line 1770, in _execute_child
celery-1 | self.pid = _posixsubprocess.fork_exec(
celery-1 | BlockingIOError: [Errno 11] Resource temporarily unavailable
</code>
<code>Traceback (most recent call last):
celery-1 | File "/project/parser_app/celery_tasks.py", line 221, in check_urls
celery-1 | parser = YandexParser()
celery-1 | File "/project/parser_app/parser/common.py", line 195, in __init__
celery-1 | self.driver = get_firefox_driver(proxy_data=proxy_data)
celery-1 | File "/project/parser_app/parser/common.py", line 79, in get_firefox_driver
celery-1 | driver = webdriver.Firefox(options=options, seleniumwire_options=proxy_options, firefox_profile=firefox_profile)
celery-1 | File "/usr/local/lib/python3.9/site-packages/seleniumwire/webdriver.py", line 179, in __init__
celery-1 | super().__init__(*args, **kwargs)
celery-1 | File "/usr/local/lib/python3.9/site-packages/selenium/webdriver/firefox/webdriver.py", line 172, in __init__
celery-1 | self.service.start()
celery-1 | File "/usr/local/lib/python3.9/site-packages/selenium/webdriver/common/service.py", line 71, in start
celery-1 | self.process = subprocess.Popen(cmd, env=self.env,
celery-1 | File "/usr/local/lib/python3.9/subprocess.py", line 951, in __init__
celery-1 | self._execute_child(args, executable, preexec_fn, close_fds,
celery-1 | File "/usr/local/lib/python3.9/subprocess.py", line 1770, in _execute_child
celery-1 | self.pid = _posixsubprocess.fork_exec(
celery-1 | BlockingIOError: [Errno 11] Resource temporarily unavailable
</code>
Traceback (most recent call last):
celery-1 | File "/project/parser_app/celery_tasks.py", line 221, in check_urls
celery-1 | parser = YandexParser()
celery-1 | File "/project/parser_app/parser/common.py", line 195, in __init__
celery-1 | self.driver = get_firefox_driver(proxy_data=proxy_data)
celery-1 | File "/project/parser_app/parser/common.py", line 79, in get_firefox_driver
celery-1 | driver = webdriver.Firefox(options=options, seleniumwire_options=proxy_options, firefox_profile=firefox_profile)
celery-1 | File "/usr/local/lib/python3.9/site-packages/seleniumwire/webdriver.py", line 179, in __init__
celery-1 | super().__init__(*args, **kwargs)
celery-1 | File "/usr/local/lib/python3.9/site-packages/selenium/webdriver/firefox/webdriver.py", line 172, in __init__
celery-1 | self.service.start()
celery-1 | File "/usr/local/lib/python3.9/site-packages/selenium/webdriver/common/service.py", line 71, in start
celery-1 | self.process = subprocess.Popen(cmd, env=self.env,
celery-1 | File "/usr/local/lib/python3.9/subprocess.py", line 951, in __init__
celery-1 | self._execute_child(args, executable, preexec_fn, close_fds,
celery-1 | File "/usr/local/lib/python3.9/subprocess.py", line 1770, in _execute_child
celery-1 | self.pid = _posixsubprocess.fork_exec(
celery-1 | BlockingIOError: [Errno 11] Resource temporarily unavailable
I checked the number of available threads (the kernel's PID limit):
<code>cat /proc/sys/kernel/pid_max
4194304
</code>
<code>cat /proc/sys/kernel/pid_max
4194304
</code>
cat /proc/sys/kernel/pid_max
4194304
I also checked the number of zombie processes:
<code>ps aux | awk '$8 ~ /^[Zz]/' | wc -l
35990
</code>
<code>ps aux | awk '$8 ~ /^[Zz]/' | wc -l
35990
</code>
ps aux | awk '$8 ~ /^[Zz]/' | wc -l
35990
And the free disk space:
<code>~/new_app$ df -h --total
Filesystem Size Used Avail Use% Mounted on
udev 16G 0 16G 0% /dev
tmpfs 3.2G 1004K 3.2G 1% /run
/dev/sda2 9.8G 3.3G 6.0G 36% /
tmpfs 16G 0 16G 0% /dev/shm
tmpfs 5.0M 0 5.0M 0% /run/lock
/dev/sdb 100G 5.1G 95G 6% /data
tmpfs 3.2G 0 3.2G 0% /run/user/1001
total 148G 8.4G 139G 6% -
</code>
<code>~/new_app$ df -h --total
Filesystem Size Used Avail Use% Mounted on
udev 16G 0 16G 0% /dev
tmpfs 3.2G 1004K 3.2G 1% /run
/dev/sda2 9.8G 3.3G 6.0G 36% /
tmpfs 16G 0 16G 0% /dev/shm
tmpfs 5.0M 0 5.0M 0% /run/lock
/dev/sdb 100G 5.1G 95G 6% /data
tmpfs 3.2G 0 3.2G 0% /run/user/1001
total 148G 8.4G 139G 6% -
</code>
~/new_app$ df -h --total
Filesystem Size Used Avail Use% Mounted on
udev 16G 0 16G 0% /dev
tmpfs 3.2G 1004K 3.2G 1% /run
/dev/sda2 9.8G 3.3G 6.0G 36% /
tmpfs 16G 0 16G 0% /dev/shm
tmpfs 5.0M 0 5.0M 0% /run/lock
/dev/sdb 100G 5.1G 95G 6% /data
tmpfs 3.2G 0 3.2G 0% /run/user/1001
total 148G 8.4G 139G 6% -