I'm writing a fairly simple Airflow DAG that downloads a data file from GitHub. The file is not large at all. However, as soon as I trigger the DAG, the task fails with this error:
Task exited with return code Negsignal.SIGSEGV
I use the basic airflow config with the SequentialExecutor.
This is my DAG
from datetime import datetime
from airflow import DAG
from airflow.operators.python import PythonOperator
from src.download_netflix import download_netflix_data
# Daily DAG containing a single Python task that runs the Netflix downloader.
with DAG(
    dag_id="s3_dag",
    start_date=datetime(2022, 3, 1),
    schedule="@daily",
    catchup=False,  # no runs are created for the dates before today
) as dag:
    task_scrap_netflix = PythonOperator(
        python_callable=download_netflix_data,
        task_id="scrap_netflix",
    )

    # Only one task, so there is no dependency chain to declare.
    task_scrap_netflix
And this is my downloader module (`src/download_netflix.py`):
import io
import os
import logging
import requests
def fetch_data_from_github(url, *, timeout=30, **kwargs):
    """Download the file at *url* into the local ``data/netflix`` directory.

    The target directory is ``../data/netflix`` relative to this module and
    is created when missing. The file name is the last ``/``-separated
    segment of *url*. If a file with that name already exists, nothing is
    downloaded.

    Args:
        url: Address of the file to fetch.
        timeout: Seconds to wait for the server, passed to ``requests.get``.
            Without it a stalled connection would block the task forever.
        **kwargs: Accepted and ignored.

    Raises:
        ValueError: If no file name can be derived from *url*.
        requests.RequestException: On connection errors, timeouts, or an
            HTTP error status.
        OSError: If the directory or the file cannot be written.
    """
    # NOTE(review): if the task process dies with SIGSEGV on macOS, the
    # Python docs warn that the system proxy lookup in urllib.request (which
    # requests relies on) is unsafe after os.fork(); exporting no_proxy=*
    # for the Airflow processes is the documented workaround. The platform
    # is not visible from this code -- confirm before applying.
    try:
        data_dir = os.path.join(os.path.dirname(__file__), "..", "data")
        logging.info("Data directory: %s", data_dir)

        local_directory = os.path.join(data_dir, "netflix")
        logging.info("Local directory: %s", local_directory)

        os.makedirs(local_directory, exist_ok=True)
        logging.info("Folder created or exists already: %s", local_directory)

        filename = url.split("/")[-1]
        if not filename:
            # An empty name would make file_path point at the directory
            # itself, and the existence check below would skip silently.
            raise ValueError(f"Cannot derive a file name from URL: {url}")

        file_path = os.path.join(local_directory, filename)
        logging.info("File path: %s", file_path)

        if os.path.exists(file_path):
            logging.info("File %s already exists, skipping download.", file_path)
            return

        logging.info("Requesting URL: %s", url)
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()

        # Write to a sibling ".part" file and rename it into place, so a
        # failed write never leaves a truncated file that later runs would
        # treat as "already downloaded".
        partial_path = f"{file_path}.part"
        try:
            with open(partial_path, "wb") as file:
                file.write(response.content)
            os.replace(partial_path, file_path)
        except OSError:
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise

        logging.info("Data downloaded and saved locally to %s.", file_path)
    except Exception as e:
        logging.error("An error occurred: %s", e)
        raise
def download_netflix_data():
    """Fetch the Netflix dataset CSV from its fixed GitHub raw URL.

    Takes no arguments and returns nothing; the download itself is
    delegated to ``fetch_data_from_github``.
    """
    dataset_url = (
        'https://raw.githubusercontent.com/garg-priya-creator/'
        'Netflix-Recommendation-System/main/app/NetflixDataset.csv'
    )
    fetch_data_from_github(dataset_url)
I tried increasing the available memory, but it made no difference.
Thank you all for your contributions!