I am building a DAG that uses pandas, but the DAG is not showing up in the Airflow web UI, which reports that pandas is not found. I have pandas installed on my local Windows machine as well as on WSL-enabled Ubuntu.
When I start a Python interpreter in both environments and import pandas, it works and also shows the version that is installed.
Windows
Ubuntu
import requests as re
from io import StringIO as si
from airflow import DAG
from airflow.operators.python import PythonOperator
import pandas as pd
from datetime import datetime, timedelta
import sys
import os
# Put the directory containing this file at the front of the module search path.
# NOTE(review): presumably so modules sitting next to this DAG file can be
# imported — confirm this is still needed.
sys.path.insert(0,os.path.abspath(os.path.dirname(__file__)))
# Default arguments handed to the DAG and applied to each of its tasks.
# The keys must match operator argument names exactly; a misspelled key is
# not applied to the tasks.
default_args = {
    'owner': '****',
    # Number of times a failed task is retried before it is marked as failed.
    'retries': 5,
    # Wait time between two consecutive retry attempts.
    'retry_delay': timedelta(minutes=3),
}
def fetch_data():
    """Download the OWID mpox CSV and print the shape of the selected columns.

    The CSV is fetched over HTTP, parsed with pandas, narrowed down to the
    location/date/ISO code and case/death columns, and the resulting
    ``(rows, columns)`` shape is printed. Nothing is returned.

    Note: ``re`` in this file is the ``requests`` package (imported under
    that alias), not the standard-library regex module.

    Raises:
        requests.exceptions.HTTPError: the server answered with a 4xx/5xx
            status.
        requests.exceptions.RequestException: the connection failed or the
            request timed out.
        KeyError: one of the expected columns is missing from the CSV.
    """
    response = re.get(
        "https://catalog.ourworldindata.org/explorers/who/latest/monkeypox/monkeypox.csv",
        # Without a timeout a stalled connection would block the worker forever.
        timeout=60,
    )
    # Fail loudly on an HTTP error so the task is marked as failed (and can be
    # retried) instead of finishing "successfully" without any data.
    response.raise_for_status()

    df = pd.read_csv(si(response.text), low_memory=False)
    new_df = df[['location', 'date', 'iso_code', 'total_cases', 'total_deaths', 'new_cases', 'new_deaths']]
    print(new_df.shape)
# Daily pipeline definition: a single Python task that runs fetch_data.
with DAG(
    dag_id='owid_data_pipeline',
    description='DAG to fetch and load owid mpox daily data into postgres',
    default_args=default_args,
    schedule_interval='@daily',
    start_date=datetime(2024, 12, 18),
) as dag:
    # Operators created inside the `with` block are attached to `dag`.
    task1 = PythonOperator(
        python_callable=fetch_data,
        task_id='fetch_data',
    )

    # Only one task exists, so there are no dependencies to chain.
    task1
Error
Please suggest a solution, as I have been scratching my head over this for an entire day now.
Thank you. GGs.
1