I’m using Prophet to forecast moisture sensor data for multiple farmers, with the data recorded at 15-minute intervals. I’ve added external regressors (Rain and Temperature) to the model. Below is a sample of my data:
- Training data frame (Machine_Learning_Data.csv):
Farmer_Id ds y Rain Temperature
47 2023-05-12 10:00:00 -2.0039 0 36
47 2023-05-12 10:15:00 -2.1148 0 36
47 2023-05-12 10:30:00 -2.1702 0 36
- Prediction data frame (predict.csv):
Farmer_Id ds Rain Temperature
47 2024-08-23 00:00:00 0 25
47 2024-08-23 00:15:00 0 25
47 2024-08-23 00:30:00 0 25
47 2024-08-23 00:45:00 832000 25
47 2024-08-23 01:00:00 0 25
My expectation is that after adding water at 2024-08-23 00:45:00, the predicted moisture should increase. However, the model predicts that the value decreases significantly. The output I am getting is:
ds yhat
2024-08-23 00:00:00 -14.055044
2024-08-23 00:15:00 -14.119256
2024-08-23 00:30:00 -14.177260
2024-08-23 00:45:00 -40.969768
2024-08-23 01:00:00 -14.273644
2024-08-23 01:15:00 -14.313095
2024-08-23 01:30:00 -14.348438
2024-08-23 01:45:00 -14.381592
2024-08-23 02:00:00 -14.414816
The output I am expecting is:
ds yhat
2024-08-23 00:00:00 -14.055044
2024-08-23 00:15:00 -14.119256
2024-08-23 00:30:00 -14.177260
2024-08-23 00:45:00 -40.969768
2024-08-23 01:00:00 -14.273644
2024-08-23 01:15:00 -14.313095
2024-08-23 01:30:00 -14.348438
2024-08-23 01:45:00 -14.381592
2024-08-23 02:00:00 -14.414816
Issues:
- Prediction Direction: The sensor value moves in the wrong direction after water is added (Rain = 832000): the prediction drops to -40.969768. Could this be due to the negative values in the dataset?
- Impact on Time-Series: Adding water only affects the current row, while I expect the effect to persist over subsequent time steps. Is this a limitation of the Prophet model, or am I handling the external regressor incorrectly?
Here is a simplified version of my code for reference:
import pandas as pd
from prophet import Prophet
import pickle
class MultiFarmerForecasting:
    """Fit and query one Prophet model per farmer.

    Fitted models are kept in ``self.models`` keyed by farmer id. Training
    and prediction frames are expected to carry ``ds``, ``Rain`` and
    ``Temperature`` columns (plus ``y`` for training).
    """

    # Number of rows the water column is shifted by for the lagged regressor.
    # NOTE(review): the generated column name ends in "_mins", but the shift
    # is counted in rows; with 15-minute samples, 10 rows span 150 minutes.
    _LAG_ROWS = 10

    def __init__(self, farmer_data_list):
        """Store the per-farmer training frames and start with no models."""
        self.farmer_data_list = farmer_data_list
        self.models = {}

    @staticmethod
    def _lag_column(lag):
        """Return the column name used for a water series shifted by *lag* rows."""
        return f'Lagged_Water_{lag}_mins'

    def create_model(self, changepoint_prior_scale=10,
                     seasonality_prior_scale=100.0,
                     lagged_regressor=None):
        """Build an unfitted Prophet model with the external regressors.

        Args:
            changepoint_prior_scale: Forwarded to ``Prophet``.
            seasonality_prior_scale: Forwarded to ``Prophet``.
            lagged_regressor: Optional name of a lagged-water column to
                register as a third additive regressor. When ``None`` only
                ``Rain`` and ``Temperature`` are registered.

        Returns:
            The configured, unfitted ``Prophet`` instance.
        """
        model = Prophet(
            changepoint_prior_scale=changepoint_prior_scale,
            seasonality_prior_scale=seasonality_prior_scale,
        )
        # NOTE(review): per the Prophet docs, extra regressors enter the
        # linear part of the model, so each contributes only at the row where
        # it is non-zero, with a sign learned from the training data. A value
        # far outside the training range is extrapolated along that line.
        model.add_regressor('Rain', prior_scale=0.5, standardize=True, mode='additive')
        model.add_regressor('Temperature', prior_scale=0.1, standardize=True, mode='additive')
        if lagged_regressor is not None:
            # A column Prophet is not told about is ignored, so the lagged
            # series must be registered to have any effect on the forecast.
            model.add_regressor(lagged_regressor, prior_scale=0.5,
                                standardize=True, mode='additive')
        return model

    def clean_invalid_dates(self, df, date_column='ds'):
        """Return a copy of *df* with *date_column* parsed as datetimes.

        If strict parsing fails, a message is printed, unparseable rows are
        dropped and the remaining values are still returned as datetimes.
        The caller's frame is never modified.
        """
        # Work on a copy: callers pass groupby slices of a larger frame.
        df = df.copy()
        try:
            df[date_column] = pd.to_datetime(df[date_column])
        except (ValueError, TypeError, OverflowError) as e:
            print(f"Cleaning invalid dates: {e}")
            parsed = pd.to_datetime(df[date_column], errors='coerce')
            df[date_column] = parsed
            df = df[parsed.notna()]
        return df

    def add_lagged_features(self, df, water_column='Rain', lag=10):
        """Return a copy of *df* with *water_column* shifted down by *lag* rows.

        The new column is named ``Lagged_Water_{lag}_mins``; its first *lag*
        values are NaN. The shift is positional, so rows should already be in
        time order. A single shifted copy moves a water event *lag* rows
        later; it does not spread the event over several rows.
        """
        df = df.copy()
        df[self._lag_column(lag)] = df[water_column].shift(lag)
        return df

    def train_model(self, farmer_id, farmer_df):
        """Fit a model on *farmer_df* and store it under *farmer_id*."""
        # Clean invalid dates
        farmer_df = self.clean_invalid_dates(farmer_df)
        # shift() works on row position, so order rows by time first
        # (mergesort keeps the original order of duplicate timestamps).
        farmer_df = farmer_df.sort_values('ds', kind='mergesort')
        # Add lagged features
        farmer_df = self.add_lagged_features(
            farmer_df, water_column='Rain', lag=self._LAG_ROWS)
        # Remove rows with NaN values due to shifting
        farmer_df = farmer_df.dropna()
        # Create and train the model, including the lagged regressor
        model = self.create_model(
            lagged_regressor=self._lag_column(self._LAG_ROWS))
        model.fit(farmer_df)
        self.models[farmer_id] = model

    def predict(self, farmer_id, future_df):
        """Forecast ``yhat`` for the rows of *future_df*.

        Args:
            farmer_id: Key of a model previously stored by ``train_model``.
            future_df: Frame with ``ds``, ``Rain`` and ``Temperature``.

        Returns:
            A frame with the ``ds`` and ``yhat`` columns of the forecast.

        Raises:
            ValueError: If no model is stored for *farmer_id*.
        """
        model = self.models.get(farmer_id)
        if model is None:
            raise ValueError(f"No model found for farmer {farmer_id}")
        # Parse and order by time so the positional shift is a real time lag.
        future_df = future_df.copy()
        future_df['ds'] = pd.to_datetime(future_df['ds'])
        future_df = future_df.sort_values('ds', kind='mergesort')
        # Add lagged features to future data
        future_df = self.add_lagged_features(
            future_df, water_column='Rain', lag=self._LAG_ROWS)
        # Prophet rejects NaN in regressor columns. The first rows have no
        # earlier value inside this frame.
        # NOTE(review): 0 assumes no water was added just before the forecast
        # window - confirm, or seed from the tail of the training data.
        lag_col = self._lag_column(self._LAG_ROWS)
        future_df[lag_col] = future_df[lag_col].fillna(0)
        # Predict using the model
        forecast = model.predict(future_df)
        return forecast[['ds', 'yhat']]

    def save_model_and_forecast(self, farmer_id, forecast):
        """Pickle the model stored for *farmer_id* to ``farmer_{id}_model.pkl``.

        *forecast* is accepted for interface compatibility but is not written
        by this method.

        Raises:
            KeyError: If no model is stored for *farmer_id*.
        """
        # Look the model up first so a missing id does not leave an empty file.
        model = self.models[farmer_id]
        # NOTE(review): the Prophet docs advise against pickle and recommend
        # prophet.serialize.model_to_json; kept as-is so existing files load.
        model_filename = f"farmer_{farmer_id}_model.pkl"
        with open(model_filename, 'wb') as model_file:
            pickle.dump(model, model_file)
        print(f"Model saved as {model_filename}")
if __name__ == "__main__":
    # Load the training data and split it into one frame per farmer.
    df = pd.read_csv('Machine_Learning_Data.csv')
    farmer_data_list = dict(tuple(df.groupby('Farmer_Id')))
    multi_farmer_forecasting = MultiFarmerForecasting(farmer_data_list)

    # Fit one model for every farmer present in the training data.
    for current_id, current_frame in farmer_data_list.items():
        multi_farmer_forecasting.train_model(current_id, current_frame)

    # Forecast for a single example farmer from the prepared future frame,
    # which carries the Rain and Temperature columns.
    future_df = pd.read_csv('predict.csv')
    farmer_id = 47  # Example farmer ID
    predictions = multi_farmer_forecasting.predict(farmer_id, future_df)
    print(predictions)

    # Write the forecast to CSV.
    forecast_filename = f"farmer_{farmer_id}_forecast.csv"
    predictions.to_csv(forecast_filename, index=False)
    print(f"Forecast saved as {forecast_filename}")

    # Persist the fitted model.
    multi_farmer_forecasting.save_model_and_forecast(farmer_id, predictions)

    # Actual observations for the example farmer, kept for a
    # forecast-vs-actual comparison.
    actual_df = farmer_data_list[farmer_id]
How can I address these issues?
1