I have a DataFrame with 11305 rows × 27 columns. Its columns are:
Index(['Year', 'Month', 'Day', 'Hour_0', 'Hour_1', 'Hour_2', 'Hour_3',
'Hour_4', 'Hour_5', 'Hour_6', 'Hour_7', 'Hour_8', 'Hour_9',
'Hour_10', 'Hour_11', 'Hour_12', 'Hour_13', 'Hour_14',
'Hour_15', 'Hour_16', 'Hour_17', 'Hour_18', 'Hour_19',
'Hour_20', 'Hour_21', 'Hour_22', 'Hour_23'],
dtype='object')
(The column names are translated here; the code below accesses the hourly columns as 'Stunde_0' … 'Stunde_23'.)
I want to perform anomaly detection separately for every hour, but when the results are shown, the hours are combined, which is wrong. Each day should keep its 24 individual hours (0 to 23). I have no experience in solving this kind of problem.
I would appreciate any help, and thank you in advance for your response.
My code:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objs as go
from keras.models import Model
from keras.layers import Input, Dense
# Load the table; the loop below expects one column per hour named
# 'Stunde_0' ... 'Stunde_23'.
# NOTE(review): the path contains no separators - they look to have been lost
# when the snippet was pasted. Confirm the real location before running.
df = pd.read_csv(r'C:UserssaturnDesktopadfNew-Fahrten_time.csv')

# A single scaler object that is re-fitted on every hour column in turn.
scaler = MinMaxScaler()

# Maps hour column name -> array with one squared reconstruction error per row.
reconstruction_errors = {}

for hour in range(24):
    column = f'Stunde_{hour}'

    # Min-max scale this hour's values as a single-feature 2-D array.
    hour_data_scaled = scaler.fit_transform(df[column].values.reshape(-1, 1))

    # Tiny autoencoder: 1 input -> 1 ReLU unit -> 1 sigmoid output.
    input_layer = Input(shape=(1,))
    bottleneck = Dense(1, activation='relu')(input_layer)
    reconstruction = Dense(1, activation='sigmoid')(bottleneck)
    autoencoder = Model(input_layer, reconstruction)
    autoencoder.compile(optimizer='adam', loss='mse')

    # Fit the network to reproduce its own (scaled) input.
    autoencoder.fit(
        hour_data_scaled,
        hour_data_scaled,
        epochs=2,
        batch_size=10,
        shuffle=True,
        verbose=0,
    )

    # Squared difference between input and reconstruction, averaged over the
    # (single) feature axis, which leaves one error value per row.
    reconstructed = autoencoder.predict(hour_data_scaled)
    squared_error = np.power(hour_data_scaled - reconstructed, 2)
    reconstruction_errors[column] = np.mean(squared_error, axis=1)
# Collect the per-hour error arrays into one frame that shares df's row index
# (one column per hour).
reconstruction_errors_df = pd.DataFrame(reconstruction_errors, index=df.index)

# Mean reconstruction error of every hour column, taken over all rows.
mean_reconstruction_error = reconstruction_errors_df.mean(axis=0)

# Per-hour anomaly thresholds: that hour's mean error plus two standard
# deviations of that hour's own errors. A single cut-off built from the
# spread of the 24 hourly means would describe how the hours differ from each
# other, not how unusual a row is within its hour, so each hour gets its own
# threshold. `threshold` is a Series indexed by hour column name.
threshold = mean_reconstruction_error + 2 * reconstruction_errors_df.std(axis=0)

# Positional row indices of the anomalies, keyed by hour number (0-23).
anomaly_indices = {}
for hour in range(24):
    column = f'Stunde_{hour}'
    exceeds = reconstruction_errors_df[column] > threshold[column]
    anomaly_indices[hour] = np.where(exceeds)[0]
# Build one figure holding, for every hour, a line trace of its reconstruction
# errors and a red marker trace of the rows flagged as anomalies.
# NOTE(review): the x values are df.index (the row labels), while the axis
# title below promises dates - confirm whether a date index should be built
# from the table's date columns first.
fig = go.Figure()

# Reconstruction error curve for each hour.
for hour in range(24):
    fig.add_trace(go.Scatter(x=df.index,
                             y=reconstruction_errors_df[f'Stunde_{hour}'],
                             mode='lines',
                             name=f'Rekonstruktionsfehler Stunde {hour}'))

# Anomaly markers for each hour.
for hour in range(24):
    positions = anomaly_indices[hour]
    anomaly_dates = df.index[positions]
    # anomaly_indices holds positional offsets (from np.where), so the errors
    # must be selected by position too; label-based .loc only agrees with
    # that when the index happens to be the default 0..n-1 range.
    anomaly_errors = reconstruction_errors_df[f'Stunde_{hour}'].iloc[positions]
    fig.add_trace(go.Scatter(x=anomaly_dates,
                             y=anomaly_errors,
                             mode='markers',
                             marker=dict(color='red', size=8),
                             name=f'Anomalie Stunde {hour}'))

# Titles for the figure and both axes.
fig.update_layout(title='Anomaliedaten',
                  xaxis_title='Datum (Tag.Monat.Jahr)',
                  yaxis_title='Rekonstruktionsfehler')
fig.show()
# Report, hour by hour, the index labels of the rows flagged as anomalies.
print("Anomaliedaten nach Stunden:")
for hour in range(24):
    # Header and index are written on separate lines by a single print call.
    print(f"Anomalien für Stunde {hour}:", df.index[anomaly_indices[hour]], sep="\n")
Result:
My dataset:
Mohammad Othman is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.