I am working on a machine learning project where I need to preprocess my data and prepare it for use with an LSTM model in PyTorch. I have a dataset with a datetime index and a target column called ‘price’. I split the data into training, validation, and test sets based on specific date ranges. I then scale the features using RobustScaler from sklearn and convert the data into PyTorch tensors.
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
import torch
# Chronological train / validation / test preprocessing for an LSTM:
# split by date, scale the features, convert to tensors, add a sequence axis.

# Inclusive [start, end] bounds of each split (the masks below use >= and <=).
train_start_date = '2020-01-01 00:00:00'
train_end_date = '2022-12-31 23:00:00'
val_start_date = '2023-01-01 00:00:00'
val_end_date = '2023-03-31 23:00:00'
test_start_date = '2023-04-01 00:00:00'
test_end_date = '2023-12-31 23:00:00'

# Split the data
# NOTE: as written this line rebinds 'data' to the Ellipsis object; it is a
# placeholder and must be replaced by (or removed in favour of) the real
# DataFrame, which needs a datetime index and a 'price' column.
data = ... # Assuming 'data' is already defined somewhere above in your code
train_data = data[(data.index >= train_start_date) & (data.index <= train_end_date)]
val_data = data[(data.index >= val_start_date) & (data.index <= val_end_date)]
test_data = data[(data.index >= test_start_date) & (data.index <= test_end_date)]

# Separate features and target ('price' is the target, everything else a feature)
X_train = train_data.drop(columns=['price'])
y_train = train_data['price']
X_val = val_data.drop(columns=['price'])
y_val = val_data['price']
X_test = test_data.drop(columns=['price'])
y_test = test_data['price']

# Initialize the scaler
scaler = RobustScaler()

# Fit on the training features only, then reuse the fitted statistics for the
# validation and test features so no information from those sets leaks in.
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Convert back to DataFrames (optional). The original index is passed
# explicitly: without it pandas assigns a fresh RangeIndex, and the scaled
# features would no longer share row labels with y_train / y_val / y_test.
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
X_val_scaled = pd.DataFrame(X_val_scaled, columns=X_val.columns, index=X_val.index)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

# Convert to PyTorch tensors (float32 for both features and target)
X_train_tensor = torch.tensor(X_train_scaled.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val_scaled.values, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

# Determine the dimensions for LSTM input
# NOTE: the reshape below only keeps X and y sample-aligned while
# sequence_length == 1. With a larger value it packs consecutive rows into
# non-overlapping chunks, so X ends up with fewer samples than y (and the
# reshape raises if the row count is not divisible by sequence_length).
# Build explicit sliding windows before raising this above 1.
sequence_length = 1
num_features = X_train.shape[1] # 32 in your case

# Reshape X tensors from (rows, features) to (samples, sequence_length, features)
X_train_lstm = X_train_tensor.reshape(-1, sequence_length, num_features)
X_val_lstm = X_val_tensor.reshape(-1, sequence_length, num_features)
X_test_lstm = X_test_tensor.reshape(-1, sequence_length, num_features)

# Reshape y tensors from (rows,) to (rows, 1), i.e. one target value per sample
y_train_lstm = y_train_tensor.reshape(-1, 1)
y_val_lstm = y_val_tensor.reshape(-1, 1)
y_test_lstm = y_test_tensor.reshape(-1, 1)

# Print the shapes to confirm
print("Forme de l'ensemble d'entraînement :", X_train_tensor.shape, y_train_tensor.shape)
print("Forme de l'ensemble de validation :", X_val_tensor.shape, y_val_tensor.shape)
print("Forme de l'ensemble de test :", X_test_tensor.shape, y_test_tensor.shape)
print("Shape of X_train for LSTM:", X_train_lstm.shape)
print("Shape of y_train for LSTM:", y_train_lstm.shape)
print("Shape of X_val for LSTM:", X_val_lstm.shape)
print("Shape of y_val for LSTM:", y_val_lstm.shape)
print("Shape of X_test for LSTM:", X_test_lstm.shape)
print("Shape of y_test for LSTM:", y_test_lstm.shape)
When I run this code, I get the following error:
TypeError: 'Tensor' object is not callable
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler
import torch
# Chronological train / validation / test preprocessing for an LSTM:
# split by date, scale the features, convert to tensors, add a sequence axis.

# Inclusive [start, end] bounds of each split (the masks below use >= and <=).
train_start_date = '2020-01-01 00:00:00'
train_end_date = '2022-12-31 23:00:00'
val_start_date = '2023-01-01 00:00:00'
val_end_date = '2023-03-31 23:00:00'
test_start_date = '2023-04-01 00:00:00'
test_end_date = '2023-12-31 23:00:00'

# Split the data
# NOTE: as written this line rebinds 'data' to the Ellipsis object; it is a
# placeholder and must be replaced by (or removed in favour of) the real
# DataFrame, which needs a datetime index and a 'price' column.
data = ... # Assuming 'data' is already defined somewhere above in your code
train_data = data[(data.index >= train_start_date) & (data.index <= train_end_date)]
val_data = data[(data.index >= val_start_date) & (data.index <= val_end_date)]
test_data = data[(data.index >= test_start_date) & (data.index <= test_end_date)]

# Separate features and target ('price' is the target, everything else a feature)
X_train = train_data.drop(columns=['price'])
y_train = train_data['price']
X_val = val_data.drop(columns=['price'])
y_val = val_data['price']
X_test = test_data.drop(columns=['price'])
y_test = test_data['price']

# Initialize the scaler
scaler = RobustScaler()

# Fit on the training features only, then reuse the fitted statistics for the
# validation and test features so no information from those sets leaks in.
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Convert back to DataFrames (optional). The original index is passed
# explicitly: without it pandas assigns a fresh RangeIndex, and the scaled
# features would no longer share row labels with y_train / y_val / y_test.
X_train_scaled = pd.DataFrame(X_train_scaled, columns=X_train.columns, index=X_train.index)
X_val_scaled = pd.DataFrame(X_val_scaled, columns=X_val.columns, index=X_val.index)
X_test_scaled = pd.DataFrame(X_test_scaled, columns=X_test.columns, index=X_test.index)

# Convert to PyTorch tensors (float32 for both features and target)
X_train_tensor = torch.tensor(X_train_scaled.values, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
X_val_tensor = torch.tensor(X_val_scaled.values, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled.values, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)

# Determine the dimensions for LSTM input
# NOTE: the reshape below only keeps X and y sample-aligned while
# sequence_length == 1. With a larger value it packs consecutive rows into
# non-overlapping chunks, so X ends up with fewer samples than y (and the
# reshape raises if the row count is not divisible by sequence_length).
# Build explicit sliding windows before raising this above 1.
sequence_length = 1
num_features = X_train.shape[1] # 32 in your case

# Reshape X tensors from (rows, features) to (samples, sequence_length, features)
X_train_lstm = X_train_tensor.reshape(-1, sequence_length, num_features)
X_val_lstm = X_val_tensor.reshape(-1, sequence_length, num_features)
X_test_lstm = X_test_tensor.reshape(-1, sequence_length, num_features)

# Reshape y tensors from (rows,) to (rows, 1), i.e. one target value per sample
y_train_lstm = y_train_tensor.reshape(-1, 1)
y_val_lstm = y_val_tensor.reshape(-1, 1)
y_test_lstm = y_test_tensor.reshape(-1, 1)

# Print the shapes to confirm
print("Forme de l'ensemble d'entraînement :", X_train_tensor.shape, y_train_tensor.shape)
print("Forme de l'ensemble de validation :", X_val_tensor.shape, y_val_tensor.shape)
print("Forme de l'ensemble de test :", X_test_tensor.shape, y_test_tensor.shape)
print("Shape of X_train for LSTM:", X_train_lstm.shape)
print("Shape of y_train for LSTM:", y_train_lstm.shape)
print("Shape of X_val for LSTM:", X_val_lstm.shape)
print("Shape of y_val for LSTM:", y_val_lstm.shape)
print("Shape of X_test for LSTM:", X_test_lstm.shape)
print("Shape of y_test for LSTM:", y_test_lstm.shape)
When I run this code, I get the following error:
TypeError: 'Tensor' object is not callable