I am learning to use PyTorch and I am running it on Google Colab. The script below builds a toy dataset that I've created and an LSTM model that I want to train. I am getting several errors that I am not able to solve.
<code>import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchsummary import summary
import numpy as np
# Data preparation
X = np.array([[(1,2,3,3,1),(3,2,1,3,2),(3,2,2,3,3),(2,2,1,1,2),(2,1,1,1,1)],
[(4,5,6,4,4),(5,6,4,3,2),(5,5,6,1,3),(3,3,3,2,2),(2,3,3,2,1)],
[(7,8,9,4,7),(7,7,6,7,8),(5,8,7,8,8),(6,7,6,7,8),(5,7,6,6,6)],
[(7,8,9,8,6),(6,6,7,8,6),(8,7,8,8,8),(8,6,7,8,7),(8,6,7,8,8)],
[(4,5,6,5,5),(5,5,5,6,4),(6,5,5,5,6),(4,4,3,3,3),(5,5,4,4,5)],
[(4,5,6,5,5),(5,5,5,6,4),(6,5,5,5,6),(4,4,3,3,3),(5,5,4,4,5)],
[(1,2,3,3,1),(3,2,1,3,2),(3,2,2,3,3),(2,2,1,1,2),(2,1,1,1,1)]])
y = np.array([0, 1, 2, 2, 1, 1, 0])
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Move input tensors to the same device as the model
X = X.to(device)
y = y.to(device)
# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self):
        super(LSTMModel, self).__init__()
        self.lstm1 = nn.LSTM(input_size=5, hidden_size=128, batch_first=True)
        self.lstm2 = nn.LSTM(input_size=128, hidden_size=128, batch_first=True)
        self.fc1 = nn.Linear(128 * 5, 128)
        self.fc2 = nn.Linear(128, 3)

    def forward(self, x):
        x, _ = self.lstm1(x)
        x, _ = self.lstm2(x)
        x = x.reshape(x.size(0), -1)  # Flatten the output
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.fc2(x), dim=1)
        return x
model = LSTMModel().to(device)
# Print model summary using torchsummary
summary(model, input_size=(5, 5))
# Print the model architecture manually
print(model)
# Count the total number of parameters
total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f'Total parameters: {total_params}')
print(f'Trainable parameters: {trainable_params}')
</code>
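For reference, this is roughly how I plan to train the model once the errors are fixed (a minimal sketch; the loss pairs with the log_softmax output, but the optimizer, learning rate, and epoch count are placeholders I chose, not anything prescribed):
<code># Minimal training loop sketch (hyperparameters are placeholders)
criterion = nn.NLLLoss()  # NLLLoss expects log-probabilities, which log_softmax provides
optimizer = optim.Adam(model.parameters(), lr=1e-3)

model.train()
for epoch in range(100):
    optimizer.zero_grad()
    output = model(X)            # X: (7, 5, 5) -> output: (7, 3)
    loss = criterion(output, y)  # y: (7,) class indices
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch + 1}, loss: {loss.item():.4f}')
</code>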
Does anyone know what I am doing wrong?
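In case it helps: I suspect the summary(model, input_size=(5, 5)) line is at least one culprit, since as far as I can tell torchsummary's hooks call .size() on each module's output, and nn.LSTM returns a tuple, which would raise AttributeError: 'tuple' object has no attribute 'size'. A workaround I am considering is torchinfo, which I believe handles tuple outputs (a sketch, assuming torchinfo is installed via pip install torchinfo):
<code># Alternative model summary via torchinfo (assumes torchinfo is installed)
from torchinfo import summary
# Note: unlike torchsummary, torchinfo's input_size includes the batch dimension
summary(model, input_size=(1, 5, 5))
</code>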