I keep getting the following error whenever I try to train my PyTorch model:
RuntimeError: CUDA error: CUBLAS_STATUS_EXECUTION_FAILED when calling cublasLtMatmul with transpose_mat1 1 transpose_mat2 0 m 512 n 16 k 864 mat1_ld 864 mat2_ld 864 result_ld 512 abcType 0 computeType 68 scaleType 0
I will give details of my dataloader and model below.
Dataloader:
import pandas as pd
import torch
from torch.utils.data import Dataset
from typing import List

class SampleDataset(Dataset):
    def __init__(
        self,
        df_train: pd.DataFrame,
        df_label: pd.DataFrame,
        numeric_features_list: List,
        categorical_features_list: List,
    ):
        self.texts = df_train["description_qanda_processed"].values
        self.numerical_features = df_train[numeric_features_list].values
        self.categorical_features = df_train[categorical_features_list].values
        self.labels = df_label["num_claim_attempts_scale"].values[:, None]

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        text = self.texts[idx]
        numerical_feature = torch.tensor(
            self.numerical_features[idx], dtype=torch.float32
        )
        categorical_feature = torch.tensor(
            self.categorical_features[idx], dtype=torch.long
        )
        label = torch.tensor(self.labels[idx], dtype=torch.float32)
        # move each sample's tensors to the global `device`
        # (defined in the training snippet below)
        return (
            text,
            numerical_feature.to(device),
            categorical_feature.to(device),
            label.to(device),
        )
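For completeness, the dataset and dataloader are created roughly like this (a sketch; the column names and batch size here are placeholders, not my real values):

from torch.utils.data import DataLoader

# Sketch of the dataset/dataloader construction (column names and batch size
# are placeholders for illustration, not my actual values).
numeric_features_list = ["num_feat_1", "num_feat_2"]      # placeholder columns
categorical_features_list = ["cat_feat_1", "cat_feat_2"]  # placeholder columns

dataset = SampleDataset(
    df_train=df_train,
    df_label=df_label,
    numeric_features_list=numeric_features_list,
    categorical_features_list=categorical_features_list,
)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)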
Model architecture:
import torch
import torch.nn as nn
from typing import List
from transformers import DistilBertModel, DistilBertTokenizer

class RegressionBertModel(nn.Module):
    def __init__(
        self,
        distilbert_model_name: str,
        num_numerical_features: int,
        num_categorical_features: List[int],
        embedding_dims: List[int],
        num_connected_layers: int,
        output_size: int,
    ):
        super().__init__()
        # load pre-trained DistilBERT model and tokenizer
        self.bert = DistilBertModel.from_pretrained(distilbert_model_name)
        self.tokenizer = DistilBertTokenizer.from_pretrained(distilbert_model_name)
        # size of the hidden state from DistilBERT
        bert_hidden_size = self.bert.config.hidden_size
        # embedding layers for categorical features
        self.embeddings = nn.ModuleList(
            [
                nn.Embedding(num_categories, embedding_dim)
                for num_categories, embedding_dim in zip(
                    num_categorical_features, embedding_dims
                )
            ]
        )
        # fully connected layers applied to the concatenated features
        self.fc_combined = nn.Sequential(
            nn.Linear(
                bert_hidden_size + num_numerical_features + sum(embedding_dims), 512
            ),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, output_size),  # regression output
        )

    def forward(self, input_text, numerical_features, categorical_features, device):
        inputs = self.tokenizer(
            input_text,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )
        inputs = {key: value.to(device) for key, value in inputs.items()}
        outputs = self.bert(**inputs)
        # embeddings for the [CLS] token
        bert_embeddings = outputs.last_hidden_state[:, 0, :]
        # process categorical features through their embedding layers
        categorical_embeddings = [
            embedding(categorical_features[:, i].to(device)).type(torch.float32)
            for i, embedding in enumerate(self.embeddings)
        ]
        numerical_features = numerical_features.to(device, dtype=torch.float32)
        categorical_out = torch.cat(categorical_embeddings, dim=1).to(torch.float32)
        # concatenate BERT embeddings, numerical features, and categorical features
        combined_features = torch.cat(
            (bert_embeddings, numerical_features, categorical_out), dim=1
        )
        # pass through the combined fully connected layers
        output = self.fc_combined(combined_features)
        return output
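For reference, the model can be exercised in isolation with a dummy batch along these lines (a sketch; the shapes and values are made up purely to show the expected inputs):

# Sketch: exercising the model on a dummy batch to show the expected input
# shapes (all values here are made up for illustration).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = RegressionBertModel(
    distilbert_model_name="distilbert-base-uncased",
    num_numerical_features=3,
    num_categorical_features=[12, 61, 141, 8],
    embedding_dims=[6, 30, 50, 4],
    num_connected_layers=2,
    output_size=1,
).to(device)

texts = ["example description"] * 4
numerical = torch.randn(4, 3, device=device)
# categorical indices must stay below the corresponding embedding sizes
categorical = torch.stack(
    [torch.randint(0, n, (4,)) for n in [12, 61, 141, 8]], dim=1
).to(device)

with torch.no_grad():
    out = model(texts, numerical, categorical, device)
print(out.shape)  # expected: torch.Size([4, 1])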
Model training snippet:
from tqdm import tqdm

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

_model = RegressionBertModel(
    distilbert_model_name="distilbert-base-uncased",
    num_numerical_features=len(_columns),
    num_categorical_features=[12, 61, 141, 8],
    embedding_dims=[6, 30, 50, 4],
    num_connected_layers=model_config["num_connected_layers"],
    output_size=1,
)
_model.to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(_model.parameters(), lr=0.0001, weight_decay=0.00001)

num_epochs = 1
for epoch in range(num_epochs):
    _model.train()
    epoch_loss = 0
    with tqdm(total=len(dataloader), desc=f"Epoch {epoch+1}/{num_epochs}") as pbar:
        for texts, numerical_features, categorical_features, labels in dataloader:
            optimizer.zero_grad()
            outputs = _model(texts, numerical_features, categorical_features, device)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            pbar.update(1)
    avg_epoch_loss = epoch_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_epoch_loss}")
Can someone please help? I have checked the feature normalisation, and the values seem to be within reasonable bounds (roughly -3 to +7). I have also cross-checked the shape of combined_features for each batch, and nothing seems out of the ordinary; a rough sketch of that check is below.
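A sketch of that kind of per-batch check (illustrative; not the exact code I ran):

# Illustrative per-batch sanity check: look at value ranges, the maximum
# categorical index per column, and the feature shapes for one batch.
for texts, numerical_features, categorical_features, labels in dataloader:
    print("numerical min/max:",
          numerical_features.min().item(), numerical_features.max().item())
    print("categorical max per column:", categorical_features.max(dim=0).values)
    print("shapes:", numerical_features.shape, categorical_features.shape)
    break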