I'm not very experienced, especially in NLP. I'm trying to train a chatbot for my psychological-therapy web application, and I've already trained the model twice on two datasets. Now I'm trying to fine-tune the model on a better dataset that I found.
The new dataset has different column names (question, response_j, response_k), while the old ones had only two (text, label), with labels from 0-4 (0: Sadness, 1: Joy, 2: Love, 3: Anger, 4: Fear).
When I try to fine-tune the model I get many errors. I'll provide my code and the latest error below.
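For context, here is roughly how the two schemas compare; this is only a quick sketch, and 'old_dataset.csv' is a placeholder name for one of the old files:

# schema_check.py (sketch only; 'old_dataset.csv' is a placeholder for one of the old files)
import pandas as pd

old_df = pd.read_csv('datasets/old_dataset.csv')          # columns: text, label (0-4 emotion ids)
new_df = pd.read_parquet('datasets/dataset001.parquet')   # columns: question, response_j, response_k

print(old_df.columns.tolist())   # ['text', 'label']
print(new_df.columns.tolist())   # ['question', 'response_j', 'response_k']
print(new_df.head(3))            # look at a few rows before adapting the pipeline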
# preprocess_dataset.py
import pandas as pd
# Load the new fine-tuning dataset
df = pd.read_parquet('datasets/dataset001.parquet')
# Ensure responses are not null and are strings
# (fill missing values first, then cast; casting first would turn NaN into the string 'nan')
df['response_j'] = df['response_j'].fillna('').astype(str)
df['response_k'] = df['response_k'].fillna('').astype(str)
# Save the cleaned dataset
df.to_parquet('datasets/dataset001_cleaned.parquet')
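To sanity-check the cleaning step, this is a small check I can run afterwards (a sketch, assuming the cleaned file saved above):

# verify_cleaning.py (sketch: confirm there are no nulls left after preprocessing)
import pandas as pd

df = pd.read_parquet('datasets/dataset001_cleaned.parquet')
assert df['response_j'].isna().sum() == 0
assert df['response_k'].isna().sum() == 0
print(df[['question', 'response_j', 'response_k']].head())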
# tokenize_dataset.py
import pandas as pd
from transformers import AutoTokenizer
from datasets import Dataset
# Load the cleaned dataset
df = pd.read_parquet('datasets/dataset001_cleaned.parquet')
# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
# Tokenize the dataset
def tokenize_function(examples):
    inputs = tokenizer(examples['question'], padding="max_length", truncation=True, max_length=128)
    responses_j = tokenizer(examples['response_j'], padding="max_length", truncation=True, max_length=128)
    responses_k = tokenizer(examples['response_k'], padding="max_length", truncation=True, max_length=128)
    return {
        'input_ids': inputs['input_ids'],
        'attention_mask': inputs['attention_mask'],
        'response_j_input_ids': responses_j['input_ids'],
        'response_j_attention_mask': responses_j['attention_mask'],
        'response_k_input_ids': responses_k['input_ids'],
        'response_k_attention_mask': responses_k['attention_mask']
    }
# Convert the dataframe to a Hugging Face Dataset
dataset = Dataset.from_pandas(df)
tokenized_dataset = dataset.map(tokenize_function, batched=True)
tokenized_dataset.save_to_disk('datasets/tokenized_dataset')
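To confirm the tokenization produced the fields I expect, a quick inspection sketch (assuming the dataset saved above):

# inspect_tokenized.py (sketch: peek at the saved tokenized dataset)
from datasets import load_from_disk

ds = load_from_disk('datasets/tokenized_dataset')
print(ds.column_names)            # original columns plus input_ids, response_j_input_ids, ...
print(len(ds[0]['input_ids']))    # 128 after padding/truncation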
# define_model.py
from transformers import BertPreTrainedModel, BertModel
import torch.nn as nn
class CustomBertForQA(BertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.bert = BertModel(config)
        # Two linear heads, one per candidate response (assuming binary classification for responses)
        self.classifier_j = nn.Linear(config.hidden_size, 2)
        self.classifier_k = nn.Linear(config.hidden_size, 2)

    def forward(self, input_ids, attention_mask, response_j_input_ids=None, response_j_attention_mask=None, response_k_input_ids=None, response_k_attention_mask=None):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = outputs[0]  # token-level hidden states: (batch, seq_len, hidden_size)
        # Both heads are applied to the full sequence output; only raw logits are returned (no loss)
        logits_j = self.classifier_j(sequence_output)
        logits_k = self.classifier_k(sequence_output)
        return logits_j, logits_k
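To see what the forward pass actually returns, here is a small shape check (a sketch, not part of the training pipeline; the dummy batch is made up):

# shape_check.py (sketch: run a dummy batch through the untrained model)
import torch
from transformers import BertConfig
from define_model import CustomBertForQA

config = BertConfig.from_pretrained('bert-base-uncased')
model = CustomBertForQA(config)

dummy_ids = torch.randint(0, config.vocab_size, (2, 128))   # fake batch of 2 sequences
dummy_mask = torch.ones_like(dummy_ids)
logits_j, logits_k = model(input_ids=dummy_ids, attention_mask=dummy_mask)
print(logits_j.shape, logits_k.shape)   # torch.Size([2, 128, 2]) each -- raw logits, no loss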
# train_model.py
import torch
from transformers import Trainer, TrainingArguments, BertConfig, AutoTokenizer
from datasets import load_from_disk
from define_model import CustomBertForQA # Import your custom model here
# Initialize the tokenizer
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
# Load tokenized dataset
tokenized_dataset = load_from_disk('datasets/tokenized_dataset')
dataset_split = tokenized_dataset.train_test_split(test_size=0.2)
train_dataset = dataset_split['train']
test_dataset = dataset_split['test']
# Initialize the model
config = BertConfig.from_pretrained('bert-base-uncased')
model = CustomBertForQA.from_pretrained('bert-base-uncased', config=config)
# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    eval_strategy='epoch'
)
# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset
)
# Train the model
trainer.train()
# Save the model and tokenizer
model.save_pretrained('./fine_tuned_qa_model')
tokenizer.save_pretrained('./fine_tuned_qa_model')
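And this is how I plan to load the saved model back into the web application later (a sketch, assuming training finishes and the files are saved to the path above):

# load_model.py (sketch: reload the fine-tuned model and tokenizer from disk)
from transformers import AutoTokenizer, BertConfig
from define_model import CustomBertForQA

tokenizer = AutoTokenizer.from_pretrained('./fine_tuned_qa_model')
config = BertConfig.from_pretrained('./fine_tuned_qa_model')
model = CustomBertForQA.from_pretrained('./fine_tuned_qa_model', config=config)
model.eval()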
This is the error I'm getting:
C:\Users\Yaman\Desktop\Project2\myenv\Lib\site-packages\huggingface_hub\file_download.py:1132: FutureWarning: resume_download is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use force_download=True.
  warnings.warn(
Some weights of CustomBertForQA were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier_j.bias', 'classifier_j.weight', 'classifier_k.bias', 'classifier_k.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/1428 [00:00<?, ?it/s]
Traceback (most recent call last):
  File "C:\Users\Yaman\Desktop\Project2\train_model.py", line 41, in <module>
    trainer.train()
  File "C:\Users\Yaman\Desktop\Project2\myenv\Lib\site-packages\transformers\trainer.py", line 1885, in train
    return inner_training_loop(
           ^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Yaman\Desktop\Project2\myenv\Lib\site-packages\transformers\trainer.py", line 2216, in _inner_training_loop
    tr_loss_step = self.training_step(model, inputs)
                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Yaman\Desktop\Project2\myenv\Lib\site-packages\transformers\trainer.py", line 3250, in training_step
    self.accelerator.backward(loss)
  File "C:\Users\Yaman\Desktop\Project2\myenv\Lib\site-packages\accelerate\accelerator.py", line 2125, in backward
    loss.backward(**kwargs)
  File "C:\Users\Yaman\Desktop\Project2\myenv\Lib\site-packages\torch\_tensor.py", line 525, in backward
    torch.autograd.backward(
  File "C:\Users\Yaman\Desktop\Project2\myenv\Lib\site-packages\torch\autograd\__init__.py", line 260, in backward
    grad_tensors_ = _make_grads(tensors, grad_tensors_, is_grads_batched=False)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\Yaman\Desktop\Project2\myenv\Lib\site-packages\torch\autograd\__init__.py", line 133, in _make_grads
    raise RuntimeError(
RuntimeError: grad can be implicitly created only for scalar outputs
  0%|          | 0/1428 [00:07<?, ?it/s]