I am trying to finetune Llama-3-8B for a regression task using PEFT, where the model outputs a float to “score” the input. I have adjusted the model by setting the number of output logits to 1 and using mean-squared-error as the loss function. I format my data by combining the instruction with the input statement, tokenizing that, and keeping the output float value as-is in “labels”. I am getting this error:
RuntimeError: The size of tensor a (8) must match the size of tensor b (512) at non-singleton dimension 1
My MAX_SEQ_LEN when tokenizing the input instruction and statement is 512, but the trainer is somehow matching that sequence length against my single output float. All the resources online for finetuning Llama 3 are for text responses, not regression tasks, so I’m not sure how to do this. Below is my code:
from datasets import load_dataset
from transformers import AutoTokenizer, TrainingArguments, AutoModelForSequenceClassification, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import torch
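# 4-bit NF4 quantization so the 8B model fits in GPU memory for QLoRA-style finetuning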
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4"
)
MAX_SEQ_LEN = 512
device = "cuda" if torch.cuda.is_available() else "cpu"
dataset = load_dataset("json", data_files="datasets/aNLI/train_rationale_cleaned.jsonl", cache_dir="/data/etey916")["train"]
#dataset = load_dataset("json", data_files="datasets/aNLI/train_rationale_output.jsonl", cache_dir="/data/etey916")
model_name = "meta-llama/Meta-Llama-3-8B"
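# Llama 3 ships without a dedicated pad token, so EOS is reused for padding
# (set on both the model config and the tokenizer below)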
model = AutoModelForSequenceClassification.from_pretrained(model_name, quantization_config=quant_config, problem_type="regression", num_labels=1)
model.config.pad_token_id = model.config.eos_token_id
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
def format_dataset(sample):
    input_text = sample['prompt'].replace('_', sample['rationale'])
    instruction = ("<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n"
                   "Given a statement consisting of the initial context, hypothesis, inference, and outcome, "
                   "determine how plausible it is by assigning a continuous score between -1 and 1, "
                   "with -1 being extremely implausible and 1 being extremely plausible.\n"
                   f"{input_text}<|eot_id|>")
    output = float(sample['label'])
    tokenized_inputs = tokenizer(instruction, truncation=True, padding="max_length", max_length=MAX_SEQ_LEN)
    return {**tokenized_inputs, "labels": output}
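# For reference, one line of the JSONL looks roughly like this (illustrative values only,
# not real data; the '_' in "prompt" marks where the rationale gets spliced in):
# {"prompt": "Initial context ... _ ... outcome.", "rationale": "She had forgotten her keys.", "label": 0.7}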
def compute_metrics_for_regression(eval_pred):
    logits, labels = eval_pred
    labels = labels.reshape(-1, 1)
    mse = mean_squared_error(labels, logits)
    mae = mean_absolute_error(labels, logits)
    r2 = r2_score(labels, logits)
    # count a prediction as "accurate" if its squared error is under 0.25
    sse = ((logits - labels).flatten()**2).tolist()
    accuracy = sum([1 for e in sse if e < 0.25]) / len(sse)
    return {"mse": mse, "mae": mae, "r2": r2, "accuracy": accuracy}
dataset = dataset.map(
    format_dataset,
    remove_columns=dataset.column_names
)
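# After mapping, each example is {"input_ids": [...], "attention_mask": [...], "labels": <float>}
# with input_ids padded/truncated to MAX_SEQ_LEN (512)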
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules="all-linear",
    bias="none",
    task_type="CAUSAL_LM",
)
peft_model = get_peft_model(model, lora_config)
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=10,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    #metric_for_best_model='eval_loss',
    load_best_model_at_end=True,
    greater_is_better=False
)
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    peft_config=lora_config,
    train_dataset=dataset,
    compute_metrics=compute_metrics_for_regression,
)
trainer.train()
trainer.save_model("./finetuned_model")
tokenizer.save_pretrained("./finetuned_model")
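If it helps, my suspicion is that SFTTrainer’s default language-modeling collator (DataCollatorForLanguageModeling with mlm=False, as far as I can tell) manufactures token-level labels from input_ids, so my scalar labels get replaced and the [8, 1] regression logits end up compared against [8, 512] targets. A minimal shape check along those lines (a sketch of my debugging, assuming that default collator; not part of the training script):

from transformers import DataCollatorForLanguageModeling

# collate a batch of 8 the way I believe SFTTrainer does internally
collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
features = [{k: dataset[i][k] for k in ("input_ids", "attention_mask")} for i in range(8)]
batch = collator(features)
print(batch["labels"].shape)  # torch.Size([8, 512]) -- one label per token; my floats are gone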