Preface
I am trying to fine-tune LLaMA for a binary classification task by adding a classification head on top using AutoModelForSequenceClassification.
Implementation
- Initial dataset
<code>Dataset({
    features: ['text', 'label'],
    num_rows: 11454
})  # 'text' is a string, 'label' is an integer
</code>
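For reference, a dataset with this shape can be built from plain Python lists (a minimal sketch; the rows below are made up and are not my actual data):
<code>from datasets import Dataset

# Illustrative only: a tiny dataset with the same 'text'/'label' schema
# (the real `dataset` with 11,454 rows is loaded elsewhere)
toy_dataset = Dataset.from_dict({
    "text": ["the service was great", "the item arrived broken"],
    "label": [0, 1],  # 0 = GOOD, 1 = BAD, matching id2label below
})
print(toy_dataset)
# Dataset({
#     features: ['text', 'label'],
#     num_rows: 2
# })
</code>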
- Model loading
<code>from transformers import (
    AutoModelForSequenceClassification,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
)
from peft import LoraConfig

model_id = "meta-llama/Meta-Llama-3-8B"
id2label = {0: "GOOD", 1: "BAD"}
label2id = {"GOOD": 0, "BAD": 1}

# Load the base model in 8-bit with a 2-label classification head
quantization_config = BitsAndBytesConfig(
    load_in_8bit=True,
)
model = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
    device_map="auto",
    trust_remote_code=True,
)

# Attach a LoRA adapter to the attention and MLP projections
lora_config = LoraConfig(
    r=32,
    target_modules=["q_proj", "o_proj", "k_proj", "v_proj", "gate_proj", "up_proj", "down_proj"],
    bias="none",
)
model.add_adapter(lora_config)
</code>
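As a sanity check on the adapter setup, the number of trainable parameters after add_adapter can be counted like this (a small sketch; nothing beyond the model object above is assumed):
<code># Count trainable vs. total parameters after attaching the LoRA adapter
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"trainable params: {trainable:,} / {total:,} ({100 * trainable / total:.2f}%)")
</code>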
- Tokenizer
<code>from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# Llama has no pad token by default, so register one
tokenizer.add_special_tokens({'pad_token': '<PAD>'})
</code>
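The pad-token registration can be inspected on both the tokenizer and the model side with a few prints (diagnostic only; it does not change anything):
<code># Inspect the newly registered pad token and the resulting vocabulary size
print(tokenizer.pad_token, tokenizer.pad_token_id)    # the added '<PAD>' token and its id
print(len(tokenizer))                                 # tokenizer vocab size after adding the token
print(model.get_input_embeddings().weight.shape[0])   # embedding rows on the model side
</code>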
- Tokenize and split the dataset to identify overfitting during the training phase
<code># Tokenize the dataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def tokenize_function(examples):
    return tokenizer(
        examples['text'],
        padding='max_length',
        truncation=True
    )

tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Set format for PyTorch
tokenized_datasets.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])

# Split the dataset into train and validation
split_dataset = tokenized_datasets.train_test_split(test_size=0.2)
train_dataset = split_dataset['train']
eval_dataset = split_dataset['test']

def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=1)
    precision, recall, f1, _ = precision_recall_fscore_support(p.label_ids, preds, average='binary')
    acc = accuracy_score(p.label_ids, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }
</code>
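To verify compute_metrics independently of training, it can be exercised on a dummy EvalPrediction (the logits and labels below are made up purely for illustration):
<code>import numpy as np
from transformers import EvalPrediction

# Dummy logits for 4 examples and their true labels, illustrative only
dummy_logits = np.array([[2.0, -1.0], [0.1, 0.3], [-0.5, 1.5], [1.0, 0.2]])
dummy_labels = np.array([0, 1, 1, 0])

print(compute_metrics(EvalPrediction(predictions=dummy_logits, label_ids=dummy_labels)))
# Expected: accuracy 1.0, since the argmax of each row matches its label
</code>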
- Training arguments and training execution
<code>name = "llama-3-8b-finetuned"

training_args = TrainingArguments(
    output_dir="./output",
    logging_dir="./log",
    max_steps=1000,
    save_steps=100,
    eval_steps=100,
    logging_steps=10,
    save_strategy="steps",
    evaluation_strategy="steps",
    logging_strategy="steps",
    learning_rate=6e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="cosine",
    optim="paged_adamw_32bit",
    push_to_hub=True,
    report_to='wandb',
    run_name=name,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)
trainer.train()
</code>
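For context, the effective batch size and approximate number of epochs implied by these arguments work out as follows (plain arithmetic, assuming a single GPU):
<code># Effective batch size and approximate epoch count, assuming a single GPU
per_device_batch = 4
grad_accum = 4
effective_batch = per_device_batch * grad_accum   # 16 examples per optimizer step
train_rows = int(11454 * 0.8)                     # ~9163 rows after the 80/20 split
examples_seen = 1000 * effective_batch            # max_steps * effective batch = 16,000
print(examples_seen / train_rows)                 # roughly 1.75 epochs
</code>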
Issues and Questions
However, executing trainer.train() produces this error:
<code>:1141: indexSelectLargeIndex: block: [202,0,0], thread: [31,0,0] Assertion `srcIndex < srcSelectDimSize` failed.
</code>
What is causing this, and how can I fix it?
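For debugging, one way to get a Python-level traceback instead of only the asynchronous CUDA assertion is to force synchronous kernel launches (a sketch, assuming a single-GPU run; it does not fix anything by itself):
<code>import os

# Must be set before CUDA is initialized, e.g. at the very top of the
# notebook/script, or exported in the shell before launching:
#   CUDA_LAUNCH_BLOCKING=1 python train.py   # train.py is just a placeholder name
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

trainer.train()  # re-run; the stack trace should now point at the failing call
</code>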
Notes:
Package versions that I used:
<code># %pip install -q -U accelerate==0.24.1
# %pip install -q -U peft==0.6.2
# %pip install -q -U bitsandbytes==0.40.0
# %pip install -q -U transformers==4.35.2
# %pip install -q -U trl==0.7.4
# %pip install -q -U datasets==2.17.1
# %pip install -q -U wandb
# %pip install -q -U scipy
# %pip install -q -U torch==1.13
</code>