When I run this code in a Google Colab notebook, the runtime session runs out of RAM and closes automatically. I am using PyTorch. I tried switching the runtime from CPU to GPU, but the session still closes.
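To see roughly where memory runs out, I added a small helper to print RAM and GPU usage between steps (a rough sketch of my own; report_memory is just a name I made up, and psutil and torch are both preinstalled on Colab):

import psutil
import torch

def report_memory(tag=""):
    # System RAM currently in use, in GB
    ram_used = psutil.virtual_memory().used / 1e9
    msg = f"[{tag}] RAM used: {ram_used:.2f} GB"
    if torch.cuda.is_available():
        # GPU memory currently allocated by PyTorch tensors, in GB
        gpu_alloc = torch.cuda.memory_allocated() / 1e9
        msg += f", GPU allocated: {gpu_alloc:.2f} GB"
    print(msg)

report_memory("before model init")

The full code I am running is below.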
!pip install datasets
!pip install transformers
import torch
import wandb
import torch.nn as nn
from datasets import load_dataset
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
!huggingface-cli login
model_name = "distilgpt2"
print(f"Using GPU: {torch.cuda.is_available()}")
tokenizer = AutoTokenizer.from_pretrained(model_name)
len(tokenizer)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})
len(tokenizer)
class Custom_GPT2_Model(nn.Module):
    def __init__(self, tokenizer):
        super().__init__()
        self.gpt2 = AutoModelForCausalLM.from_pretrained(model_name)
        # Resize embeddings to account for the added [PAD] token
        self.gpt2.resize_token_embeddings(len(tokenizer))
        # Freeze the pretrained backbone; only the custom layer should train
        for param in self.gpt2.parameters():
            param.requires_grad = False
        self.gpt2.gradient_checkpointing_enable()
        # Extra trainable projection applied on top of the LM head logits
        self.custom_layer = nn.Linear(self.gpt2.config.vocab_size, self.gpt2.config.vocab_size)

    def forward(self, input_ids, attention_mask=None, labels=None):
        outputs = self.gpt2(input_ids=input_ids, attention_mask=attention_mask)
        logits = self.custom_layer(outputs.logits)
        loss = None
        if labels is not None:
            loss_func = nn.CrossEntropyLoss()
            # loss = loss_func(logits, labels)
            loss = loss_func(logits.view(-1, logits.size(-1)), labels.view(-1))
        return {
            'loss': loss,
            'logits': logits
        }
model = Custom_GPT2_Model(tokenizer)
# Data Pre-Processing
dataset = load_dataset("wikitext", "wikitext-2-raw-v1")
dataset
def tokenize_func(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True, max_length=128, return_tensors='pt')
tokenized_dataset = dataset.map(tokenize_func, batched=True, remove_columns=['text'])
tokenized_dataset
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=0.5,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    fp16=True,
    warmup_steps=2500,
    learning_rate=0.1,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=5000,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'].select(range(100)),
    eval_dataset=tokenized_dataset['test'].select(range(100)),
    data_collator=DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False),
)
trainer.train()
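For reference, here is a quick sanity check I can run before training (not part of the training loop itself) to see how many parameters the wrapper holds in total versus how many are actually trainable:

total_params = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")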
How should I manage memory while fine-tuning deep learning models?