Please help me with this one. I am trying to improve the text in my CSV file using Llama 3.1 8B on a Tesla T4 GPU (a GRID T4-16Q vGPU). I have successfully loaded the model onto the GPU, but when I run the nvidia-smi command I do not see any spike in GPU usage. I am running the model on my local machine with the code below. Please take a look and let me know what I can do to improve the speed of text generation. Everything in the code looks correct to me, but I am still unable to generate text, and I cannot understand why.
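For context, here is a minimal GPU-memory check that can run alongside nvidia-smi (just a sketch, not one of my notebook cells); memory_allocated should jump by several GB once the model is actually on the GPU:

import torch
# Standalone check: compare these numbers before and after loading the model
print(torch.cuda.is_available())
print(f"{torch.cuda.memory_allocated(0) / 1024**3:.2f} GiB allocated")
print(f"{torch.cuda.memory_reserved(0) / 1024**3:.2f} GiB reserved")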
cell1 (Jupyter Lab):-
import pandas as pd
import re
import os
import string
import contractions
import emoji
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from unidecode import unidecode
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
print("Libraries loaded")
cell2:-
import torch
print(torch.cuda.is_available()) # Should print True if CUDA is available
print(torch.cuda.get_device_name(0)) # Should print the name of the GPU
o/p :-
True
GRID T4-16Q
cell3:-
print("PyTorch version:", torch.__version__)
o/p :-
PyTorch version: 2.5.1+cu121
cell4:-
# Load the dataset
file_path = r'path to my file'
df = pd.read_csv(file_path, low_memory=False)
cell5:-
# Initial NaN count in the 'x' column
nan_count_before = df['x'].isna().sum()
print(f"NaN count before cleaning: {nan_count_before}")
o/p :-
NaN count before cleaning: 17965
cell6:-
# Drop rows with NaN in the 'x' column
df = df.dropna(subset=['x'])
# Convert 'x' to string
df['x'] = df['x'].astype(str)
print(f"Number of records after dropping NaN: {len(df)}")
o/p :-
Number of records after dropping NaN: 35174
cell7:-
def contains_only_punctuation_or_nonalpha(text):
    """
    Checks if the text contains only punctuation or non-alphabetic characters.
    """
    text = text.strip()
    if not text:
        return True
    return not bool(re.search(r'[a-zA-Z]', text))
# Identify and remove rows containing only punctuation or non-alphabetic characters
rows_to_remove = df['x'].apply(contains_only_punctuation_or_nonalpha)
removed_count = rows_to_remove.sum()
# Remove these rows from the DataFrame
df = df[~rows_to_remove]
print(f"Number of records removed: {removed_count}")
print(f"Number of records after cleaning: {len(df)}")
o/p :-
Number of records removed: 2641
Number of records after cleaning: 32533
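For reference, a quick spot-check of how the filter behaves on a few hand-made strings (my own toy inputs, not rows from the dataset):

# Strings with no a-z/A-Z letters should be flagged for removal
for s in ["!!!", "1234", "ok?", ""]:
    print(repr(s), contains_only_punctuation_or_nonalpha(s))
# '!!!' True, '1234' True, 'ok?' False, '' True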
cell8:-
# Define the model and tokenizer directory
model_directory = r"path to my model/llama_model"
model_name = "meta-llama/Llama-3.1-8B"
print(os.getenv("TORCH_USE_CUDA_DSA"))
o/p :-
None
cell9:-
try:
    # Download and save the model
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
    # Save tokenizer and model to the specified directory
    tokenizer.save_pretrained(model_directory)
    model.save_pretrained(model_directory)
    print("Model and tokenizer downloaded successfully to:", model_directory)
except Exception as e:
    print(e)
o/p :-
model was saved successfully here
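Side note: a variant I have seen suggested but have not tried here is loading the weights in half precision, which should matter for an 8B model on a 16 GB card (a sketch, reusing model_name from cell8):

# Untried fp16 variant: halves the weight memory relative to fp32
# (torch_dtype is a standard from_pretrained keyword argument)
model_fp16 = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
)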
cell10:-
# Environment settings for CUDA
os.environ["TORCH_USE_CUDA_DSA"] = "1"
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
print(os.getenv("TORCH_USE_CUDA_DSA"))
o/p :-
1
cell11:-
# Load tokenizer and model from the custom directory
try:
    tokenizer = AutoTokenizer.from_pretrained(model_directory, local_files_only=True)
    model = AutoModelForCausalLM.from_pretrained(model_directory, local_files_only=True)
    # Ensure pad token is set if needed
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id
    print("Tokenizer and model loaded successfully from local directory.")
except Exception as e:
    print(f"Error loading model/tokenizer: {e}")
    raise
o/p :-
Loading checkpoint shards: 100% 7/7 [00:02<00:00, 2.61it/s]
Tokenizer and model loaded successfully from local directory.
cell12:-
# Check CUDA version and load the model onto the device
print(f"CUDA version: {torch.version.cuda}")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(f"Model loaded to {device}")
o/p :-
CUDA version: 12.1
Model loaded to cuda
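A small extra check that the weights really moved (not in my original run, just for illustration):

# Should print "cuda:0" if the parameters are on the GPU
print(next(model.parameters()).device)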
cell13:-
model.eval()
cell14:-
def initialize_pipeline(model, tokenizer, device):
    """
    Initialize the text-generation pipeline using the specified model and tokenizer.
    """
    try:
        text_gen_pipeline = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            device=device,  # use the device passed in instead of recomputing it
        )
        print("Pipeline initialized successfully.")
        return text_gen_pipeline
    except Exception as e:
        print(f"Error initializing pipeline: {e}")
        raise
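For completeness, a sketch of how the pipeline would then be called and timed (the prompt string is a made-up example, not from my data):

import time
text_gen = initialize_pipeline(model, tokenizer, device)
start = time.time()
out = text_gen(
    "Rewrite this remark politely: item arrived broken.",  # made-up sample prompt
    max_new_tokens=64,
    do_sample=False,
)
print(out[0]["generated_text"])
print(f"Elapsed: {time.time() - start:.1f}s")  # watch nvidia-smi while this runs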