Thiết kế website giá rẻ

Question

Please help me with this one. I am trying to improve the text in my CSV file using Llama 3 on a Tesla core GPU. I have successfully loaded the model onto the GPU, but when I run the nvidia-smi command I do not see any spike in GPU usage. I am running the model on my local computer with the code below. Please check it and let me know what I can do to improve the speed of text generation. Everything in the code looks correct to me, but I am still unable to generate the text, and I do not understand why.

Cell1 in jupyter lab

import pandas as pd
import re
import os
import string
import contractions
import emoji
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from unidecode import unidecode
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
print("Libraries loaded")

cell2:-

import torch

# Ask PyTorch whether it can see a CUDA device, then report device 0's name.
cuda_ready = torch.cuda.is_available()
print(cuda_ready)  # Should print True if CUDA is available
first_gpu_name = torch.cuda.get_device_name(0)
print(first_gpu_name)  # Should print the name of the GPU

o/p :-

True
GRID T4-16Q

cell3:-

# Report the installed PyTorch build string.
installed_torch = torch.__version__
print("PyTorch version:", installed_torch)

o/p :-

PyTorch version: 2.5.1+cu121

cell4:-

# Read the input CSV into the working DataFrame.
file_path = r'path to my file'
df = pd.read_csv(filepath_or_buffer=file_path, low_memory=False)

cell5:-

# Count the missing values in column 'x' before any rows are dropped.
x_is_missing = df['x'].isna()
nan_count_before = x_is_missing.sum()
print(f"NaN count before cleaning: {nan_count_before}")

o/p :-

NaN count before cleaning: 17965

cell6:-

# Discard rows with no value in 'x', then cast the surviving 'x' values to
# str in the same chained expression.
df = df.dropna(subset=['x']).astype({'x': str})
remaining_rows = len(df)
print(f"Number of records after dropping NaN: {remaining_rows}")

o/p :-

Number of records after dropping NaN: 35174

cell7:-

def contains_only_punctuation_or_nonalpha(text):
    """
    Report whether ``text`` is free of ASCII letters.

    Surrounding whitespace is stripped first. Returns True when the result
    is empty or holds no character in a-z / A-Z (digits, punctuation,
    symbols and non-ASCII letters all count as "non-alphabetic" here);
    returns False as soon as one ASCII letter is present.
    """
    stripped = text.strip()
    # An empty string cannot match, so it falls out as True with no
    # separate branch.
    has_ascii_letter = re.search(r'[a-zA-Z]', stripped) is not None
    return not has_ascii_letter

# Flag every row whose 'x' value has no alphabetic content and count them.
rows_to_remove = df['x'].apply(contains_only_punctuation_or_nonalpha)
removed_count = rows_to_remove.sum()

# Keep the complement of the flagged rows.
rows_to_keep = ~rows_to_remove
df = df[rows_to_keep]

print(f"Number of records removed: {removed_count}")
print(f"Number of records after cleaning: {len(df)}")

o/p :-

Number of records removed: 2641
Number of records after cleaning: 32533

cell8:-

# Model identifier to fetch and the local directory it is saved to.
model_name = "meta-llama/Llama-3.1-8B"
model_directory = "path to my modelllama_model"
# Show the current value of the TORCH_USE_CUDA_DSA environment variable.
print(os.environ.get("TORCH_USE_CUDA_DSA"))

o/p :-

None

cell9:-

try:
    # Fetch the tokenizer and the model for `model_name`, passing the same
    # option to both loaders.
    remote_options = {"trust_remote_code": True}
    tokenizer = AutoTokenizer.from_pretrained(model_name, **remote_options)
    model = AutoModelForCausalLM.from_pretrained(model_name, **remote_options)

    # Write both artifacts (tokenizer first, then model) to `model_directory`.
    for artifact in (tokenizer, model):
        artifact.save_pretrained(model_directory)

    print("Model and tokenizer downloaded successfully to:", model_directory)
except Exception as download_error:
    # Any failure is only printed; execution continues past this cell.
    print(download_error)

o/p :-

model was saved successfully here

cell10:-

# Environment settings for CUDA.
# TORCH_USE_CUDA_DSA is still set so the diagnostic print below reports "1".
os.environ["TORCH_USE_CUDA_DSA"] = "1"
# CUDA_LAUNCH_BLOCKING is deliberately NOT set here. It is a debugging switch
# that makes every CUDA kernel launch synchronous, which works against the
# generation throughput this notebook is after. Export CUDA_LAUNCH_BLOCKING=1
# in the shell before starting the kernel only while chasing a CUDA error.
print(os.getenv("TORCH_USE_CUDA_DSA"))

o/p :-

1

cell11:-

# Load tokenizer and model from the custom directory
try:
    tokenizer = AutoTokenizer.from_pretrained(model_directory, local_files_only=True)

    # from_pretrained instantiates weights in float32 unless told otherwise.
    # For an 8B-parameter model that is roughly 32 GB, far more than a 16 GB
    # T4 can hold, so load half precision whenever a CUDA device is present.
    # float16 is used rather than bfloat16 because the T4 has no native
    # bfloat16 support. CPU-only runs keep float32.
    load_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    model = AutoModelForCausalLM.from_pretrained(
        model_directory,
        local_files_only=True,
        torch_dtype=load_dtype,
    )

    # Ensure pad token is set if needed
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token_id = tokenizer.eos_token_id

    print("Tokenizer and model loaded successfully from local directory.")
except Exception as e:
    # Report the failure, then re-raise so later cells do not run against a
    # missing or half-initialised model.
    print(f"Error loading model/tokenizer: {e}")
    raise

o/p :-

Loading checkpoint shards: 100%
7/7 [00:02<00:00, 2.61it/s]
Tokenizer and model loaded successfully from local directory.

cell12:-

# Print torch.version.cuda, pick the target device, and move the model there.
print(f"CUDA version: {torch.version.cuda}")
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model.to(device)
print(f"Model loaded to {device}")

o/p :-

CUDA version: 12.1
Model loaded to cuda

cell13:-

# Switch the model to evaluation mode (PyTorch's Module.eval()).
model.eval()

cell14:-

def initialize_pipeline(model, tokenizer, device):
    """
    Initialize the text-generation pipeline using the specified model and tokenizer.

    Args:
        model: The causal language model the pipeline should run.
        tokenizer: The tokenizer paired with ``model``.
        device: Where the pipeline should execute. It is forwarded to
            ``pipeline(device=...)``, which accepts a ``torch.device``, a
            device string, or an integer ordinal (-1 for CPU). Pass ``None``
            to auto-select GPU 0 when CUDA is available and CPU otherwise.

    Returns:
        The constructed text-generation pipeline.

    Raises:
        Exception: Any error raised while building the pipeline is printed
            and then re-raised unchanged.
    """
    # Previously this argument was ignored and the device was always
    # re-derived from torch.cuda.is_available(); honour the caller's choice
    # and keep the old auto-detection only as the fallback for None.
    if device is None:
        device = 0 if torch.cuda.is_available() else -1

    try:
        text_gen_pipeline = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            device=device,
        )
        print("Pipeline initialized successfully.")
        return text_gen_pipeline
    except Exception as e:
        print(f"Error initializing pipeline: {e}")
        raise

Thiết kế website giá rẻ

Danh mục

Llama3 8B generating text very slow on Tesla Core GPU

Cell1 in jupyter lab

cell2:-

o/p :-

cell3:-

o/p :-

cell4:-

cell5:-

o/p :-

cell6:-

o/p :-

cell7:-

o/p :-

cell8:-

o/p :-

cell9:-

o/p :-

cell10:-

o/p :-

cell11:-

o/p :-

cell12:-

o/p :-

cell13:-

cell14:-