I’m trying to fine-tune LLaVA on a custom dataset, following the code presented here: https://colab.research.google.com/drive/10NLrfBKgt9ntPoQYQ24rEVWU-2rr1xf1#scrollTo=4ycDwt9G1RWN. I’ve been debugging with print statements, but I’m not sure what I’m doing wrong to get this error:
<code>    raise ValueError(
ValueError: The input provided to the model are wrong. The number of image tokens is 0 while the number of image given to the model is 8. This prevents correct indexing and breaks batch generation.
</code>
My code looks like this:
<code>import os
import torch
import pathlib
import wandb
from datasets import load_dataset
from trl import SFTTrainer
from peft import LoraConfig
from torchvision import transforms
from PIL import Image
from transformers import AutoTokenizer, AutoProcessor, TrainingArguments, LlavaForConditionalGeneration, BitsAndBytesConfig

model_id = "llava-hf/llava-1.5-7b-hf"
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
model = LlavaForConditionalGeneration.from_pretrained(model_id, quantization_config=quantization_config, torch_dtype=torch.float16)

LLAVA_CHAT_TEMPLATE = """
A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.
{% for message in messages %}
{% if message['role'] == 'user' %}
USER:
{% else %}
ASSISTANT:
{% endif %}
{% for item in message['content'] %}
{% if item['type'] == 'text' %}
{{ item['text'] }}
{% elif item['type'] == 'image' %}
<image>
{% endif %}
{% endfor %}
{% if message['role'] == 'user' %}
{% else %}
{{ eos_token }}
{% endif %}
{% endfor %}
"""

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.chat_template = LLAVA_CHAT_TEMPLATE
processor = AutoProcessor.from_pretrained(model_id)
processor.tokenizer = tokenizer


class LLavaDataCollator:
    def __init__(self, processor):
        self.processor = processor

    def __call__(self, examples):
        texts = []
        images = []
        for example in examples:
            conversations = example["conversations"]
            formatted_conversation = {"messages": []}
            for convo in conversations:
                role = "user" if convo["from"] == "human" else "assistant"
                content_items = []
                for line in convo["value"].split("\n"):
                    line = line.strip()
                    if line == "<image>":
                        content_items.append({"type": "image", "text": None})
                    else:
                        content_items.append({"type": "text", "text": line})
                formatted_conversation["messages"].append({"role": role, "content": content_items})
            text = self.processor.tokenizer.apply_chat_template(
                formatted_conversation["messages"], tokenize=False, add_generation_prompt=False
            )
            texts.append(text)
            # Load and append the image
            image_path = example["image"]
            image = Image.open(image_path).convert("RGB")
            images.append(image)
        batch = self.processor(texts, images, return_tensors="pt", padding=True)
        labels = batch["input_ids"].clone()
        if self.processor.tokenizer.pad_token_id is not None:
            labels[labels == self.processor.tokenizer.pad_token_id] = -100
        batch["labels"] = labels
        return batch


# Collator instance used for the test below and passed to the trainer
data_collator = LLavaDataCollator(processor)

train_dataset = load_dataset('json', data_files=f'{dataset_path}/train/dataset.json', split='train')
val_dataset = load_dataset('json', data_files=f'{dataset_path}/validation/dataset.json', split='train')
</code>
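To narrow things down with print statements, this is the kind of rough check I run to see whether any <image> placeholder actually survives into the rendered prompt for a single example; it just reuses the collator’s own parsing logic and the objects defined above:
<code># Debugging sketch: render the chat template for one example and count how many
# <image> placeholders end up in the prompt text (same parsing as the collator).
sample = train_dataset[0]
messages = []
for convo in sample["conversations"]:
    role = "user" if convo["from"] == "human" else "assistant"
    content_items = []
    for line in convo["value"].split("\n"):
        line = line.strip()
        if line == "<image>":
            content_items.append({"type": "image", "text": None})
        else:
            content_items.append({"type": "text", "text": line})
    messages.append({"role": role, "content": content_items})

rendered = processor.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
print(rendered)
print("image placeholders in rendered prompt:", rendered.count("<image>"))
</code>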
My dataset looks like this:
<code>> train_dataset[0]
{'id': '3d0304ef-c94c-4212-a831-c6553e697d13',
 'conversations': [{'from': 'human',
   'value': "<image src='/mnt/user_storage/llava_dataset/train/images/3d0304ef-c94c-4212-a831-c6553e697d13.jpg'>\nHow would you evaluate the overall quality of this design?"},
  {'from': 'gpt', 'value': 'This design is good.'}],
 'image': '/mnt/user_storage/llava_dataset/train/images/3d0304ef-c94c-4212-a831-c6553e697d13.jpg'}
</code>
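Since the collator only emits an image content item when a stripped line is exactly "<image>", I also print the raw lines of the first example to see what that comparison actually receives (a quick sketch on the loaded train_dataset):
<code># Print each stripped line of the first example's conversation and whether it
# matches the exact "<image>" string the collator checks for.
sample = train_dataset[0]
for convo in sample["conversations"]:
    for line in convo["value"].split("\n"):
        line = line.strip()
        print(repr(line), "->", line == "<image>")
</code>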
And when I test the data collator, this is what I get:
<code>> batch = data_collator([train_dataset[0]])
> print("Batch Input IDs:", batch["input_ids"].shape)
> print("Batch Attention Mask:", batch["attention_mask"].shape)
> print("Batch Pixel Values:", batch["pixel_values"].shape)
> print("Batch Labels:", batch["labels"].shape)
Batch Input IDs: torch.Size([1, 115])
Batch Attention Mask: torch.Size([1, 115])
Batch Pixel Values: torch.Size([1, 3, 336, 336])
Batch Labels: torch.Size([1, 115])
</code>
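On top of the shapes, I can count how many image tokens actually made it into input_ids versus the number of images handed to the processor (I look up the <image> token id from the tokenizer rather than hard-coding 32000, which I’m assuming is how LLaVA marks image positions):
<code># Compare the number of <image> token ids in the batch with the number of images.
image_token_id = processor.tokenizer.convert_tokens_to_ids("<image>")
print("image token id:", image_token_id)
print("image tokens in input_ids:", (batch["input_ids"] == image_token_id).sum().item())
print("images in pixel_values:", batch["pixel_values"].shape[0])
</code>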
To train, I do:
<code>training_args = TrainingArguments(
    output_dir=f"{dataset_path}/training_output",
    report_to="wandb",
    learning_rate=1.4e-5,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=1,
    logging_steps=5,
    num_train_epochs=100,
    push_to_hub=False,
    gradient_checkpointing=True,
    remove_unused_columns=False,
    fp16=True,
    bf16=False,
)

lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    target_modules="all-linear",
)

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    peft_config=lora_config,
    dataset_text_field="text",  # need a dummy field
    tokenizer=tokenizer,
    data_collator=data_collator,
    dataset_kwargs={"skip_prepare_dataset": True},
)
</code>
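As one more sanity check (a sketch, assuming the trainer is fully constructed as above), I can pull a single batch through the trainer’s own dataloader to confirm the custom collator output is what actually reaches the model:
<code># Fetch one batch from the trainer's dataloader and inspect the tensor shapes
# (input_ids / attention_mask / pixel_values / labels should all be present).
train_dl = trainer.get_train_dataloader()
first_batch = next(iter(train_dl))
print({k: tuple(v.shape) for k, v in first_batch.items()})
</code>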
Unfortunately, I can’t share my dataset here. When I call the trainer, I get:
<code>ValueError Traceback (most recent call last)
Cell In[22], line 1
----> 1 trainer.train()
File ~/anaconda3/lib/python3.10/site-packages/trl/trainer/sft_trainer.py:361, in SFTTrainer.train(self, *args, **kwargs)
358 if self.neftune_noise_alpha is not None and not self._trainer_supports_neftune:
359 self.model = self._trl_activate_neftune(self.model)
--> 361 output = super().train(*args, **kwargs)
363 # After training we make sure to retrieve back the original forward pass method
364 # for the embedding layer by removing the forward post hook.
365 if self.neftune_noise_alpha is not None and not self._trainer_supports_neftune:
File ~/anaconda3/lib/python3.10/site-packages/transformers/trainer.py:1885, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1883 hf_hub_utils.enable_progress_bars()
1884 else:
-> 1885 return inner_training_loop(
1886 args=args,
1887 resume_from_checkpoint=resume_from_checkpoint,
1888 trial=trial,
1889 ignore_keys_for_eval=ignore_keys_for_eval,
1890 )
File ~/anaconda3/lib/python3.10/site-packages/transformers/trainer.py:2216, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
2213 self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
2215 with self.accelerator.accumulate(model):
-> 2216 tr_loss_step = self.training_step(model, inputs)
2218 if (
2219 args.logging_nan_inf_filter
2220 and not is_torch_xla_available()
2221 and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
2222 ):
2223 # if loss is nan or inf simply add the average of previous logged losses
2224 tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)
File ~/anaconda3/lib/python3.10/site-packages/transformers/trainer.py:3238, in Trainer.training_step(self, model, inputs)
3235 return loss_mb.reduce_mean().detach().to(self.args.device)
3237 with self.compute_loss_context_manager():
-> 3238 loss = self.compute_loss(model, inputs)
3240 del inputs
3241 torch.cuda.empty_cache()
File ~/anaconda3/lib/python3.10/site-packages/transformers/trainer.py:3264, in Trainer.compute_loss(self, model, inputs, return_outputs)
3262 else:
3263 labels = None
-> 3264 outputs = model(**inputs)
3265 # Save past state if it exists
3266 # TODO: this needs to be fixed and made cleaner later.
3267 if self.args.past_index >= 0:
File ~/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:1532, in Module._wrapped_call_impl(self, *args, **kwargs)
1530 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1531 else:
-> 1532 return self._call_impl(*args, **kwargs)
File ~/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py:1541, in Module._call_impl(self, *args, **kwargs)
1536 # If we don't have any hooks, we want to skip the rest of the logic in
1537 # this function, and just call forward.
1538 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1539 or _global_backward_pre_hooks or _global_backward_hooks
1540 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1541 return forward_call(*args, **kwargs)
1543 try:
1544 result = None
File ~/anaconda3/lib/python3.10/site-packages/torch/nn/parallel/data_parallel.py:185, in DataParallel.forward(self, *inputs, **kwargs)
183 return self.module(*inputs[0], **module_kwargs[0])
184 replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
--> 185 outputs = self.parallel_apply(replicas, inputs, module_kwargs)
186 return self.gather(outputs, self.output_device)
File ~/anaconda3/lib/python3.10/site-packages/torch/nn/parallel/data_parallel.py:200, in DataParallel.parallel_apply(self, replicas, inputs, kwargs)
199 def parallel_apply(self, replicas: Sequence[T], inputs: Sequence[Any], kwargs: Any) -> List[Any]:
--> 200 return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])
File ~/anaconda3/lib/python3.10/site-packages/torch/nn/parallel/parallel_apply.py:108, in parallel_apply(modules, inputs, kwargs_tup, devices)
106 output = results[i]
107 if isinstance(output, ExceptionWrapper):
--> 108 output.reraise()
109 outputs.append(output)
110 return outputs
File ~/anaconda3/lib/python3.10/site-packages/torch/_utils.py:705, in ExceptionWrapper.reraise(self)
701 except TypeError:
702 # If the exception takes multiple arguments, don't try to
703 # instantiate since we don't know how to
704 raise RuntimeError(msg) from None
--> 705 raise exception
ValueError: Caught ValueError in replica 0 on device 0.
Original Traceback (most recent call last):
File "/home/ray/anaconda3/lib/python3.10/site-packages/torch/nn/parallel/parallel_apply.py", line 83, in _worker
output = module(*input, **kwargs)
File "/home/ray/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ray/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ray/anaconda3/lib/python3.10/site-packages/peft/peft_model.py", line 642, in forward
return self.get_base_model()(*args, **kwargs)
File "/home/ray/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/ray/anaconda3/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
File "/home/ray/anaconda3/lib/python3.10/site-packages/accelerate/hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
File "/home/ray/anaconda3/lib/python3.10/site-packages/transformers/models/llava/modeling_llava.py", line 439, in forward
inputs_embeds, attention_mask, labels, position_ids = self._merge_input_ids_with_image_features(
File "/home/ray/anaconda3/lib/python3.10/site-packages/transformers/models/llava/modeling_llava.py", line 335, in _merge_input_ids_with_image_features
raise ValueError(
ValueError: The input provided to the model are wrong. The number of image tokens is 0 while the number of image given to the model is 8. This prevents correct indexing and breaks batch generation.
</code>
I’m not sure what is wrong with my data collator, which I believe is the source of the issue, since it’s the main difference from the original code. Any help would be appreciated.