Thiết kế website giá rẻ

Question

I’m attempting to prepare a model for question-answer tasks for my thesis, in the following code:
`

import os
import json
import torch
from torch.nn.utils.rnn import pad_sequence
from transformers import DataCollatorForLanguageModeling, AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, TrainerCallback
import matplotlib.pyplot as plt

class PlotCallback(TrainerCallback):
    def __init__(self):
        self.train_losses = []
        self.eval_losses = []
        self.eval_steps = []

    def on_log(self, args, state, control, logs=None, **kwargs):
        if state.is_world_process_zero:
            if 'loss' in logs:
                self.train_losses.append(logs['loss'])
            if 'eval_loss' in logs:
                self.eval_losses.append(logs['eval_loss'])
                self.eval_steps.append(state.global_step)


    def on_train_end(self, args, state, control, **kwargs):
        # Tanulási veszteség diagram
        plt.figure(figsize=(10, 6))
        plt.plot(self.train_losses, label='Training Loss', color='blue')
        plt.title('Training Loss over Training Steps')
        plt.xlabel('Training Steps')
        plt.ylabel('Loss')
        plt.legend()
        plt.grid(True)
        plt.show()
class MyDataCollator(DataCollatorForLanguageModeling):
    def __init__(self, tokenizer):
        super().__init__(tokenizer, mlm=False)  # mlm=False a kausalitás alapú nyelvi modellezéshez

    def collate_batch(self, examples):
        contexts = [example['Context'] for example in examples]
        qa_lists = [example['Questions_Answers'] for example in examples]

        inputs = []
        labels = []

        for context, qa_list in zip(contexts, qa_lists):
            context_inputs = self.tokenizer(context, return_tensors="tf", padding=True, truncation=True)

            # Konvertálja a context_inputs tenzorokat PyTorch tenzorokká
            context_input_ids = torch.tensor(context_inputs["input_ids"].numpy(), dtype=torch.long)
            context_attention_mask = torch.tensor(context_inputs["attention_mask"].numpy(), dtype=torch.long)

            for qa in qa_list:
                question_inputs = self.tokenizer(qa['Question'], return_tensors="tf", padding=True, truncation=True)

                # Konvertálja a question_inputs tenzorokat PyTorch tenzorokká
                question_input_ids = torch.tensor(question_inputs["input_ids"].numpy(), dtype=torch.long)
                question_attention_mask = torch.tensor(question_inputs["attention_mask"].numpy(), dtype=torch.long)

                # Az input és label adatok hozzáadása
                inputs.append((context_input_ids, context_attention_mask))
                labels.append((question_input_ids[0], question_attention_mask[0]))

        # Továbbra is a torch.nn.utils.rnn.pad_sequence-t használjuk a padding-hez
        return {
            "input_ids": torch.nn.utils.rnn.pad_sequence([i[0] for i in inputs], batch_first=True),
            "attention_mask": torch.nn.utils.rnn.pad_sequence([i[1] for i in inputs], batch_first=True),
            "labels": torch.nn.utils.rnn.pad_sequence([l[0] for l in labels], batch_first=True),
            "labels_attention_mask": torch.nn.utils.rnn.pad_sequence([l[1] for l in labels], batch_first=True)
        }

with open("train.MILQA-2023-03-27.squad.s.json", "r", encoding="utf-8") as f:
    data = json.load(f)

inputs = []

for paragraph in data["data"][0]["paragraphs"]:
    context = paragraph["context"]
    qa_list = []
    for qa in paragraph["qas"]:
        question = qa["question"]
        short_answer = None
        long_answer = None
        short_start = None
        short_end = None
        long_start = None
        long_end = None
        if "answers" in qa:
            if "short" in qa["answers"]:
                short_answer = qa["answers"]["short"][0]["text"]
                short_start = qa["answers"]["short"][0]["start"]
                short_end = qa["answers"]["short"][0]["end"]
            if "long" in qa["answers"]:
                long_answer = qa["answers"]["long"][0]["text"]
                long_start = qa["answers"]["long"][0]["start"]
                long_end = qa["answers"]["long"][0]["end"]
        qa_list.append({'Question': question, 'Short Answer': short_answer, 'Long Answer': long_answer})
    inputs.append({'Context': context, 'Questions_Answers': qa_list})

for item in data["data"][1:]:
    for paragraph in item["paragraphs"]:
        context = paragraph["context"]
        qa_list = []
        for qa in paragraph["qas"]:
            question = qa["question"]
            short_answer = None
            long_answer = None
            short_start = None
            short_end = None
            long_start = None
            long_end = None
            if "answers" in qa:
                if "short" in qa["answers"]:
                    short_answer = qa["answers"]["short"][0]["text"]
                    short_start = qa["answers"]["short"][0]["start"]
                    short_end = qa["answers"]["short"][0]["end"]
                if "long" in qa["answers"]:
                    long_answer = qa["answers"]["long"][0]["text"]
                    long_start = qa["answers"]["long"][0]["start"]
                    long_end = qa["answers"]["long"][0]["end"]
            qa_list.append({'Question': question, 'Short Answer': short_answer, 'Long Answer': long_answer})
        inputs.append({'Context': context, 'Questions_Answers': qa_list})

model_name = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

data_collator = MyDataCollator(tokenizer)

batch = data_collator.collate_batch(inputs)

output_dir = "./finetuned_model"
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

training_args = TrainingArguments(
    output_dir=output_dir,
    overwrite_output_dir=True,
    num_train_epochs=2,
    learning_rate=2e-4,
    per_device_train_batch_size=2,
    warmup_ratio=0.1,
    lr_scheduler_type="linear",
    save_strategy="epoch"
)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=batch,  # Itt közvetlenül adjuk át a batch-et
    callbacks=[PlotCallback()],
)

trainer.train()

tokenizer.save_pretrained(output_dir)
`

However, I’m encountering the following errors:

C:UsersLeventeDesktopminigpt.venvScriptspython.exe C:UsersLeventeDesktopminigpttrain.py 
2024-05-01 12:57:58.680455: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-01 12:57:59.278669: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-01 12:58:04.644087: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.

Traceback (most recent call last):
  File "C:UsersLeventeDesktopminigpttrain.py", line 133, in <module>
    batch = data_collator.collate_batch(inputs)
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:UsersLeventeDesktopminigpttrain.py", line 64, in collate_batch
    "input_ids": torch.nn.utils.rnn.pad_sequence([i[0] for i in inputs], batch_first=True),
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:UsersLeventeDesktopminigpt.venvLibsite-packagestorchnnutilsrnn.py", line 399, in pad_sequence
    return torch._C._nn.pad_sequence(sequences, batch_first, padding_value)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: The size of tensor a (349) must match the size of tensor b (327) at non-singleton dimension 1

Process finished with exit code 1

What could be the solution to the problem to make the program run successfully? Unfortunately, time is pressing, and I’m quite puzzled.

Unfortunately, I’ve tried many things, but I’ve encountered various other problems. The data processing works fine, but the issue always arises when I pass it to the trainer. There was a problem with passing a list containing dictionaries, and another problem occurred with “Scalar tensor has no len()”. What could be the solution? Could you help me fix the code?

Thiết kế website giá rẻ

Danh mục

I’m trying to train a model for a question-answer task, but I keep encountering errors. Why?