I’m working on a natural language processing (NLP) project using Hugging Face transformers. I’m a real beginner, so maybe I’m misunderstanding something. I’ve created a custom model derived from GPTNeoForCausalLM that overrides the forward method so that it always returns the same encoded sentence (“wrong token”).
My problem is that when I evaluate this model, I only get the single word “wrong” as output, not the whole sentence.
Here is the relevant code for my evaluation function:
<code>import torch
from datasets import Dataset
from transformers import PreTrainedModel, PreTrainedTokenizerBase


def evaluate_model(
    model: PreTrainedModel,
    dataset: Dataset,
    pre_trained_tokenizer: PreTrainedTokenizerBase,
    batch_size: int = 8,
    context_window_size: int = 1536,
) -> float:
    for item in dataset:
        inputs = pre_trained_tokenizer(item["prompt"], truncation=True, max_length=context_window_size, return_tensors='pt')
        with torch.no_grad():
            predictions = model.forward(**inputs)
        # decode the most probable token at each position
        predicted_token_ids = torch.argmax(predictions.logits, dim=-1)
        predicted_completion = pre_trained_tokenizer.decode(predicted_token_ids[0], skip_special_tokens=True)
        print('\nPredicted completion:', predicted_completion)
        ...
</code>
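For context, the target sentence is more than one token long. Here is a quick sanity check of how the tokenizer splits it (illustrative; the exact IDs depend on the tokenizer revision):

<code>from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("roneneldan/TinyStories-1M")
ids = tok.encode("wrong token", add_special_tokens=False)
print(ids)                             # several IDs, not one
print([tok.decode([i]) for i in ids])  # the individual pieces of the sentence
</code>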
My class:
<code>import torch
import transformers
from torch import FloatTensor, Tensor
from transformers import GPTNeoConfig, GPTNeoForCausalLM
from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions


class Test(GPTNeoForCausalLM):
    def __init__(
        self,
        config: GPTNeoConfig = transformers.AutoConfig.from_pretrained("roneneldan/TinyStories-1M"),
    ) -> None:
        super().__init__(config)
        self.model = GPTNeoForCausalLM.from_pretrained("roneneldan/TinyStories-1M")
        self.tokenizer = transformers.AutoTokenizer.from_pretrained(
            "roneneldan/TinyStories-1M",
            revision="8cd14d5",
            cache_dir="./data/",
            padding_side="left",
        )

    def forward(
        self,
        input_ids: Tensor | None = None,
        past_key_values: tuple[FloatTensor] | None = None,
        attention_mask: Tensor | None = None,
        token_type_ids: Tensor | None = None,
        position_ids: Tensor | None = None,
        head_mask: Tensor | None = None,
        inputs_embeds: Tensor | None = None,
        labels: Tensor | None = None,
        use_cache: bool | None = None,
        output_attentions: bool | None = None,
        output_hidden_states: bool | None = None,
        return_dict: bool | None = None,
    ) -> tuple[torch.Tensor] | CausalLMOutputWithCrossAttentions:
        vocab_size = self.config.vocab_size
        logits = torch.zeros((1, vocab_size))
        wrong_answer_token_id = self.tokenizer.encode("wrong token", add_special_tokens=False)[0]
        logits[0, wrong_answer_token_id] = float('inf')
        return CausalLMOutputWithCrossAttentions(
            logits=logits,
            past_key_values=past_key_values,
            attentions=None,
            hidden_states=None,
        )
</code>
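For completeness, here is roughly how I drive the evaluation (simplified; the one-item dataset is just a stand-in for my real one):

<code>from datasets import Dataset

model = Test()
dataset = Dataset.from_list([{"prompt": "Once upon a time"}])
evaluate_model(model, dataset, model.tokenizer)
# prints: Predicted completion: wrong
</code>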
My evaluation code must also work with the unmodified TinyStories model, and that part currently works: there I get all the predictions.
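For example, a simplified version of that baseline run looks like this (again with a stand-in one-item dataset):

<code>from datasets import Dataset
from transformers import AutoTokenizer, GPTNeoForCausalLM

baseline = GPTNeoForCausalLM.from_pretrained("roneneldan/TinyStories-1M")
tok = AutoTokenizer.from_pretrained("roneneldan/TinyStories-1M")
ds = Dataset.from_list([{"prompt": "Once upon a time"}])
evaluate_model(baseline, ds, tok)
# prints one predicted token per input position, i.e. a full completion
</code>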
What am I doing wrong? Thanks in advance to anyone who can help!