I'm hoping someone here can help me with my problem!

I'm training my model. My dataset class builds the dictionary `data = {'pixel_values': pixel_values, 'text': text}` (image data and text data) in its `__getitem__`, but training now fails with:

```
texts.append(data['text'])
KeyError: 'text'
```

I located the relevant files and found that the error occurs in the `__call__` method of my data collator class: it cannot find the 'text' key when indexing the dictionary. While debugging I also found that the `features` passed into this method do not contain a 'text' key at all. I don't know what the problem is! I've followed ChatGPT's suggestions to troubleshoot, e.g. checking the data returned by the dataset's `__getitem__` (which is fine!). Any help would be much appreciated!
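For reference, this is the kind of standalone check I ran on `__getitem__` (a minimal sketch; the model name and file paths are placeholders, not my real config — the classes themselves are shown below):

```python
# Minimal sketch of the __getitem__ check; the model name and paths below
# are placeholders, not the actual training configuration.
from transformers import CLIPProcessor

clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
dataset = CLIPDataset("train.csv", clip_processor, "images/")

sample = dataset[0]
print(sample.keys())  # dict_keys(['pixel_values', 'text']) -- both keys are present
```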
My data collator class:
```python
from typing import Any, Dict, List

import torch


class CLIPCollator(object):
    def __init__(self, clip_processor, max_seq_length):
        self.clip_processor = clip_processor
        self.max_seq_length = max_seq_length

    def __call__(self, features: List[Dict[str, Any]]) -> Dict[str, Any]:
        texts, pixel_values_list = [], []
        for data in features:
            # If image preprocessing failed, skip this sample
            # if data['pixel_values'] is None:
            #     continue
            # if data['text'] is None:
            #     continue
            texts.append(data['text'])  # <-- this line raises KeyError: 'text'
            pixel_values_list.append(data['pixel_values'])
        # Tokenize the texts
        inputs = self.clip_processor(
            text=texts, return_tensors="pt", max_length=self.max_seq_length,
            truncation=True, padding=True
        )
        inputs['return_loss'] = True
        inputs['pixel_values'] = torch.concat(pixel_values_list, dim=0)
        inputs.pop('token_type_ids', None)  # CLIP's tokenizer has no token_type_ids
        return inputs
```
My dataset class:
```python
import pandas as pd
from loguru import logger
from os.path import join
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms


class CLIPDataset(Dataset):
    def __init__(self, file, clip_processor, image_path):
        df = pd.read_csv(file, usecols=['text', 'filename'])
        data_list = df.to_dict('records')
        print('len of data:{}'.format(len(data_list)))
        self.data_list = data_list
        self.clip_processor = clip_processor
        self.image_path = image_path
        # Define the image preprocessing pipeline
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),  # resize the image
            transforms.ToTensor(),  # convert the image to a PyTorch tensor
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])  # normalize
        ])

    def __len__(self):
        return len(self.data_list)

    def __getitem__(self, index):
        row = self.data_list[index]
        text = row['text'].strip()
        filename = row['filename']
        file = join(self.image_path, filename)
        try:
            image = Image.open(file).convert('RGB')
        except Exception as e:
            # Failed to open the image
            logger.info('open image error, text: {}, filename:{}'.format(text, filename))
            logger.info(e)
            image = None
        if image is None:
            pixel_values = None
        else:
            # Apply the preprocessing transforms before the CLIP processor
            if self.transform:
                image = self.transform(image)
            pixel_values = self.clip_processor(images=image, return_tensors='pt')['pixel_values']
        data = {'pixel_values': pixel_values, 'text': text}
        return data
```
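And here is the corresponding end-to-end sketch (same placeholder names as above) that wires the dataset straight into the collator through a plain `DataLoader`, bypassing the training framework. If this produces a batch but the real training run still raises the `KeyError`, then the 'text' key must be getting dropped somewhere between the dataset and the collator during training itself:

```python
# Sketch: feed the dataset through the collator directly, bypassing the
# training framework; clip_processor and dataset are the placeholders
# constructed in the earlier sketch.
from torch.utils.data import DataLoader

collator = CLIPCollator(clip_processor, max_seq_length=77)
loader = DataLoader(dataset, batch_size=2, collate_fn=collator)

batch = next(iter(loader))
print(batch.keys())  # should include 'input_ids', 'attention_mask', 'pixel_values', 'return_loss'
```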