Let me state my problem clearly. I have a feature_extractor.pkl file that contains a pickled feature-extractor object, and that object holds a BERT model. The class looks like this:
class BERTFeatureExtractor:
    """Turn the ``essay`` column of a DataFrame into BERT-derived features.

    Each essay is tokenized (truncated to 512 tokens), run through the
    pre-trained BERT model with hidden states enabled, and the model output
    is reduced to a feature vector by the user-supplied ``process_fn``.

    Pickling note: ``__getstate__`` stores a CPU copy of the model, so a
    pickle written on a GPU machine can be loaded on a CPU-only machine.
    Pickles written before these hooks existed may still contain CUDA
    storages and are not fixed retroactively.
    """

    def __init__(self, pretrained_bert_model_name, process_fn, process_fn_kwargs=None):
        """Load the tokenizer and model named by ``pretrained_bert_model_name``.

        Args:
            pretrained_bert_model_name: Name or path handed to
                ``BertTokenizer.from_pretrained`` / ``BertModel.from_pretrained``.
            process_fn: Callable invoked as ``process_fn(output, **kwargs)`` on
                the model output; its result must support ``.cpu().numpy()``.
            process_fn_kwargs: Optional keyword arguments for ``process_fn``.
        """
        self.pretrained_bert_model_name = pretrained_bert_model_name
        self.process_fn = process_fn
        self.process_fn_kwargs = process_fn_kwargs if process_fn_kwargs is not None else {}
        # Load pre-trained BERT tokenizer and model
        print('IMMA USE', self.pretrained_bert_model_name)
        self.tokenizer = BertTokenizer.from_pretrained(self.pretrained_bert_model_name)
        self.model = BertModel.from_pretrained(self.pretrained_bert_model_name)
        # Move model to GPU when one is available
        self.model.to(self._available_device())

    @staticmethod
    def _available_device():
        """Return the CUDA device if one is usable, otherwise the CPU device."""
        return torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def __getstate__(self):
        """Return picklable state whose model weights live on the CPU.

        The live model is left untouched; a deep copy is moved to the CPU so
        the resulting pickle does not depend on the device it was written on.
        The copy temporarily duplicates the model in memory.
        """
        import copy

        state = self.__dict__.copy()
        model = state.get('model')
        if model is not None:
            state['model'] = copy.deepcopy(model).to('cpu')
        return state

    def __setstate__(self, state):
        """Restore state and move the model to the best device available here."""
        self.__dict__.update(state)
        model = self.__dict__.get('model')
        if model is not None:
            model.to(self._available_device())

    def fit(self, df):
        """Do nothing; the extractor has no trainable state. Returns ``None``."""
        return

    def _embed_texts(self, texts):
        """Return one feature array per text, in the order given."""
        device = self._available_device()
        # Keep the model on the same device the inputs are sent to, even if
        # the instance was created or restored under different conditions.
        self.model.to(device)
        bert_features = []
        for text in tqdm(texts, desc='BERT Features'):
            encoded_input = self.tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
            # Move input tensors to the model's device
            encoded_input = {key: value.to(device) for key, value in encoded_input.items()}
            with torch.no_grad():
                output = self.model(**encoded_input, output_hidden_states=True)
            embeddings = self.process_fn(output, **self.process_fn_kwargs).cpu().numpy()
            bert_features.append(embeddings)
        return bert_features

    def transform(self, df, id_cols=None):
        """Compute BERT features for every row of ``df['essay']``.

        Args:
            df: DataFrame with an ``essay`` column.
            id_cols: Optional column label (or list of labels) copied from
                ``df`` and placed in front of the feature columns.

        Returns:
            A DataFrame with a fresh 0..n-1 index holding the identifier
            columns (if requested) followed by ``BERT Feature 1..k``. For an
            empty ``df`` only the identifier columns (or nothing) are returned,
            because the feature width is unknown without running the model.
        """
        start = time()
        texts = df['essay']
        identifiers = None
        if id_cols is not None:
            # Drop the caller's index: concat aligns on index, and the feature
            # frame below is always numbered 0..n-1.
            identifiers = df[id_cols].reset_index(drop=True)

        if len(texts) == 0:
            bert_df = pd.DataFrame() if identifiers is None else pd.DataFrame(identifiers)
        else:
            # Prepare BERT features for each document
            bert_features = self._embed_texts(texts)
            bert_feature_names = [f'BERT Feature {i + 1}' for i in range(len(bert_features[0]))]
            bert_df = pd.DataFrame(bert_features, columns=bert_feature_names)
            if identifiers is not None:
                bert_df = pd.concat([identifiers, bert_df], axis=1)

        print('BERT finished after', time() - start, 'seconds')
        return bert_df

    def fit_transform(self, df, id_cols=None):
        """Equivalent to ``transform``; provided for pipeline compatibility."""
        return self.transform(df, id_cols)
When I load the pickled object, I get the following error: Attempting to deserialize object on a CUDA device but torch.cuda.is_available() is False. If you are running on a CPU-only machine, please use torch.load with map_location=torch.device('cpu') to map your storages to the CPU.
Is there a way to load the object onto the CPU without recreating the pickle files?
ManosL is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.