I have an ML model written in PyTorch that produces an embedding of size 512.
I wrote a custom script that runs the model and sends the embeddings to a MongoDB collection (which is currently empty).
import numpy as np
import torch
import face_recognition as fr
from pymongo import MongoClient
from torchvision.transforms import v2

# Model and MONGO_CONNECTION_STRING are defined elsewhere in the project.
class FaceEmbedding(Model):
    def __init__(self, MONGO_CONNECTION_STRING: str = MONGO_CONNECTION_STRING) -> None:
        super().__init__()
        self.FaceEmbeddingModel = torch.load('Model/model2.pth')
        self.device = next(self.FaceEmbeddingModel.parameters()).device
        self.T = v2.Compose([
            v2.ToImage(),
            v2.ToDtype(torch.float32),
            v2.Resize(256),
            v2.CenterCrop(224),
            v2.Normalize(mean=[0.48235, 0.45882, 0.40784],
                         std=[1 / 255, 1 / 255, 1 / 255])
        ])
        self.dbName = "FaceSimilarity"
        self.collectionName = "Embeddings"
        self.client = MongoClient(MONGO_CONNECTION_STRING)
        self.collection = self.client[self.dbName][self.collectionName]
    def __makeEucEmbeddings(self, img: np.ndarray) -> np.ndarray:
        img_t = self.T(img)
        img_t = torch.unsqueeze(img_t, dim=0)  # add a batch dimension
        embedding = self.FaceEmbeddingModel.pos(img_t.to(self.device))
        del img_t
        torch.cuda.empty_cache()
        return embedding.squeeze().cpu().detach().numpy()
    def makeEmbeddings(self, img: np.ndarray, k: int):
        face_locations = fr.face_locations(img)
        # sorted() returns a new list, so the result must be assigned back
        face_locations = sorted(
            face_locations,
            key=lambda rect: abs(rect[2] - rect[0]) * abs(rect[1] - rect[3]))
        face_locations = face_locations[:k][::-1]
        EucEmb = []
        FREmb = []
        for face in face_locations:
            top, right, bottom, left = face
            face_img = img[top:bottom, left:right]
            FREmb.append(fr.face_encodings(img, [face])[0])
            Euc = self.__makeEucEmbeddings(face_img)
            EucEmb.append(Euc.tolist())
        return EucEmb, FREmb, face_locations
    def __make_pipeline(self, EucEmb):
        pipeline = [{
            "$vectorSearch": {
                "index": "vector_index",
                "path": "EuclidianEmbedding",
                "queryVector": EucEmb,
                "numCandidates": 200,
                "limit": 10
            }
        }]
        return pipeline
    def saveEmbedding(self, embeddings) -> None:
        data = []
        for EucEmb, FREmb in embeddings:
            data.append({
                "EuclidianEmbedding": EucEmb,
                # face_recognition returns numpy arrays, which PyMongo cannot
                # serialize directly, so convert them to plain lists
                "FREmbedding": np.asarray(FREmb).tolist()
            })
        self.collection.insert_many(data)
    def __vectorSearch(self, img, k):
        EucEmb, FREmb, face_locations = self.makeEmbeddings(img, k)
        # run one $vectorSearch per detected face and materialize each cursor
        ResEmb = []
        for emb in EucEmb:
            ResEmb.append(list(self.collection.aggregate(self.__make_pipeline(emb))))
        RecFace = []
        NotRecFace = []
        for i in range(len(FREmb)):
            candidates = ResEmb[i]
            match = fr.compare_faces(
                [np.array(doc['FREmbedding']) for doc in candidates], FREmb[i])
            if True in match:
                RecFace.append(candidates[match.index(True)])
            else:
                NotRecFace.append([EucEmb[i], FREmb[i]])
        return RecFace, NotRecFace, face_locations
    def vectorSearch(self, img, k, SaveNotRecFace=False):
        RecFace, NotRecFace, face_locations = self.__vectorSearch(img, k)
        if SaveNotRecFace and NotRecFace:
            # saveEmbedding expects the whole list of [EucEmb, FREmb] pairs
            self.saveEmbedding(NotRecFace)
        return RecFace, NotRecFace, face_locations
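For context, this is roughly how the script drives the class (the image path here is just a placeholder):

model = FaceEmbedding()
img = fr.load_image_file("images/group_photo.jpg")  # placeholder path
recognized, not_recognized, locations = model.vectorSearch(img, k=3, SaveNotRecFace=True)
print(len(recognized), "recognized,", len(not_recognized), "new faces")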
I get this error when running the script on several images that share one common face:
OperationFailure: PlanExecutor error during aggregation :: caused by :: EuclidianEmbedding_1 is not indexed as knnVector, full error: {'ok': 0.0, 'errmsg': 'PlanExecutor error during aggregation :: caused by :: EuclidianEmbedding_1 is not indexed as knnVector', 'code': 8, 'codeName': 'UnknownError', '$clusterTime': {'clusterTime': Timestamp(1716546450, 2), 'signature': {'hash': b'\xf4B1N\xc2\xffG\x9d$J}\xea\xad\xfe\xdfz\x83Cx\x80', 'keyId': 7345064297216606213}}, 'operationTime': Timestamp(1716546450, 2)}
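To see what the cluster actually has, I listed both the regular indexes and the search indexes with a quick check like this (list_search_indexes needs PyMongo 4.5+):

from pymongo import MongoClient

client = MongoClient(MONGO_CONNECTION_STRING)
collection = client["FaceSimilarity"]["Embeddings"]

# regular (B-tree) indexes, e.g. EuclidianEmbedding_1
print(collection.index_information())

# Atlas Search / Vector Search indexes
for idx in collection.list_search_indexes():
    print(idx["name"], idx.get("latestDefinition"))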
These are the indexes defined in MongoDB Compass for the collection.
In Atlas Vector Search, I defined this JSON:
{
  "fields": [
    {
      "numDimensions": 512,
      "path": "EuclideanEmbedding",
      "similarity": "euclidean",
      "type": "vector"
    }
  ]
}
I looked this up in the MongoDB docs and came across the knnVector index type for Atlas Vector Search, but changing the type in the JSON gives an error again.
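For reference, the knnVector attempt followed the legacy Atlas Search mapping syntax, roughly like this (I am reconstructing it from memory, so the exact definition I tried may have differed):

{
  "mappings": {
    "dynamic": true,
    "fields": {
      "EuclideanEmbedding": {
        "type": "knnVector",
        "dimensions": 512,
        "similarity": "euclidean"
      }
    }
  }
}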