As the title says.
First, I put the data into Redis as hashes:
import json
import numpy as np
import redis

# r is an existing redis.Redis connection created elsewhere
df = embeddings.sql_query()
for index, row in df.iterrows():
    chunkid = row['chunkId']
    tags_dict = json.loads(row['Filetags'])
    # One hash per chunk, keyed as chunkid:<chunkId>
    r.hset(f"chunkid:{chunkid}", "kId", row['kId'])
    r.hset(f"chunkid:{chunkid}", "Content", row['Content'])
    r.hset(f"chunkid:{chunkid}", "Filename", row['Filename'])
    # Embedding stored as raw float32 bytes
    r.hset(f"chunkid:{chunkid}", "Vector", np.array(row['Vector'], dtype=np.float32).tobytes())
    for tag, value in tags_dict.items():
        r.hset(f"chunkid:{chunkid}", tag, value)
Then, I create the index over the hashes:

from redis.commands.search.field import TagField, TextField, VectorField
from redis.commands.search.indexDefinition import IndexDefinition, IndexType

INDEX_NAME = "name"
DOC_PREFIX = "hash:"

try:
    r.ft(INDEX_NAME).info()
    print("Index already exists!")
except:
    # schema
    schema = (
        TagField("Filename"),
        TextField("Content"),
        VectorField("vector",
            "FLAT", {
                "TYPE": "FLOAT32",
                "DIM": 1536,
                "DISTANCE_METRIC": "COSINE"
            }
        ),
    )
    definition = IndexDefinition(prefix=[DOC_PREFIX], index_type=IndexType.HASH)
    r.ft(INDEX_NAME).create_index(fields=schema, definition=definition)
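Once the index exists, its state can be inspected as a diagnostic; num_docs reports how many hashes the index has actually picked up:

# Diagnostic: how many documents has the index indexed so far?
info = r.ft(INDEX_NAME).info()
print(info["num_docs"])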
Finally, I perform a vector search:

from redis.commands.search.query import Query

# k = number of nearest neighbours to return (defined elsewhere)
vec = embeddings.text_embedding(input)
query_embedding = np.array(vec, dtype=np.float32)

base_query = f'*=>[KNN {k} @vector $vec AS vector_score]'
query = (
    Query(base_query)
    .sort_by('vector_score')
    .return_fields("id", "Filename", "Content", "vector_score")
    .paging(0, k)
    .dialect(2)
)
query_params = {
    "vec": query_embedding.tobytes()
}
results = r.ft(INDEX_NAME).search(query, query_params)
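For reference, the result object exposes total and docs, so the output quoted below comes from a print roughly like this (the exact print statement is an assumption):

# Assumed print that produces the "Found ... results" line below
print(f"Found {results.total} results", results.docs)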
But the search always returns 0 results, like the following:

Found 0 results []

Do you have any suggestions for adjustments?