I’ve got the following struct:
(Golang and mongo)
// Account is the document shape stored in the MongoDB collection.
// The combination AccountId + SessionId + Template + JoinDate is intended to
// be unique once duplicates have been removed.
Account struct {
// ID is the document's primary key (BSON "_id", JSON "id").
ID primitive.ObjectID `bson:"_id" json:"id,omitempty"`
// NOTE(review): the fields below carry no bson tags, so the stored key
// names depend on the codec in use — confirm they match the names passed
// to deleteDuplicates.
AccountId string
SessionId string
Template string
// JoinDate is a time.Time, so it is not a string value like the other
// key fields.
JoinDate time.Time
// ...
}
I am trying to find duplicates (and remove them) before I create a unique index on these fields: AccountId + SessionId + Template + JoinDate.
So this function is called with AccountId, SessionId, Template, JoinDate as fields:
func (ms *MongoStorage) deleteDuplicates(col string, fields ...string) (int64, error) {
// findDuplicatesResult is one group produced by the $group/$match pipeline
// below: a distinct combination of the requested fields that occurs more
// than once in the collection.
type findDuplicatesResult struct {
	// Count is the number of documents sharing this key combination.
	Count int64 `bson:"count"`
	// ObjectIDs holds the _id of every document in the group.
	ObjectIDs []primitive.ObjectID `bson:"objectIDs"`
	// Id is the group key: field name -> grouped value. The values keep their
	// BSON types (e.g. JoinDate is a datetime, not a string), so this must be
	// a generic document; map[string]string makes decoding fail with
	// "cannot decode UTC datetime into a string type".
	Id primitive.M `bson:"_id"`
}
var (
dc int64
fieldsFilter = make(primitive.M)
mongoColl = ms.getCol(col)
)
for _, field := range fields {
fieldsFilter[field] = fmt.Sprintf("$%s", field)
}
findDuplicatesFilter := primitive.A{
primitive.M{"$group": primitive.M{
"_id": fieldsFilter,
"objectIDs": primitive.M{"$push": "$_id"},
"count": primitive.M{"$sum": 1},
}},
primitive.M{"$match": primitive.M{"count": primitive.M{"$gt": 1}}},
}
results := []findDuplicatesResult{}
var cur *mongo.Cursor
cur, err := mongoColl.Aggregate(ms.GetContext(), findDuplicatesFilter)
if err != nil {
return 0, err
}
err = cur.All(ms.GetContext(), &results)
if err != nil {
return 0, err
}
if len(results) > 0 {
for _, result := range results {
deleteFilter := primitive.M{
"_id": primitive.M{
"$in": result.ObjectIDs[1:],
},
}
mdr, err := mongoColl.DeleteMany(ms.GetContext(), deleteFilter)
This:
err = cur.All(ms.GetContext(), &results)
if err != nil {
return 0, err
}
causes the following error:
"cannot decode UTC datetime into a string type"
https://www.mongodb.com/community/forums/t/best-way-to-remove-duplicates-for-millions-of-records/178245
because JoinDate is of type time.Time.
Is there any way to fix this?