I’ve developed a Python script to calculate match scores between users based on their locations. The script generates combinations of users, calculates the average weight for each combination, computes the standard deviation of weights, and finally assigns a score to each combination. However, as the number of users increases, the script’s performance becomes noticeably slower.
import random
import statistics
def comb(arr):
    """Yield every way of splitting ``arr`` into unordered pairs.

    The first item is paired with each other item in turn, and the items
    left over are paired recursively, so every yielded pairing uses each
    item of ``arr`` exactly once.

    Args:
        arr: A sequence supporting ``len``, indexing and slicing.

    Yields:
        A list of two-element lists. The pairs are always freshly built
        lists, never the caller's own sequence object.

    Note:
        An empty or odd-length ``arr`` yields nothing, because no complete
        pairing exists. For ``n`` items there are (n-1)*(n-3)*...*1
        pairings, so the output grows very quickly with ``n``.
    """
    if len(arr) == 2:
        # Base case: build a new pair instead of yielding ``arr`` itself so
        # the result does not alias (or inherit the type of) the input.
        yield [[arr[0], arr[1]]]
        return
    for i in range(1, len(arr)):
        first = [arr[0], arr[i]]
        # Everything except index 0 and index i, in original order.
        rest = [*arr[1:i], *arr[i + 1:]]
        for com in comb(rest):
            yield [first] + com
def calculate_weight(combination, weights=None):
    """Return the match weight for a pair of users based on their locations.

    Args:
        combination: Two-item sequence of user dicts, each with a
            ``'location'`` key.
        weights: Optional mapping from ``(location, location)`` tuples to a
            weight. Defaults to the module-level ``location_weights`` table.

    Returns:
        The weight stored for the pair's locations in either order, or 0
        when neither order is present in the table.
    """
    if weights is None:
        weights = location_weights
    location_1 = combination[0]['location']
    location_2 = combination[1]['location']
    key = (location_1, location_2)
    # Test membership rather than comparing the value with 0, so that an
    # explicitly stored weight of 0 is honoured instead of being treated
    # as "missing" and replaced by the reversed key's weight.
    if key in weights:
        return weights[key]
    return weights.get((location_2, location_1), 0)
# Weight for each pair of locations. Each unordered pair is stored once;
# calculate_weight tries both key orders when looking a pair up.
location_weights = {
    ('A', 'A'): 1,
    ('B', 'B'): 1,
    ('C', 'C'): 1,
    ('A', 'B'): 0.3,
    ('B', 'C'): 0.7,
    ('A', 'C'): 0.5,
}
locations = ['A', 'B', 'C']

# Number of users to generate. Keep this even: comb() only produces
# pairings when every user can be placed in a pair.
NUM_USERS = 4

# Generate users with random locations
users = [
    {'name': f'User{i}', 'location': random.choice(locations)}
    for i in range(NUM_USERS)
]
# Score every possible way of pairing up the users.
#
# NOTE(performance): comb() enumerates all pairings, and their number is
# (n-1)*(n-3)*...*1 for n users (3 for 4 users, 945 for 10, about 6.5e8
# for 20). That growth, not the per-pairing arithmetic below, is why the
# script slows down; scaling further needs an approach that does not
# enumerate every pairing (e.g. a matching algorithm or a heuristic).
combinations = comb(users)
matches = []
for combination in combinations:
    weights = [calculate_weight(pot_match) for pot_match in combination]
    average = sum(weights) / len(weights)
    # statistics.stdev needs at least two data points; a pairing made of a
    # single pair (two users) has no spread, so use 0.0 instead of raising.
    stdev = statistics.stdev(weights) if len(weights) > 1 else 0.0
    matches.append({
        'users': combination,
        'average': average,
        'stdev': stdev,
        # Higher average raises the score; higher spread lowers it slightly.
        'score': average / (100 + stdev),
    })

# Best-scoring pairings first.
matches.sort(key=lambda x: x['score'], reverse=True)
for m in matches:
    print(m)
I’m seeking advice on how to optimize this code for better performance, especially when dealing with a larger number of users. Is there a way to do that, or should I completely change my approach?
I am a beginner programmer, so if you find any other mistakes or have optimization suggestions, feel free to point them out.