I have been trying to learn multiprocessing. To test my understanding, I wrote a small program that generates a list of random numbers and then finds the largest number in the list two different ways: the first method without multiprocessing, the second with it. However, when I run the code, the multiprocessing version is always hundreds of times slower, regardless of the length of the list.
import random
import multiprocessing
import time

def random_num_list(length=100000):
    return [random.randint(-13443, 435234) for i in range(length)]

rando_list = random_num_list()

def method1(lst):
    biggest_num = -13443
    for i in lst:
        if i > biggest_num:
            biggest_num = i
    return biggest_num

lst1, lst2 = rando_list[:int(len(rando_list)/3)], rando_list[int(len(rando_list)/3):]

def partial_process(lst1, lst2):
    biggest_num = -13443
    for i in lst1:
        if i > biggest_num:
            biggest_num = i
    lst2.append(biggest_num)
    #print("New record added!\n")

def full_process(lst2):
    biggest_num = -13443
    for i in lst2:
        if i > biggest_num:
            biggest_num = i
    print(biggest_num)

def method2(lst1, lst2):
    with multiprocessing.Manager() as manager:
        lst2 = manager.list(lst2)
        p1 = multiprocessing.Process(target=partial_process, args=(lst1, lst2))
        p2 = multiprocessing.Process(target=full_process, args=(lst2,))
        p1.start()
        p1.join()
        p2.start()
        p2.join()

if __name__ == '__main__':
    method2(lst1, lst2)
    print(method1(rando_list))
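The timing code itself isn't shown above; this is roughly what I mean by "runtime" (a minimal sketch replacing the __main__ block, using time.perf_counter from the time import above — any wall-clock timer would do):

if __name__ == '__main__':
    start = time.perf_counter()
    answer = method1(rando_list)
    print(f"method1: {answer} in {time.perf_counter() - start:.4f}s")

    start = time.perf_counter()
    method2(lst1, lst2)  # full_process prints its result from the child process
    print(f"method2: {time.perf_counter() - start:.4f}s")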
I also tried writing method2 like this, using a Pool instead of a Manager:
def method2(lst):
    num_processes = multiprocessing.cpu_count()  # Number of processes to match the number of CPU cores
    chunk_size = len(lst) // num_processes
    with multiprocessing.Pool(processes=num_processes) as pool:
        # Split the list into chunks and process in parallel
        results = pool.map(find_max, [lst[i * chunk_size:(i + 1) * chunk_size] for i in range(num_processes)])
    return max(results)
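find_max isn't defined in the snippet above; assuming it is the same linear scan as method1, a self-contained, runnable version of this Pool attempt looks like the sketch below (with timing added, and with the last chunk extended so the remainder isn't silently dropped when the list length isn't divisible by the core count):

import random
import time
import multiprocessing

def find_max(chunk):
    # Assumption: the missing find_max is the same linear scan as method1.
    biggest_num = chunk[0]
    for i in chunk:
        if i > biggest_num:
            biggest_num = i
    return biggest_num

def method2(lst):
    num_processes = multiprocessing.cpu_count()
    chunk_size = len(lst) // num_processes
    # Extend the last chunk so no elements are dropped when
    # len(lst) isn't divisible by num_processes.
    chunks = [lst[i * chunk_size:(i + 1) * chunk_size]
              for i in range(num_processes - 1)]
    chunks.append(lst[(num_processes - 1) * chunk_size:])
    with multiprocessing.Pool(processes=num_processes) as pool:
        results = pool.map(find_max, chunks)  # one chunk per worker
    return max(results)

if __name__ == '__main__':
    rando_list = [random.randint(-13443, 435234) for _ in range(100000)]
    start = time.perf_counter()
    print(method2(rando_list))
    print(f"pool version: {time.perf_counter() - start:.4f}s")

The final max(results) just reduces the per-chunk maxima from the workers to the overall maximum in the parent process.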