I have this code, parallel odd-even merge sort. I am now trying to optimize it more, and I think a good idea is to parallelize the for loop that is commented, as that comparations are independent. But, when I parallelize that loop, only worse performance is achieved. I printed the thread ID inside the loop, and only one thread is looping, even if I write num_threads(2)
#pragma omp parallel
{
#pragma omp single
oddEvenMergeSort(newArr, newSize, 0, limit);
}
void oddEvenMergeSort(int *arr, int n, int level, int limit) {
if (n > 1) {
int m = n / 2;
if (level < limit)
{
level += 1;
#pragma omp task shared(arr)
oddEvenMergeSort(arr, m, level, limit);
#pragma omp task shared(arr)
oddEvenMergeSort(arr + m, n - m, level, limit);
#pragma omp taskwait
oddEvenMerge(arr, n);
}
else {
oddEvenMergeSort(arr, m, level, limit);
oddEvenMergeSort(arr + m, n - m, level, limit);
oddEvenMerge(arr, n);
}
}
}
void oddEvenMerge(int *arr, int n) {
if (n == 1) return;
if (n == 2) {
compare(arr, 0, 1);
return;
}
else {
int *odd = (int *)malloc(sizeof(int) * (n / 2));
int *even = (int *)malloc(sizeof(int) * (n / 2));
oddEvenSplit(arr, odd, even, n);
oddEvenMerge(odd, n / 2);
oddEvenMerge(even, n / 2);
oddEvenJoin(arr, odd, even, n);
// #pragma omp parallel for schedule(static, 1)
for (int i = 1; i < n / 2; i++) {
printf("Thread %dn", omp_get_thread_num());
compare(arr, 2 * i - 1, 2 * i);
}
free(odd);
free(even);
}
}