I have never written SIMD code before. I wrote a simple program to test the performance, with my data laid out as shown below; however, running the code does not give me the superior performance I expected.
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <memory>

#include <immintrin.h>
#include <smmintrin.h>
// File-scope timestamp variables.
// NOTE(review): main() keeps its own timing variables, so these globals appear
// to be unused in this file — confirm before removing.
unsigned t0, t1;
/// Four packed single-precision floats.
///
/// The anonymous union lets the same 16 bytes be addressed either lane by lane
/// (`val`) or as one SSE register-sized value (`val_simd`).
struct VAL
{
    union
    {
        float  val[4];    // per-lane scalar view
        __m128 val_simd;  // whole-vector view used with the _mm_* intrinsics
    };
};
int main()
{
unsigned t0, t1;
double time;
VAL *a, *b, *c;
unsigned test_num = 100000000;
a = new VAL[test_num];
b = new VAL[test_num];
c = new VAL[test_num];
t0 = clock();
for (int i = 0; i < test_num; i++)
{
a[i].val[0] = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
a[i].val[1] = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
a[i].val[2] = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
a[i].val[3] = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
b[i].val[0] = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
b[i].val[1] = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
b[i].val[2] = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
b[i].val[3] = static_cast <float> (rand()) / static_cast <float> (RAND_MAX);
c[i].val[0] = 0.0f;
c[i].val[1] = 0.0f;
c[i].val[2] = 0.0f;
c[i].val[3] = 0.0f;
}
t1 = clock();
time = (double(t1 - t0) / CLOCKS_PER_SEC);
std::cout << "Execution Time0: " << time << 'n';
t0 = clock();
for (int i = 0; i < test_num; i++)
{
c[i].val[0] = a[i].val[0] + b[i].val[0];
c[i].val[1] = a[i].val[1] + b[i].val[1];
c[i].val[2] = a[i].val[2] + b[i].val[2];
//c[i].val[3] = a[i].val[3] + b[i].val[3];
}
t1 = clock();
time = (double(t1 - t0) / CLOCKS_PER_SEC);
std::cout << "Execution Time1: " << time << 'n';
t0 = clock();
for (int i = 0; i < test_num; i++)
{
c[i].val_simd = _mm_add_ps(a[i].val_simd, b[i].val_simd);
}
t1 = clock();
time = (double(t1 - t0) / CLOCKS_PER_SEC);
std::cout << "Execution Time2: " << time << 'n';
delete[] a;
delete[] b;
delete[] c;
}
I expected to see much better performance, at least 3x; however, both versions gave the same times. On the other hand, testing the same code but multiplying SIMD gave me but performance.
New contributor
Gonzalo Vasquez is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.