#include <iostream>
#include <chrono>
void foo(int* a, int* b) {
*a+=1;
*b+=1;
*a+=1;
}
void goo(int* __restrict a, int* b) {
*a+=1;
*b+=1;
*a+=1;
}
void measure() {
int x = 1;
int y = 2;
auto start_foo = std::chrono::high_resolution_clock::now();
for(int i = 0; i < 10000000; ++i) {
foo(&x, &y);
}
auto end_foo = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> duration_foo = end_foo - start_foo;
std::cout << "foo Runtime(secs): " << duration_foo.count() << std::endl;
auto start_goo = std::chrono::high_resolution_clock::now();
for(int i = 0; i < 10000000; ++i) {
goo(&x, &y);
}
auto end_goo = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> duration_goo = end_goo - start_goo;
std::cout << "goo Runtime(secs): " << duration_goo.count() << std::endl;
std::cout << "Without/With: " << duration_foo.count() / duration_goo.count() << std::endl << std::endl;
}
int main() {
for (int i = 0; i < 10; ++i) {
measure();
}
}
I see that goo
is optimized as expected via __restirct
, but the output confused me a lot since there are cases when goo
performs much more worse than foo
. In 3/10 cases they perform similiarly, in 3/10 cases foo
performs better, and only in 4/10 cases we have expected result.
Please, explain why?