I’m facing a performance issue when performing matrix multiplication operations using the Eigen and ViennaCL libraries in C++. I’m comparing the performance between executing these operations on the integrated GPU of my system and on the CPU.
My system has an integrated Intel GPU, and I’m running the code on an eighth-generation Intel Core i5. To my surprise, I found that matrix multiplication takes about 200 seconds when executed on the GPU using ViennaCL, while it takes only about 20 seconds when executed on the CPU using Eigen.
I’m puzzled by this performance discrepancy and would like to better understand the reason behind it. Can an integrated GPU really perform this much worse than the CPU for matrix multiplication?
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <vector>

#include <Eigen/Dense>
#include <viennacl/backend/memory.hpp>
#include <viennacl/linalg/prod.hpp>
#include <viennacl/matrix.hpp>
int main() {
const int size = 1000; // Size of the matrices
// Creating two large matrices using ViennaCL
viennacl::matrix<float> matrix1_viennacl(size, size);
viennacl::matrix<float> matrix2_viennacl(size, size);
// Initializing the matrices with random values
for (int i = 0; i < size; ++i) {
for (int j = 0; j < size; ++j) {
matrix1_viennacl(i, j) = rand() / static_cast<float>(RAND_MAX);
matrix2_viennacl(i, j) = rand() / static_cast<float>(RAND_MAX);
}
}
// Performing intensive computation with the matrices using ViennaCL and measuring the
// execution time
auto start_viennacl = std::chrono::steady_clock::now();
for (int i = 0; i < 100; ++i) {
// Performing a matrix-matrix multiplication operation with ViennaCL
viennacl::matrix<float> result_viennacl =
viennacl::linalg::prod(matrix1_viennacl, matrix2_viennacl);
}
auto end_viennacl = std::chrono::steady_clock::now();
std::chrono::duration<double> time_viennacl = end_viennacl - start_viennacl;
// Printing the execution time with ViennaCL
std::cout << "Execution time with ViennaCL: " << time_viennacl.count()
<< " seconds" << std::endl;
// Creating two large matrices using Eigen
Eigen::MatrixXf matrix1_eigen(size, size);
Eigen::MatrixXf matrix2_eigen(size, size);
// Initializing the matrices with the same random values
for (int i = 0; i < size; ++i) {
for (int j = 0; j < size; ++j) {
matrix1_eigen(i, j) = matrix1_viennacl(i, j);
matrix2_eigen(i, j) = matrix2_viennacl(i, j);
}
}
// Performing intensive computation with the matrices using Eigen and measuring the
// execution time
auto start_eigen = std::chrono::steady_clock::now();
for (int i = 0; i < 100; ++i) {
// Performing a matrix-matrix multiplication operation with Eigen
Eigen::MatrixXf result_eigen = matrix1_eigen * matrix2_eigen;
}
auto end_eigen = std::chrono::steady_clock::now();
std::chrono::duration<double> time_eigen = end_eigen - start_eigen;
// Printing the execution time with Eigen
std::cout << "Execution time with Eigen: " << time_eigen.count()
<< " seconds" << std::endl;
return 0;
}