Studies on Square Roots

This post marks the beginning of my ventures into calculating square roots. However, I have no idea how the processor, more specifically the FPU with the fsqrt instruction, performs the process. I have included a better method for measuring time and also made some improvements to the method I wanted to demonstrate in that post, achieving fewer iterations than the original. However, due to the many divisions, it is still a bit slower than the incredible Newton’s method. Therefore, I will post two codes I developed during this time, for you to give your opinion and see what can be improved. Especially regarding the calculation of Carmack’s method.

Here is the improved code from the initial post:

#include <iostream>
#include <iomanip>
#include <cmath>
#include <chrono>
#include <limits>
#include <vector>
#include <random>
#include <algorithm>
#include <numeric>

// Constants
const int NUM_EXECUTIONS = 1000000;
const double precision = 1e-12; // Adjusted precision
bool showNewtonIteration = true;
bool showAnothersIteration = true;

struct IterationData {
    double number;
    double sqrtResult;
    int iterations;
};

// Function to calculate a good initial approximation
double calculateInitial(double number) {
    double initial = (number + 1) * 0.5; // Use multiplication instead of division
    for (int i = 0; i < 5; i++) {
        initial = 0.5 * (initial + number / initial);
    }
    return initial;
}

// Optimized Newton's method for finding the square root
double newtonSqrt(double number, double precision, int &iterations, bool showIteration) {
    double guess = number * 0.5; // Starting guess
    double nextGuess;
    iterations = 0;

    while (true) {
        nextGuess = 0.5 * (guess + number / guess);
        iterations++;
        if (showIteration && showNewtonIteration) {
            std::cout << "Newton's method - ";
            std::cout << std::setprecision(std::numeric_limits<double>::max_digits10) << nextGuess << " and more...";
            if (std::fabs(nextGuess - guess) < precision) {
                std::cout << " reached desired precision";
            }
            std::cout << std::endl;
        }
        if (std::fabs(nextGuess - guess) < precision) {
            break;
        }
        guess = nextGuess;
    }
    return nextGuess;
}

// Iterative another's method with dynamic initial approximation
double anothersMethod(double number, double precision, int &iterations, bool showIteration) {
    double f = calculateInitial(number); // Using the initial approximation
    double e;
    double prev_f = 0;
    iterations = 0;

    while (true) {
        f = 0.5 * (f + number / f); // Recalculate initial dynamically
        e = f - (f * f - number) / (f + number / f); // New formula

        // Check if the value repeats within the precision
        if (std::fabs(f - prev_f) < precision) {
            break;
        }
        prev_f = f;
        f = e;
        iterations++;
        if (showIteration && showAnothersIteration) {
            std::cout << "Another's method - ";
            std::cout << std::setprecision(std::numeric_limits<double>::max_digits10) << f << " and more...";
            if (std::fabs(f - prev_f) < precision) {
                std::cout << " reached desired precision";
            }
            std::cout << std::endl;
        }
    }
    return f;
}

// Function to calculate the error
double calculateError(double root, double number) {
    double square = root * root;
    return std::fabs(square - number);
}

void showIterations(double number, double &resultNewton, double &resultAnothers, int &iterationsNewton, int &iterationsAnothers) {
    resultNewton = newtonSqrt(number, precision, iterationsNewton, true);
    std::cout << std::setprecision(std::numeric_limits<double>::max_digits10)
              << "Newton's method final result: " << resultNewton << " reached desired precision, Iterations: " << iterationsNewton << "nn";

    resultAnothers = anothersMethod(number, precision, iterationsAnothers, true);
    std::cout << std::setprecision(std::numeric_limits<double>::max_digits10)
              << "Another's method final result: " << resultAnothers << " reached desired precision, Iterations: " << iterationsAnothers << "nn";
}

void measureTime(double* randomNumbers, std::vector<double> &timesSqrt, std::vector<double> &timesNewton, std::vector<double> &timesAnothers,
                 std::vector<IterationData> &newtonData, std::vector<IterationData> &anothersData) {
    // Measuring time for standard sqrt function
    std::cout << "Calculating times...n";
    auto startSqrt = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < NUM_EXECUTIONS; i++) {
        volatile double result = std::sqrt(randomNumbers[i]);
    }
    auto endSqrt = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double, std::milli> durationSqrt = endSqrt - startSqrt;
    timesSqrt.push_back(durationSqrt.count());

    // Measuring time for Newton's pure method
    auto startNewton = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < NUM_EXECUTIONS; i++) {
        int iterationsNewton = 0;
        double result = newtonSqrt(randomNumbers[i], precision, iterationsNewton, false);
        newtonData.push_back({randomNumbers[i], result, iterationsNewton});
    }
    auto endNewton = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double, std::milli> durationNewton = endNewton - startNewton;
    timesNewton.push_back(durationNewton.count());

    // Measuring time for the another's method
    auto startAnothers = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < NUM_EXECUTIONS; i++) {
        int iterationsAnothers = 0;
        double result = anothersMethod(randomNumbers[i], precision, iterationsAnothers, false);
        anothersData.push_back({randomNumbers[i], result, iterationsAnothers});
    }
    auto endAnothers = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double, std::milli> durationAnothers = endAnothers - startAnothers;
    timesAnothers.push_back(durationAnothers.count());
}

void displayAverageTimes(const std::vector<double> &timesSqrt, const std::vector<double> &timesNewton, const std::vector<double> &timesAnothers) {
    double avgSqrt = std::accumulate(timesSqrt.begin(), timesSqrt.end(), 0.0) / timesSqrt.size();
    double avgNewton = std::accumulate(timesNewton.begin(), timesNewton.end(), 0.0) / timesNewton.size();
    double avgAnothers = std::accumulate(timesAnothers.begin(), timesAnothers.end(), 0.0) / timesAnothers.size();

    std::cout << "nAverage Results:n";
    std::cout << std::scientific << std::setprecision(6); // Ensure values are displayed in scientific notation
    std::cout << "Standard sqrt average time (ms): " << avgSqrt << std::endl;
    std::cout << "Newton's method average time (ms): " << avgNewton << std::endl;
    std::cout << "Another's method average time (ms): " << avgAnothers << std::endl;
}

void calculateAndDisplayErrors(double number, const std::vector<double> &roots) {
    double actualSqrt = std::sqrt(number);
    std::cout << "nError comparison with actual sqrt(" << number << "): " << std::setprecision(std::numeric_limits<double>::max_digits10) << actualSqrt << "n";
    for (size_t i = 0; i < roots.size(); ++i) {
        double error = calculateError(roots[i], number);
        std::cout << "Error for root " << i + 1 << " (" << std::setprecision(std::numeric_limits<double>::max_digits10) << roots[i] << "): " << error << "n";
    }
}

void displaySomeIterations(const std::vector<IterationData> &newtonData, const std::vector<IterationData> &anothersData) {
    std::vector<IterationData> sortedNewtonData = newtonData;
    std::vector<IterationData> sortedAnothersData = anothersData;

    // Sort the data based on the number of iterations
    std::sort(sortedNewtonData.begin(), sortedNewtonData.end(), [](const IterationData &a, const IterationData &b) {
        return a.iterations > b.iterations;
    });

    std::sort(sortedAnothersData.begin(), sortedAnothersData.end(), [](const IterationData &a, const IterationData &b) {
        return a.iterations > b.iterations;
    });

    std::cout << "nSome random numbers and their iterations for Newton's method:n";
    for (size_t i = 0; i < 10 && i < sortedNewtonData.size(); ++i) {
        std::cout << "Number: " << sortedNewtonData[i].number
                  << ", Iterations: " << sortedNewtonData[i].iterations
                  << ", Sqrt: " << sortedNewtonData[i].sqrtResult << "n";
    }

    std::cout << "nSome random numbers and their iterations for Another's method:n";
    for (size_t i = 0; i < 10 && i < sortedAnothersData.size(); ++i) {
        std::cout << "Number: " << sortedAnothersData[i].number
                  << ", Iterations: " << sortedAnothersData[i].iterations
                  << ", Sqrt: " << sortedAnothersData[i].sqrtResult << "n";
    }
}

void clearScreen() {
    #ifdef _WIN32
        system("cls");
    #else
        system("clear");
    #endif
}

int main() {
    double number;

    std::cout << "Enter the number to find the square root: ";
    std::cin >> number;

    // Show iterations for each method with user input
    double resultNewton, resultAnothers;
    int iterationsNewton, iterationsAnothers;
    showIterations(number, resultNewton, resultAnothers, iterationsNewton, iterationsAnothers);

    std::cout << "Press any key to continue . . .";
    std::cin.get();
    std::cin.get();

    std::vector<double> timesSqrt, timesNewton, timesAnothers;
    std::vector<IterationData> newtonData, anothersData;

    double* randomNumbers = new double[NUM_EXECUTIONS];
    std::mt19937_64 rng;
    std::uniform_real_distribution<double> dist(1.0, 100.0); // Random numbers between 1 and 100

    for (int i = 0; i < NUM_EXECUTIONS; i++) {
        randomNumbers[i] = dist(rng);
    }

    measureTime(randomNumbers, timesSqrt, timesNewton, timesAnothers, newtonData, anothersData);

    delete[] randomNumbers;

    std::cout << "Press any key to continue to error calculations . . .";
    std::cin.get();

    // Display the error calculations
    std::vector<double> roots = {resultNewton, resultAnothers};
    calculateAndDisplayErrors(number, roots);

    std::cout << "Press any key to continue to time measurements . . .";
    std::cin.get();

    // Display the average times
    displayAverageTimes(timesSqrt, timesNewton, timesAnothers);

    std::cout << "Press any key to continue to top iterations display . . .";
    std::cin.get();

    // Display the top iterations
    std::cout << "Some random numbers and their iterations:n";
    displaySomeIterations(newtonData, anothersData);

    std::cout << "Press any key to exit . . .";
    std::cin.get();

    return 0;
}

Here is the code to calculate the magic number:

#include <iostream>
#include <iomanip>
#include <cmath>
#include <limits>
#include <vector>
#include <thread>
#include <mutex>
#include <chrono>
#include <atomic>
#include <sstream>

// Uncomment the following line to compile for Linux
// #define LINUX

#ifdef LINUX
#include <unistd.h>
#define MOVE_CURSOR_TO(x, y) std::cout << "33[" << (x) << ";" << (y) << "H"
#else
#include <windows.h>
#include <conio.h>
void moveCursorTo(int x, int y) {
    COORD coord = {static_cast<SHORT>(y), static_cast<SHORT>(x)};
    SetConsoleCursorPosition(GetStdHandle(STD_OUTPUT_HANDLE), coord);
}
#define MOVE_CURSOR_TO(x, y) moveCursorTo((x), (y))
#endif

const int BAR_WIDTH = 70; // Width of the progress bar
const int SLEEP_DURATION = 1; // Duration to sleep between progress updates in seconds
std::mutex mtx;
float globalMinError = std::numeric_limits<float>::max();
unsigned int globalBestMagicNumber = 0;
std::atomic<bool> done(false);
unsigned int numThreads = std::thread::hardware_concurrency(); // Determine the number of threads based on the processor
std::vector<unsigned int> threadProgress(numThreads, 0);

// Function to calculate the inverse square root using Carmack's method with an adjustable magic number
float fastInverseSqrt(float number, unsigned int magicNumber) {
    float x2 = number * 0.5F;
    float y = number;
    union {
        float f;
        int i;
    } u;
    u.f = y;
    u.i = magicNumber - (u.i >> 1);
    y = u.f;
    y = y * (1.5F - (x2 * y * y));
    return y;
}

// Function to calculate the square root using the inverse square root
float calculateSqrt(float number, unsigned int magicNumber) {
    float invSqrt = fastInverseSqrt(number, magicNumber);
    return number * invSqrt;
}

// Function to display a single progress bar
void showProgressBar(unsigned int current, unsigned int total) {
    float progress = static_cast<float>(current) / total;
    int pos = static_cast<int>(BAR_WIDTH * progress);
    std::ostringstream bar;
    bar << "[";
    for (int i = 0; i < BAR_WIDTH; ++i) {
        if (i < pos) bar << "=";
        else if (i == pos) bar << ">";
        else bar << " ";
    }
    bar << "] " << std::setw(3) << static_cast<int>(progress * 100.0) << " %";
    std::cout << bar.str();
}

// Function to test a range of magic numbers and find the best one
void testMagicNumbersRange(unsigned int start, unsigned int end, const std::vector<float>& testValues, unsigned int threadIndex) {
    unsigned int totalIterations = end - start;
    for (unsigned int magicNumber = start; magicNumber < end; ++magicNumber) {
        float totalError = 0.0f;
        for (float value : testValues) {
            float approxSqrt = calculateSqrt(value, magicNumber);
            float exactSqrt = std::sqrt(value);
            float error = std::fabs(approxSqrt - exactSqrt);
            totalError += error;
        }
        float averageError = totalError / testValues.size();

        {
            std::lock_guard<std::mutex> guard(mtx);
            if (averageError < globalMinError) {
                globalMinError = averageError;
                globalBestMagicNumber = magicNumber;
            }
        }

        // Update progress
        threadProgress[threadIndex] = magicNumber - start + 1;

        // Show progress for this thread
        if ((magicNumber - start) % 0x10000 == 0) {
            std::lock_guard<std::mutex> guard(mtx);
            MOVE_CURSOR_TO(threadIndex + 8, 1); // Move cursor to the appropriate line
            std::cout << "Thread " << threadIndex + 1 << ": ";
            showProgressBar(threadProgress[threadIndex], totalIterations);
            std::cout.flush();
        }
    }
    threadProgress[threadIndex] = totalIterations; // Mark thread as done
}

// Function to display the header
void showHeader() {
    std::cout << "=========================================n";
    std::cout << " Inverse Square Root Calculation Progressn";
    std::cout << "=========================================n";
    std::cout << "Calculating the optimal magic number forn";
    std::cout << "fast inverse square root approximation.n";
    std::cout << "Processor threads available: " << std::thread::hardware_concurrency() << std::endl;
    std::cout << "Threads used: " << numThreads << std::endl;
    std::cout << "-----------------------------------------n";
}

// Function to display overall progress
void showProgress(unsigned int totalIterations) {
    while (!done) {
        std::this_thread::sleep_for(std::chrono::seconds(SLEEP_DURATION));

        // Display progress for all threads
        std::lock_guard<std::mutex> guard(mtx);
        for (unsigned int i = 0; i < numThreads; ++i) {
            MOVE_CURSOR_TO(i + 8, 1); // Move cursor to the appropriate line
            std::cout << "Thread " << i + 1 << ": ";
            showProgressBar(threadProgress[i], totalIterations);
            std::cout << std::endl;
        }
    }
}

int main() {
    auto start = std::chrono::high_resolution_clock::now(); // Start timing

    std::vector<float> testValues = { 0.5f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 10.0f, 100.0f, 1000.0f };
    //unsigned int startMagicNumber = 0x00000000; // Begin with no idea what it can be
    //unsigned int endMagicNumber = 0xFFFFFFFF; // End with no idea what it can be
    unsigned int startMagicNumber = 0x5F000000; // Approximately close to the real value
    unsigned int endMagicNumber = 0x5FFFFFFF; // Approximately close to the real value
    unsigned int rangeSize = (endMagicNumber - startMagicNumber) / numThreads;
    std::vector<std::thread> threads;

    unsigned int totalIterations = rangeSize;

    // Display initial header
    showHeader();

    std::thread progressThread(showProgress, totalIterations);

    for (unsigned int i = 0; i < numThreads; ++i) {
        unsigned int start = startMagicNumber + i * rangeSize;
        unsigned int end = (i == numThreads - 1) ? endMagicNumber : start + rangeSize;
        threads.emplace_back(testMagicNumbersRange, start, end, std::ref(testValues), i);
    }

    for (auto& t : threads) {
        t.join();
    }

    done = true;
    progressThread.join();

    auto end = std::chrono::high_resolution_clock::now(); // End timing
    std::chrono::duration<double> duration = end - start; // Calculate duration

    std::cout << "nBest magic number: 0x" << std::hex << globalBestMagicNumber << std::dec << std::endl;

    // Test the best magic number found
    for (float value : testValues) {
        float approxSqrt = calculateSqrt(value, globalBestMagicNumber);
        float exactSqrt = std::sqrt(value);
        std::cout << "Value: " << value
                  << ", Approximated sqrt: " << approxSqrt
                  << ", Exact sqrt: " << exactSqrt
                  << ", Error: " << std::fabs(approxSqrt - exactSqrt) << std::endl;
    }

    std::cout << "nTotal execution time: " << duration.count() << " seconds" << std::endl;
    std::cout << "Press any key to exit . . .";
    std::cin.get();

    return 0;
}

I am not sure if there is another way to calculate the magic number without brute force, and I would like to know.

New contributor

vanzuita is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.

Trang chủ Giới thiệu Sinh nhật bé trai Sinh nhật bé gái Tổ chức sự kiện Biểu diễn giải trí Dịch vụ khác Trang trí tiệc cưới Tổ chức khai trương Tư vấn dịch vụ Thư viện ảnh Tin tức - sự kiện Liên hệ Chú hề sinh nhật Trang trí YEAR END PARTY công ty Trang trí tất niên cuối năm Trang trí tất niên xu hướng mới nhất Trang trí sinh nhật bé trai Hải Đăng Trang trí sinh nhật bé Khánh Vân Trang trí sinh nhật Bích Ngân Trang trí sinh nhật bé Thanh Trang Thuê ông già Noel phát quà Biểu diễn xiếc khỉ Xiếc quay đĩa Dịch vụ tổ chức sự kiện 5 sao Thông tin về chúng tôi Dịch vụ sinh nhật bé trai Dịch vụ sinh nhật bé gái Sự kiện trọn gói Các tiết mục giải trí Dịch vụ bổ trợ Tiệc cưới sang trọng Dịch vụ khai trương Tư vấn tổ chức sự kiện Hình ảnh sự kiện Cập nhật tin tức Liên hệ ngay Thuê chú hề chuyên nghiệp Tiệc tất niên cho công ty Trang trí tiệc cuối năm Tiệc tất niên độc đáo Sinh nhật bé Hải Đăng Sinh nhật đáng yêu bé Khánh Vân Sinh nhật sang trọng Bích Ngân Tiệc sinh nhật bé Thanh Trang Dịch vụ ông già Noel Xiếc thú vui nhộn Biểu diễn xiếc quay đĩa Dịch vụ tổ chức tiệc uy tín Khám phá dịch vụ của chúng tôi Tiệc sinh nhật cho bé trai Trang trí tiệc cho bé gái Gói sự kiện chuyên nghiệp Chương trình giải trí hấp dẫn Dịch vụ hỗ trợ sự kiện Trang trí tiệc cưới đẹp Khởi đầu thành công với khai trương Chuyên gia tư vấn sự kiện Xem ảnh các sự kiện đẹp Tin mới về sự kiện Kết nối với đội ngũ chuyên gia Chú hề vui nhộn cho tiệc sinh nhật Ý tưởng tiệc cuối năm Tất niên độc đáo Trang trí tiệc hiện đại Tổ chức sinh nhật cho Hải Đăng Sinh nhật độc quyền Khánh Vân Phong cách tiệc Bích Ngân Trang trí tiệc bé Thanh Trang Thuê dịch vụ ông già Noel chuyên nghiệp Xem xiếc khỉ đặc sắc Xiếc quay đĩa thú vị
Trang chủ Giới thiệu Sinh nhật bé trai Sinh nhật bé gái Tổ chức sự kiện Biểu diễn giải trí Dịch vụ khác Trang trí tiệc cưới Tổ chức khai trương Tư vấn dịch vụ Thư viện ảnh Tin tức - sự kiện Liên hệ Chú hề sinh nhật Trang trí YEAR END PARTY công ty Trang trí tất niên cuối năm Trang trí tất niên xu hướng mới nhất Trang trí sinh nhật bé trai Hải Đăng Trang trí sinh nhật bé Khánh Vân Trang trí sinh nhật Bích Ngân Trang trí sinh nhật bé Thanh Trang Thuê ông già Noel phát quà Biểu diễn xiếc khỉ Xiếc quay đĩa
Thiết kế website Thiết kế website Thiết kế website Cách kháng tài khoản quảng cáo Mua bán Fanpage Facebook Dịch vụ SEO Tổ chức sinh nhật