Thiết kế website giá rẻ

Question

This post marks the beginning of my ventures into calculating square roots. However, I have no idea how the processor, more specifically the FPU with the fsqrt instruction, performs the process. I have included a better method for measuring time and also made some improvements to the method I wanted to demonstrate in that post, achieving fewer iterations than the original. However, due to the many divisions, it is still a bit slower than the incredible Newton’s method. Therefore, I will post two codes I developed during this time, for you to give your opinion and see what can be improved. Especially regarding the calculation of Carmack’s method.

Here is the improved code from the initial post:

#include <iostream>
#include <iomanip>
#include <cmath>
#include <chrono>
#include <limits>
#include <vector>
#include <random>
#include <algorithm>
#include <numeric>

// Constants
const int NUM_EXECUTIONS = 1000000;
const double precision = 1e-12; // Adjusted precision
bool showNewtonIteration = true;
bool showAnothersIteration = true;

struct IterationData {
    double number;
    double sqrtResult;
    int iterations;
};

// Function to calculate a good initial approximation
double calculateInitial(double number) {
    double initial = (number + 1) * 0.5; // Use multiplication instead of division
    for (int i = 0; i < 5; i++) {
        initial = 0.5 * (initial + number / initial);
    }
    return initial;
}

// Optimized Newton's method for finding the square root
double newtonSqrt(double number, double precision, int &iterations, bool showIteration) {
    double guess = number * 0.5; // Starting guess
    double nextGuess;
    iterations = 0;

    while (true) {
        nextGuess = 0.5 * (guess + number / guess);
        iterations++;
        if (showIteration && showNewtonIteration) {
            std::cout << "Newton's method - ";
            std::cout << std::setprecision(std::numeric_limits<double>::max_digits10) << nextGuess << " and more...";
            if (std::fabs(nextGuess - guess) < precision) {
                std::cout << " reached desired precision";
            }
            std::cout << std::endl;
        }
        if (std::fabs(nextGuess - guess) < precision) {
            break;
        }
        guess = nextGuess;
    }
    return nextGuess;
}

// Iterative another's method with dynamic initial approximation
double anothersMethod(double number, double precision, int &iterations, bool showIteration) {
    double f = calculateInitial(number); // Using the initial approximation
    double e;
    double prev_f = 0;
    iterations = 0;

    while (true) {
        f = 0.5 * (f + number / f); // Recalculate initial dynamically
        e = f - (f * f - number) / (f + number / f); // New formula

        // Check if the value repeats within the precision
        if (std::fabs(f - prev_f) < precision) {
            break;
        }
        prev_f = f;
        f = e;
        iterations++;
        if (showIteration && showAnothersIteration) {
            std::cout << "Another's method - ";
            std::cout << std::setprecision(std::numeric_limits<double>::max_digits10) << f << " and more...";
            if (std::fabs(f - prev_f) < precision) {
                std::cout << " reached desired precision";
            }
            std::cout << std::endl;
        }
    }
    return f;
}

// Function to calculate the error
double calculateError(double root, double number) {
    double square = root * root;
    return std::fabs(square - number);
}

void showIterations(double number, double &resultNewton, double &resultAnothers, int &iterationsNewton, int &iterationsAnothers) {
    resultNewton = newtonSqrt(number, precision, iterationsNewton, true);
    std::cout << std::setprecision(std::numeric_limits<double>::max_digits10)
              << "Newton's method final result: " << resultNewton << " reached desired precision, Iterations: " << iterationsNewton << "nn";

    resultAnothers = anothersMethod(number, precision, iterationsAnothers, true);
    std::cout << std::setprecision(std::numeric_limits<double>::max_digits10)
              << "Another's method final result: " << resultAnothers << " reached desired precision, Iterations: " << iterationsAnothers << "nn";
}

void measureTime(double* randomNumbers, std::vector<double> &timesSqrt, std::vector<double> &timesNewton, std::vector<double> &timesAnothers,
                 std::vector<IterationData> &newtonData, std::vector<IterationData> &anothersData) {
    // Measuring time for standard sqrt function
    std::cout << "Calculating times...n";
    auto startSqrt = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < NUM_EXECUTIONS; i++) {
        volatile double result = std::sqrt(randomNumbers[i]);
    }
    auto endSqrt = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double, std::milli> durationSqrt = endSqrt - startSqrt;
    timesSqrt.push_back(durationSqrt.count());

    // Measuring time for Newton's pure method
    auto startNewton = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < NUM_EXECUTIONS; i++) {
        int iterationsNewton = 0;
        double result = newtonSqrt(randomNumbers[i], precision, iterationsNewton, false);
        newtonData.push_back({randomNumbers[i], result, iterationsNewton});
    }
    auto endNewton = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double, std::milli> durationNewton = endNewton - startNewton;
    timesNewton.push_back(durationNewton.count());

    // Measuring time for the another's method
    auto startAnothers = std::chrono::high_resolution_clock::now();
    for (int i = 0; i < NUM_EXECUTIONS; i++) {
        int iterationsAnothers = 0;
        double result = anothersMethod(randomNumbers[i], precision, iterationsAnothers, false);
        anothersData.push_back({randomNumbers[i], result, iterationsAnothers});
    }
    auto endAnothers = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double, std::milli> durationAnothers = endAnothers - startAnothers;
    timesAnothers.push_back(durationAnothers.count());
}

void displayAverageTimes(const std::vector<double> &timesSqrt, const std::vector<double> &timesNewton, const std::vector<double> &timesAnothers) {
    double avgSqrt = std::accumulate(timesSqrt.begin(), timesSqrt.end(), 0.0) / timesSqrt.size();
    double avgNewton = std::accumulate(timesNewton.begin(), timesNewton.end(), 0.0) / timesNewton.size();
    double avgAnothers = std::accumulate(timesAnothers.begin(), timesAnothers.end(), 0.0) / timesAnothers.size();

    std::cout << "nAverage Results:n";
    std::cout << std::scientific << std::setprecision(6); // Ensure values are displayed in scientific notation
    std::cout << "Standard sqrt average time (ms): " << avgSqrt << std::endl;
    std::cout << "Newton's method average time (ms): " << avgNewton << std::endl;
    std::cout << "Another's method average time (ms): " << avgAnothers << std::endl;
}

void calculateAndDisplayErrors(double number, const std::vector<double> &roots) {
    double actualSqrt = std::sqrt(number);
    std::cout << "nError comparison with actual sqrt(" << number << "): " << std::setprecision(std::numeric_limits<double>::max_digits10) << actualSqrt << "n";
    for (size_t i = 0; i < roots.size(); ++i) {
        double error = calculateError(roots[i], number);
        std::cout << "Error for root " << i + 1 << " (" << std::setprecision(std::numeric_limits<double>::max_digits10) << roots[i] << "): " << error << "n";
    }
}

void displaySomeIterations(const std::vector<IterationData> &newtonData, const std::vector<IterationData> &anothersData) {
    std::vector<IterationData> sortedNewtonData = newtonData;
    std::vector<IterationData> sortedAnothersData = anothersData;

    // Sort the data based on the number of iterations
    std::sort(sortedNewtonData.begin(), sortedNewtonData.end(), [](const IterationData &a, const IterationData &b) {
        return a.iterations > b.iterations;
    });

    std::sort(sortedAnothersData.begin(), sortedAnothersData.end(), [](const IterationData &a, const IterationData &b) {
        return a.iterations > b.iterations;
    });

    std::cout << "nSome random numbers and their iterations for Newton's method:n";
    for (size_t i = 0; i < 10 && i < sortedNewtonData.size(); ++i) {
        std::cout << "Number: " << sortedNewtonData[i].number
                  << ", Iterations: " << sortedNewtonData[i].iterations
                  << ", Sqrt: " << sortedNewtonData[i].sqrtResult << "n";
    }

    std::cout << "nSome random numbers and their iterations for Another's method:n";
    for (size_t i = 0; i < 10 && i < sortedAnothersData.size(); ++i) {
        std::cout << "Number: " << sortedAnothersData[i].number
                  << ", Iterations: " << sortedAnothersData[i].iterations
                  << ", Sqrt: " << sortedAnothersData[i].sqrtResult << "n";
    }
}

void clearScreen() {
    #ifdef _WIN32
        system("cls");
    #else
        system("clear");
    #endif
}

int main() {
    double number;

    std::cout << "Enter the number to find the square root: ";
    std::cin >> number;

    // Show iterations for each method with user input
    double resultNewton, resultAnothers;
    int iterationsNewton, iterationsAnothers;
    showIterations(number, resultNewton, resultAnothers, iterationsNewton, iterationsAnothers);

    std::cout << "Press any key to continue . . .";
    std::cin.get();
    std::cin.get();

    std::vector<double> timesSqrt, timesNewton, timesAnothers;
    std::vector<IterationData> newtonData, anothersData;

    double* randomNumbers = new double[NUM_EXECUTIONS];
    std::mt19937_64 rng;
    std::uniform_real_distribution<double> dist(1.0, 100.0); // Random numbers between 1 and 100

    for (int i = 0; i < NUM_EXECUTIONS; i++) {
        randomNumbers[i] = dist(rng);
    }

    measureTime(randomNumbers, timesSqrt, timesNewton, timesAnothers, newtonData, anothersData);

    delete[] randomNumbers;

    std::cout << "Press any key to continue to error calculations . . .";
    std::cin.get();

    // Display the error calculations
    std::vector<double> roots = {resultNewton, resultAnothers};
    calculateAndDisplayErrors(number, roots);

    std::cout << "Press any key to continue to time measurements . . .";
    std::cin.get();

    // Display the average times
    displayAverageTimes(timesSqrt, timesNewton, timesAnothers);

    std::cout << "Press any key to continue to top iterations display . . .";
    std::cin.get();

    // Display the top iterations
    std::cout << "Some random numbers and their iterations:n";
    displaySomeIterations(newtonData, anothersData);

    std::cout << "Press any key to exit . . .";
    std::cin.get();

    return 0;
}

Here is the code to calculate the magic number:

#include <iostream>
#include <iomanip>
#include <cmath>
#include <limits>
#include <vector>
#include <thread>
#include <mutex>
#include <chrono>
#include <atomic>
#include <sstream>

// Uncomment the following line to compile for Linux
// #define LINUX

#ifdef LINUX
#include <unistd.h>
#define MOVE_CURSOR_TO(x, y) std::cout << "33[" << (x) << ";" << (y) << "H"
#else
#include <windows.h>
#include <conio.h>
void moveCursorTo(int x, int y) {
    COORD coord = {static_cast<SHORT>(y), static_cast<SHORT>(x)};
    SetConsoleCursorPosition(GetStdHandle(STD_OUTPUT_HANDLE), coord);
}
#define MOVE_CURSOR_TO(x, y) moveCursorTo((x), (y))
#endif

const int BAR_WIDTH = 70; // Width of the progress bar
const int SLEEP_DURATION = 1; // Duration to sleep between progress updates in seconds
std::mutex mtx;
float globalMinError = std::numeric_limits<float>::max();
unsigned int globalBestMagicNumber = 0;
std::atomic<bool> done(false);
unsigned int numThreads = std::thread::hardware_concurrency(); // Determine the number of threads based on the processor
std::vector<unsigned int> threadProgress(numThreads, 0);

// Function to calculate the inverse square root using Carmack's method with an adjustable magic number
float fastInverseSqrt(float number, unsigned int magicNumber) {
    float x2 = number * 0.5F;
    float y = number;
    union {
        float f;
        int i;
    } u;
    u.f = y;
    u.i = magicNumber - (u.i >> 1);
    y = u.f;
    y = y * (1.5F - (x2 * y * y));
    return y;
}

// Function to calculate the square root using the inverse square root
float calculateSqrt(float number, unsigned int magicNumber) {
    float invSqrt = fastInverseSqrt(number, magicNumber);
    return number * invSqrt;
}

// Function to display a single progress bar
void showProgressBar(unsigned int current, unsigned int total) {
    float progress = static_cast<float>(current) / total;
    int pos = static_cast<int>(BAR_WIDTH * progress);
    std::ostringstream bar;
    bar << "[";
    for (int i = 0; i < BAR_WIDTH; ++i) {
        if (i < pos) bar << "=";
        else if (i == pos) bar << ">";
        else bar << " ";
    }
    bar << "] " << std::setw(3) << static_cast<int>(progress * 100.0) << " %";
    std::cout << bar.str();
}

// Function to test a range of magic numbers and find the best one
void testMagicNumbersRange(unsigned int start, unsigned int end, const std::vector<float>& testValues, unsigned int threadIndex) {
    unsigned int totalIterations = end - start;
    for (unsigned int magicNumber = start; magicNumber < end; ++magicNumber) {
        float totalError = 0.0f;
        for (float value : testValues) {
            float approxSqrt = calculateSqrt(value, magicNumber);
            float exactSqrt = std::sqrt(value);
            float error = std::fabs(approxSqrt - exactSqrt);
            totalError += error;
        }
        float averageError = totalError / testValues.size();

        {
            std::lock_guard<std::mutex> guard(mtx);
            if (averageError < globalMinError) {
                globalMinError = averageError;
                globalBestMagicNumber = magicNumber;
            }
        }

        // Update progress
        threadProgress[threadIndex] = magicNumber - start + 1;

        // Show progress for this thread
        if ((magicNumber - start) % 0x10000 == 0) {
            std::lock_guard<std::mutex> guard(mtx);
            MOVE_CURSOR_TO(threadIndex + 8, 1); // Move cursor to the appropriate line
            std::cout << "Thread " << threadIndex + 1 << ": ";
            showProgressBar(threadProgress[threadIndex], totalIterations);
            std::cout.flush();
        }
    }
    threadProgress[threadIndex] = totalIterations; // Mark thread as done
}

// Function to display the header
void showHeader() {
    std::cout << "=========================================n";
    std::cout << " Inverse Square Root Calculation Progressn";
    std::cout << "=========================================n";
    std::cout << "Calculating the optimal magic number forn";
    std::cout << "fast inverse square root approximation.n";
    std::cout << "Processor threads available: " << std::thread::hardware_concurrency() << std::endl;
    std::cout << "Threads used: " << numThreads << std::endl;
    std::cout << "-----------------------------------------n";
}

// Function to display overall progress
void showProgress(unsigned int totalIterations) {
    while (!done) {
        std::this_thread::sleep_for(std::chrono::seconds(SLEEP_DURATION));

        // Display progress for all threads
        std::lock_guard<std::mutex> guard(mtx);
        for (unsigned int i = 0; i < numThreads; ++i) {
            MOVE_CURSOR_TO(i + 8, 1); // Move cursor to the appropriate line
            std::cout << "Thread " << i + 1 << ": ";
            showProgressBar(threadProgress[i], totalIterations);
            std::cout << std::endl;
        }
    }
}

int main() {
    auto start = std::chrono::high_resolution_clock::now(); // Start timing

    std::vector<float> testValues = { 0.5f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 10.0f, 100.0f, 1000.0f };
    //unsigned int startMagicNumber = 0x00000000; // Begin with no idea what it can be
    //unsigned int endMagicNumber = 0xFFFFFFFF; // End with no idea what it can be
    unsigned int startMagicNumber = 0x5F000000; // Approximately close to the real value
    unsigned int endMagicNumber = 0x5FFFFFFF; // Approximately close to the real value
    unsigned int rangeSize = (endMagicNumber - startMagicNumber) / numThreads;
    std::vector<std::thread> threads;

    unsigned int totalIterations = rangeSize;

    // Display initial header
    showHeader();

    std::thread progressThread(showProgress, totalIterations);

    for (unsigned int i = 0; i < numThreads; ++i) {
        unsigned int start = startMagicNumber + i * rangeSize;
        unsigned int end = (i == numThreads - 1) ? endMagicNumber : start + rangeSize;
        threads.emplace_back(testMagicNumbersRange, start, end, std::ref(testValues), i);
    }

    for (auto& t : threads) {
        t.join();
    }

    done = true;
    progressThread.join();

    auto end = std::chrono::high_resolution_clock::now(); // End timing
    std::chrono::duration<double> duration = end - start; // Calculate duration

    std::cout << "nBest magic number: 0x" << std::hex << globalBestMagicNumber << std::dec << std::endl;

    // Test the best magic number found
    for (float value : testValues) {
        float approxSqrt = calculateSqrt(value, globalBestMagicNumber);
        float exactSqrt = std::sqrt(value);
        std::cout << "Value: " << value
                  << ", Approximated sqrt: " << approxSqrt
                  << ", Exact sqrt: " << exactSqrt
                  << ", Error: " << std::fabs(approxSqrt - exactSqrt) << std::endl;
    }

    std::cout << "nTotal execution time: " << duration.count() << " seconds" << std::endl;
    std::cout << "Press any key to exit . . .";
    std::cin.get();

    return 0;
}

I am not sure if there is another way to calculate the magic number without brute force, and I would like to know.

Thiết kế website giá rẻ

Danh mục

Studies on Square Roots