I have a requirement to:
- take a bunch of IDs from the user (100 or more)
- create a URL from each of those IDs
- open each URL as a separate tab and scrape info from it
- store all the scraped info as a JSON object
- write the JSON object into a CSV file
I am trying to achieve this by running a JS script in the Chrome DevTools console, but I keep ending up with Error code: SIGILL after the script performs the above actions for 20-30 IDs.
How do I resolve this?
My code is as follows:
// Function to prompt user for input and convert it to an array of strings
function promptAndConvertToArray() {
  // Prompt the user for input
  const userInput = prompt('Enter a list of IDs separated by new lines:');
  // Check if user clicked cancel or entered nothing
  if (!userInput) {
    console.log('No input provided.');
    return [];
  }
  // Split the input string by new line characters to get an array
  const idArray = userInput.split('\n').map(id => id.trim());
  return idArray;
}
// Function to generate links
function generateLinks(ids) {
  const prefix = ''; // removing the prefix as it's proprietary info
  const suffix = ''; // removing the suffix as it's proprietary info
  return ids.map(id => prefix + id + suffix);
}
// Function to write output to a CSV file
function downloadCSV(data) {
  const csvHeaders = ['Link'];
  const csvRows = [];
  // Extract all possible keys from the inner objects
  const allKeys = Object.values(data).reduce((keys, innerData) => {
    return keys.concat(Object.keys(innerData.spans));
  }, []);
  // Remove duplicate keys and sort them
  const uniqueKeys = [...new Set(allKeys)].sort();
  // Add headers for each key in the CSV
  csvHeaders.push(...uniqueKeys);
  // Generate rows for each link
  for (const [link, innerData] of Object.entries(data)) {
    const row = [link];
    // Populate row with values for each key
    for (const key of uniqueKeys) {
      row.push(innerData.spans[key] || '');
      console.log(row);
    }
    csvRows.push(row.join(','));
  }
  // Combine headers and rows
  const csvContent = [csvHeaders.join(','), ...csvRows].join('\n');
  // Create CSV file and download
  const blob = new Blob([csvContent], { type: 'text/csv' });
  const url = URL.createObjectURL(blob);
  const link = document.createElement('a');
  link.href = url;
  link.download = 'data.csv';
  document.body.appendChild(link);
  link.click();
  document.body.removeChild(link);
  URL.revokeObjectURL(url);
}
// Function to scrape data from a single page
async function scrapeDataFromPage(link) {
  // Open a new window or tab for the link (you may need to adjust this depending on your environment)
  const page = window.open(link);
  // Wait for the page to load (you might need to add more sophisticated waiting logic)
  await new Promise(resolve => setTimeout(resolve, 1800000)); // 1800000 ms = 30 minutes; adjust the delay as needed
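  // NOTE (possible alternative, not part of the original script): instead of a fixed
  // delay, the wait could poll the opened window until it reports that it has finished
  // loading. This sketch assumes the opened pages are same-origin, since cross-origin
  // windows do not expose their document:
  //
  //   await new Promise(resolve => {
  //     const poll = setInterval(() => {
  //       if (page.document && page.document.readyState === 'complete') {
  //         clearInterval(poll);
  //         resolve();
  //       }
  //     }, 500);
  //   });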
  // Inject the scrapeData function into the opened window/tab
  await page.eval(`
    function scrapeData() {
      const spanElements = document.querySelectorAll('span.classname');
      const data = {
        spans: {}
      };
      // Scrape data from span elements and count unique elements
      spanElements.forEach(span => {
        const text = span.textContent.trim();
        data.spans[text] = (data.spans[text] || 0) + 1;
      });
      return data;
    }
  `);
  // Extract the scraped data using the injected function
  const scrapedData = await page.eval('scrapeData()');
  // Close the current page
  page.close();
  return scrapedData;
}
// Function to throttle requests
function throttle(delay) {
  let lastCall = 0;
  return function(fn) {
    const now = Date.now();
    if (now - lastCall < delay) {
      return new Promise(resolve => setTimeout(() => resolve(fn()), delay - (now - lastCall)));
    }
    lastCall = now;
    return fn();
  };
}
// Function to scrape data from multiple webpages in parallel
async function scrapeDataFromMultiplePages(links) {
  //const throttledScrape = throttle(10000); // Throttle to one request every 10 seconds
  //const scrapePromises = links.map(link => throttledScrape(() => scrapeDataFromPage(link)));
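  // NOTE: with the throttled version commented out above, every page in the batch is opened at once.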
  const scrapePromises = links.map(link => scrapeDataFromPage(link));
  // Wait for all scraping tasks to finish
  const scrapedDataArray = await Promise.all(scrapePromises);
  const resultObject = {};
  // Combine results from all scraping tasks into a single object
  links.forEach((link, index) => {
    resultObject[link.split('-')[1]] = scrapedDataArray[index];
    console.log(resultObject);
  });
  return resultObject;
}
// Example usage:
(async function() {
  sessionStorage.clear();
  const ids = promptAndConvertToArray();
  const links = generateLinks(ids);
  const batchSize = 9;
  // Initialize session storage
  sessionStorage.setItem('scrapedData', JSON.stringify({}));
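  // NOTE: everything scraped so far is kept in sessionStorage (re-parsed and
  // re-serialized after every batch) until the CSV is written at the very end.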
  for (let i = 0; i < links.length; i += batchSize) {
    const batchLinks = links.slice(i, i + batchSize); // Get a batch of links
    try {
      // Scrape data for the current batch
      const result = await scrapeDataFromMultiplePages(batchLinks);
      // Merge the result into the session storage
      const storedData = JSON.parse(sessionStorage.getItem('scrapedData'));
      Object.assign(storedData, result);
      sessionStorage.setItem('scrapedData', JSON.stringify(storedData));
    } catch (error) {
      console.error('Error:', error);
    }
  }
  // Once all scraping is done, retrieve data from session storage and download CSV
  const scrapedData = JSON.parse(sessionStorage.getItem('scrapedData'));
  console.log(scrapedData);
  downloadCSV(scrapedData);
})();