I found this source on GitHub for scraping a bitcoin website without getting blocked by Cloudflare:
https://github.com/zfcsoftware/youtube_lessons_resources/blob/main/cloudflare_bypass/index.js
I changed it a little to make it faster, but my code never refreshes the session.json file, which means it never gets a new user agent and cookies from Cloudflare.
How can I fix that?
Here is my modified version:
<code>const fs = require('fs');
const axios = require('axios');
const readline = require('readline');
const cheerio = require('cheerio'); // For HTML parsing

const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout
});

rl.question('Please enter the href URL (e.g., /solana/xxxxxxxxxx): ', async (hrefUrl) => {
    rl.close();

    // Page number settings
    const baseUrl = 'https://dexscreener.com/solana/page-';
    const maxPage = 200;
    const concurrentRequests = 10; // Number of concurrent page requests

    // Cloudflare bypass and scraping functions
    const parseHeaders = (requestString) => {
        // Raw request header block: skip the request line and the trailing blank lines
        const headers = requestString.split('\n').slice(1, -2);
        let userAgent, cookie;
        headers.forEach(header => {
            const [key, value] = header.split(': ');
            if (key.toLowerCase() === 'user-agent') {
                userAgent = value;
            } else if (key.toLowerCase() === 'cookie') {
                cookie = value;
            }
        });
        // Strip stray \r and other non-printable characters
        userAgent = userAgent.replace(/[^\t\x20-\x7e\x80-\xff]/g, '');
        cookie = cookie.replace(/[^\t\x20-\x7e\x80-\xff]/g, '');
        return { userAgent, cookie };
    };

    const refreshSession = async () => {
        try {
            const { default: CloudflareScraper } = await import('cloudflare-scraper');
            const response = await CloudflareScraper.get(`${baseUrl}1`);
            const { userAgent, cookie } = parseHeaders(String(response.request._request._header));
            fs.writeFileSync('./session.json', JSON.stringify({
                agent: userAgent,
                cookie: cookie
            }));
            console.log('+ Session refreshed.');
        } catch (err) {
            console.log('- Failed to refresh session:', err.message);
        }
    };

    const scrape = async (url) => {
        let session;
        try {
            session = JSON.parse(fs.readFileSync('./session.json', 'utf-8'));
        } catch (err) {
            return false;
        }
        try {
            const response = await axios(url, {
                withCredentials: true,
                headers: {
                    Cookie: session.cookie,
                    'User-Agent': session.agent
                }
            });
            console.log('+ Cloudflare successfully bypassed.');
            return response.data;
        } catch (err) {
            console.log('- Failed to bypass Cloudflare.');
            return false;
        }
    };

    const findHrefInPages = async () => {
        const pageUrls = Array.from({ length: maxPage }, (_, i) => `${baseUrl}${i + 1}`);

        // Scrape a single page and check whether it contains the href
        const checkPage = async (pageUrl) => {
            console.log(`Scraping: ${pageUrl}`);
            const pageContent = await scrape(pageUrl);
            if (pageContent && pageContent.includes(hrefUrl)) {
                const $ = cheerio.load(pageContent);
                const linkElement = $(`a[href="${hrefUrl}"]`);
                if (linkElement.length > 0) {
                    const rankText = linkElement.find('.ds-dex-table-row-badge-pair-no').text().trim();
                    console.log(`Found: ${hrefUrl} on page ${pageUrl.split('-').pop()}. Rank: ${rankText}`);
                    return true; // Exit early if found
                }
            }
            return false;
        };

        // Use Promise.all to scrape pages concurrently, in batches of `concurrentRequests`
        for (let i = 0; i < pageUrls.length; i += concurrentRequests) {
            const batchUrls = pageUrls.slice(i, i + concurrentRequests);
            const results = await Promise.all(batchUrls.map(checkPage));
            if (results.includes(true)) break; // Exit if any page was found
        }
    };

    // Start
    await refreshSession();
    await findHrefInPages();

    // Refresh session at regular intervals
    setInterval(async () => {
        await refreshSession();
    }, 5 * 60 * 1000); // Refresh every 5 minutes
});
</code>
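One thing I tried (this is just my own attempt, not something from the repo above) is refreshing the session whenever a page request fails, on the assumption that the cookie stored in session.json has simply expired, and then retrying that page once with the new session:

<code>// My attempt, not from the original repo: refresh session.json and retry once
// when a page request fails, instead of relying only on the 5-minute timer.
const scrapeWithRetry = async (url) => {
    let data = await scrape(url);
    if (data === false) {
        console.log('- Request failed, refreshing session and retrying once...');
        await refreshSession(); // rewrites session.json with a new agent/cookie
        data = await scrape(url); // scrape() re-reads session.json on every call
    }
    return data;
};
</code>

and then calling scrapeWithRetry instead of scrape inside checkPage. Is something like this the right approach, or is the real problem elsewhere (for example in parseHeaders or in how I write session.json)?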