I’m trying to scrape data from a webpage using JavaScript, but I’m encountering difficulties because the elements I need to extract don’t have specific classes or IDs that I can use as selectors.
I’m working on a Chrome extension to extract car details from a website (Otomoto). The problem is that the HTML structure of the page doesn’t include unique identifiers (classes or IDs) for the elements containing the data I need. Here’s an example of the HTML structure:
<div data-testid="advert-details-item" class="ooa-162vy3d e18eslyg3">
<p class="e18eslyg4 ooa-12b2ph5">Marka pojazdu</p>
<div class="e1kfpccp0 ooa-1pe3502">Lexus</div>
</div>
<div data-testid="advert-details-item" class="ooa-162vy3d e18eslyg3">
<p class="e18eslyg4 ooa-12b2ph5">Model pojazdu</p>
<div class="e1kfpccp0 ooa-1pe3502">RX</div>
</div>
<!-- More similar blocks -->
Here’s the JavaScript code I’m using:
/**
 * Content-script handler for the `report_back` message.
 *
 * Scrapes the current offer page and replies (via `sendResponse`) with a JSON
 * string holding the fields `cena`, `waluta`, `marka`, `model` and `url`, in
 * that order. Any field that cannot be found on the page is reported as 'N/A'.
 * If scraping throws, the reply is a JSON string of the form `{ "error": message }`.
 *
 * @param {{text: string}} msg - Incoming message; only `text === 'report_back'` is handled.
 * @param {object} sender - Message sender (unused).
 * @param {function(string): void} sendResponse - Callback that receives the JSON string.
 */
chrome.runtime.onMessage.addListener(function (msg, sender, sendResponse) {
  if (msg.text !== 'report_back') {
    return;
  }

  const MISSING = 'N/A';
  // Detail label (as shown on the page) -> output field name.
  // Add more pairs as needed.
  const fieldByLabel = [
    ['Marka pojazdu', 'marka'],
    ['Model pojazdu', 'model'],
  ];

  const data = {};
  data.url = window.location.toString();

  try {
    const priceElement = document.querySelector('.offer-price__number');
    // `\d+` (digits), not `d+` (the letter "d"): the price text looks like
    // "129 900", so collect every digit run and glue the runs together.
    // `match` returns null when nothing matches, hence the guard.
    const digitRuns = priceElement ? priceElement.innerText.match(/\d+/g) : null;
    data.cena = digitRuns ? digitRuns.join('') : MISSING;

    const currencyElement = document.querySelector('.offer-price__currency');
    data.waluta = currencyElement ? currencyElement.innerText.trim() : MISSING;

    const items = document.querySelectorAll('[data-testid="advert-details-item"]');
    for (const item of items) {
      // The hashed class names are tried first to keep the previous behavior;
      // if they are absent, fall back to the block's structure: the label is
      // the first <p>, the value is the element right after it.
      const nameElement =
        item.querySelector('p[class*="ooa-12b2ph5"]') || item.querySelector('p');
      if (!nameElement) {
        continue;
      }
      const valueElement =
        item.querySelector('div[class*="ooa-1pe3502"]') || nameElement.nextElementSibling;
      if (!valueElement) {
        continue;
      }

      const name = nameElement.innerText.trim();
      const value = valueElement.innerText.trim();
      for (const [label, field] of fieldByLabel) {
        if (name.includes(label)) {
          data[field] = value;
        }
      }
    }

    const fieldsOrder = ['cena', 'waluta', 'marka', 'model', 'url'];
    const orderedRow = {};
    for (const field of fieldsOrder) {
      // JSON.stringify drops undefined properties, so give every missing
      // field an explicit placeholder instead of silently omitting it.
      orderedRow[field] = data[field] === undefined ? MISSING : data[field];
    }
    sendResponse(JSON.stringify(orderedRow));
  } catch (error) {
    sendResponse(JSON.stringify({ error: error.message }));
  }
});
Issues:
The elements I need to extract have no stable unique identifiers, only auto-generated class names such as `ooa-12b2ph5`.
The current selectors don’t always work, so some parameters (e.g. `marka`, `model`) come back as undefined.
Michał Godlewski is a new contributor to this site. Take care in asking for clarification, commenting, and answering.
Check out our Code of Conduct.