Iam using nodriver and I have some problems with reading elements that are getting loaded dynamically and with reading some Javascript, since I cant really “wait” for the Javascript.
nodriver has “wait()” (https://ultrafunkamsterdam.github.io/nodriver/nodriver/classes/tab.html#nodriver.Tab.wait) but its not working in both cases.
With the help of Gemini I created these:
async def wait_for_complete_page_loaded(page, timeout=None):
"""
Waits until the entire page, including all resources, has finished loading.
:param page: The nodriver Page object of the page to monitor.
:param timeout: Maximum wait time in seconds. Default is "None" (8-15 seconds).
:raises TimeoutError: If the page doesn't fully load within the specified time.
"""
if timeout is None:
timeout = random.uniform(8, 15)
try:
await asyncio.wait_for(
page.evaluate(
"""
() => new Promise((resolve) => {
if (document.readyState === 'complete') {
resolve();
} else {
window.addEventListener('load', resolve);
}
})
""",
await_promise=True
),
timeout=timeout
)
except asyncio.TimeoutError:
raise TimeoutError(f"Page did not completely load within {timeout} s")
async def wait_for_dom_content_loaded(page, timeout=None):
"""
Waits until the initial HTML document has been completely loaded and parsed.
:param page: The nodriver Page object of the page to monitor.
:param timeout: Maximum wait time in seconds. Default is "None" (8-15 seconds).
:raises TimeoutError: If the DOM content is not loaded within the specified time.
"""
if timeout is None:
timeout = random.uniform(8, 15)
try:
await asyncio.wait_for(
page.evaluate(
"""
() => new Promise((resolve) => {
if (document.readyState === 'interactive' || document.readyState === 'complete') {
resolve();
} else {
document.addEventListener('DOMContentLoaded', resolve);
}
})
""",
await_promise=True
),
timeout=timeout
)
except asyncio.TimeoutError:
raise TimeoutError(f"DOM content was not loaded within {timeout} s")
async def wait_for_network_idle(page, timeout=None):
"""
Waits until the page reaches a state of network inactivity.
This function monitors network activities on the page and considers the page "idle"
when no new network requests have been detected for 500 milliseconds.
:param page: The nodriver Page object of the page to monitor.
:param timeout: Maximum wait time in seconds. Default is "None" (8-15 seconds).
:raises TimeoutError: If the page doesn't become idle within the specified time.
"""
if timeout is None:
timeout = random.uniform(8, 15)
try:
await asyncio.wait_for(
page.evaluate(
"""
() => new Promise((resolve) => {
let lastNetworkActivity = Date.now();
const observer = new PerformanceObserver((list) => {
lastNetworkActivity = Date.now();
});
observer.observe({ entryTypes: ['resource'] });
const checkIdle = () => {
if (Date.now() - lastNetworkActivity > 1000) {
observer.disconnect();
resolve();
} else {
setTimeout(checkIdle, 100);
}
};
setTimeout(checkIdle, 100);
})
""",
await_promise=True
),
timeout=timeout
)
except asyncio.TimeoutError:
raise TimeoutError(f"Network did not become idle within {timeout} s")
async def wait_for_dom_stability(page, timeout=None):
"""
Waits until the DOM of the page reaches a stable state.
This function monitors changes to the DOM and considers it "stable"
when no changes have been detected for 1 second.
:param page: The nodriver Page object of the page to monitor.
:param timeout: Maximum wait time in seconds. Default is "None" (8-15 seconds).
:raises TimeoutError: If the DOM doesn't stabilize within the specified time.
"""
if timeout is None:
timeout = random.uniform(8, 15)
try:
await asyncio.wait_for(
page.evaluate(
"""
() => new Promise((resolve) => {
let timeout;
const observer = new MutationObserver(() => {
clearTimeout(timeout);
timeout = setTimeout(() => {
observer.disconnect();
resolve();
}, 1000);
});
observer.observe(document.body, {
childList: true,
subtree: true,
attributes: true,
characterData: true
});
timeout = setTimeout(() => {
observer.disconnect();
resolve();
}, 1000);
})
""",
await_promise=True
),
timeout=timeout
)
except asyncio.TimeoutError:
raise TimeoutError(f"DOM did not stabilize within {timeout} s")
async def wait_for_page_stability(page, timeout=None):
"""
Waits until the page reaches both network inactivity and DOM stability.
This function combines monitoring of network activities and DOM changes.
It considers the page "stable" when neither network activities nor DOM changes
have been detected for 1 second.
:param page: The nodriver Page object of the page to monitor.
:param timeout: Maximum wait time in seconds. Default is "None" (8-15 seconds).
:raises TimeoutError: If the page doesn't stabilize within the specified time.
"""
if timeout is None:
timeout = random.uniform(8, 15)
try:
await asyncio.wait_for(
page.evaluate(
"""
() => new Promise((resolve) => {
let lastActivity = Date.now();
let networkTimeout, domTimeout;
const performanceObserver = new PerformanceObserver(() => {
lastActivity = Date.now();
});
performanceObserver.observe({ entryTypes: ['resource'] });
const mutationObserver = new MutationObserver(() => {
lastActivity = Date.now();
});
mutationObserver.observe(document.body, {
childList: true,
subtree: true,
attributes: true,
characterData: true
});
const checkStability = () => {
if (Date.now() - lastActivity > 1000) {
performanceObserver.disconnect();
mutationObserver.disconnect();
resolve();
} else {
setTimeout(checkStability, 100);
}
};
setTimeout(checkStability, 100);
})
""",
await_promise=True
),
timeout=timeout
)
except asyncio.TimeoutError:
raise TimeoutError(f"Page did not stabilize within {timeout} s")
And this for a specific javascript variable to get created and populated:
async def wait_for_javascript_data_load(self, timeout=10):
try:
await asyncio.wait_for(
self.page.evaluate(
"""
() => new Promise((resolve) => {
const checkJS = () => {
if (typeof window.current_variable !== 'undefined' &&
window.current_variable.hasOwnProperty('id') &&
window.current_variable.hasOwnProperty('name')) {
resolve();
} else {
setTimeout(checkJS, 100);
}
};
checkJS();
})
"""
),
timeout=timeout
)
except asyncio.TimeoutError:
self.logger.debug("Timeout while waiting for data to load.")
But “wait_for_javascript_data_load” seems not to wait for it and not to wait for the timeout.
Maybe this asyncio.wait_for() with my page.evaluate() approach is not working like that… Maybe Promises are not working in page.evaluate properly?!
Do you know something about this? Do you have an idea?