I am downloading a CSV file from, for example, the following webpage:
https://iieg.gob.mx/plataforma_seguridad/#/laboratorio
Why does the script below return every value when I download the CSV via Selenium, except for the values in columns x and y?
# Download the CSV exposed by the dashboard embedded in URL and load it into
# a DataFrame, retrying a bounded number of times while the result looks
# incomplete.
MAX_TRIES = 4  # upper bound on download attempts
MIN_ROWS = 200000  # a result with fewer rows than this triggers a retry

extractor = SeleniumExtractor({"url": URL, "browser_interactions": []})
driver = extractor.driver

# The controls used below are looked up inside the page's first <iframe>.
iframe = driver.find_element(By.TAG_NAME, "iframe")
driver.switch_to.frame(iframe)

# Wait for the element with id "shiny-modal" to go away before interacting.
WebDriverWait(driver, 30).until(
    EC.invisibility_of_element_located((By.ID, "shiny-modal"))
)
# NOTE(review): fixed pause kept from the original; presumably gives the app
# time to finish loading -- confirm whether an explicit wait can replace it.
time.sleep(20)

wait = WebDriverWait(driver, 20)
df = None
tries = 0
while (df is None or df.empty or df.shape[0] < MIN_ROWS) and tries < MAX_TRIES:
    # Count the attempt up front so the loop always terminates; the original
    # never incremented `tries`, which made the `tries < 4` guard ineffective.
    tries += 1

    # Pass locators (not pre-resolved elements) so the wait itself polls for
    # the element instead of failing immediately when it is not in the DOM
    # yet. `until` returns the clickable element, which is reused directly.
    date_input = wait.until(
        EC.element_to_be_clickable(
            (By.CSS_SELECTOR, "input.form-control:nth-child(1)")
        )
    )
    # NOTE(review): on a retry this appends to whatever the field already
    # contains -- confirm whether the field should be cleared first.
    date_input.send_keys("01/01/2017")

    apply_button = wait.until(EC.element_to_be_clickable((By.ID, "apply")))
    apply_button.click()

    download_link = wait.until(EC.element_to_be_clickable((By.ID, "dwnld")))
    link = download_link.get_attribute("href")

    # NOTE(review): pandas fetches `link` with its own HTTP request, outside
    # the browser session -- verify that the server returns the same content
    # as a download started from the browser.
    df = pd.read_csv(link, encoding="utf-8")

if df is None or df.empty:
    raise Exception("Jalisco extraction failed!")

# Combine the "fecha" and "hora" columns into one timestamp column; values
# that cannot be parsed become NaT because of errors="coerce".
df["fecha_hora"] = pd.to_datetime(
    df["fecha"] + " " + df["hora"] + ":00", errors="coerce"
)
print(df[["x", "y"]].head())