I want to scrap the “people” page of a company on Linkedin. So there is a section that have a list of cards of workers. It is necessary to do automatic scroll to get all the workers. I’m using selenium both for navigation and for scraping. So if there is no connection to the user, it would simply go to the next <li>. If there is a connection, it would click on the link, go to the user’s profile, get all the data, save this data on database and go back to the “people” page and go to the next <li>… but in the print that I used as debugger, it just prints the name and position of the first on the list, the first <li> as if I was doing f”ind_element”, instead of “find_elements”. Also, I guess that there is a chance that the automatic scroll may be wrong, but I don’t know what is wrong.
def faz_consulta():
url = entrada.get()
driver.get(url)
driver.maximize_window()
driver.implicitly_wait(30)
driver.save_screenshot('PAGINALINKEDIN.png')
url_people = url + '/people/'
driver.get(url_people)
workers = driver.find_elements(By.XPATH, "//ul[@class = 'display-flex list-style-none flex-wrap']//li[@class = 'grid grid__col--lg-8 block org-people-profile-card__profile-card-spacing']")
for worker in workers:
driver.implicitly_wait(10)
linkedin_user = driver.find_element(By.XPATH, "//div[@class = 'org-people-profile-card__profile-info']//div[@class = 'ember-view lt-line-clamp lt-line-clamp--single-line org-people-profile-card__profile-title t-black']").text
print(linkedin_user)
position_linkedin_user = driver.find_element(By.XPATH, "//div[@class = 'org-people-profile-card__profile-info']//div[@class = 'ember-view lt-line-clamp lt-line-clamp--multi-line']").text
print(position_linkedin_user
if(linkedin_user != "Usuário do LinkedIn"):
worker_card = driver.find_element(By.XPATH, "//div[@class = 'org-people-profile-card__profile-info']")
worker_link = funcionario_card.find_element(By.TAG_NAME, 'a').click()
print('user's profile')
driver.save_screenshot('PERFILLINKEDIN.png')
driver.implicitly_wait(10)
name_worker = driver.find_element(By.CLASS_NAME,"text-heading-xlarge inline t-24 v-align-middle break-words").text
position_worker = driver.find_element(By.CLASS_NAME, "text-body-medium break-words").text
experiences = driver.find_elements(By.XPATH, "//section[@class = 'artdeco-card pv-profile-card break-words mt2']//ul[@class = 'bYpMhDbGeHrPlJHJoKkUOYvSmZyJIyEvfrLc']//li[@class = 'artdeco-list__item MHGhNRxrFEYePDVElXiPzsGcfCtVjWRE UHceesMDBsAFbVbsOghsJALmURLeFDeg']")
first_experience = experiences[0]
experience_position_initial_inicio = primeira_experiencia.find_element(By.XPATH, "//div[@class = 'display-flex flex-column full-width align-self-center']//span[@class = 't-14 t-normal t-black--light'//span[@class = 'pvs-entity__caption-wrapper']").text
cur.execute('INSERT INTO tbl_Empresa_inkedin_Funcionario (Name, Position, initial_position) VALUES (%s, %s, %s)',( name_worker, worker_position, experience_position_initial_inicio.split('-')[0]))
con.commit()
driver.back()
previous_page_height = driver.execute_script("return document.body.scrollHeight")
new_page_height = driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
if new_page_height == previous_page_height:
break
previous_page_height = new_page_height