Thiết kế website giá rẻ

Question

I’m trying to scrape some linkedin user’s profile. So it goes to the “people” page and goes one by one to get the information and save on database. If the profile’s not public it will simply move on… if the profile’s public it will click on the profile’s card and extract the information on the profile’s page. But when I print inside my “for” it only prints the first profile found, it doesn’t iterate at all. Also, I’m clueless on how to do the automatic scroll. Here is the code:

def run_query():
    url = input_field.get()
    if(url == '' or not url.startswith("https://www.linkedin.com/company/")):
        CTkMessagebox(title="Info", icon="cancel", message="Empty or invalid URL!")
        return
    driver.get(url)
    driver.maximize_window()
    driver.implicitly_wait(30)
    print(url)
    driver.save_screenshot('LINKEDINPAGE.png')
    
    company_title = driver.find_element(By.XPATH, "//div[@class='relative']//p[@class='org-top-card-summary__tagline']").text
    print(company_title)
    
    industry = driver.find_element(By.CLASS_NAME, 'org-top-card-summary-info-list__info-item').text
    print(industry)
    
    people_url = url + '/people/'
    
    driver.get(people_url)
    driver.implicitly_wait(10)
    previous_page_height = driver.execute_script("return document.body.scrollHeight")
    
    employees = driver.find_elements(By.XPATH, "//ul[@class='display-flex list-style-none flex-wrap']//li[@class='grid grid__col--lg-8 block org-people-profile-card__profile-card-spacing']")
    
    for employee in employees:
        driver.save_screenshot("employees.png")
        
        linkedin_user = employee.find_element(By.XPATH, "//div[@class='org-people-profile-card__profile-info']//div[@class='ember-view lt-line-clamp lt-line-clamp--single-line org-people-profile-card__profile-title t-black']").text
        print(linkedin_user)
        
        linkedin_user_position = driver.find_element(By.XPATH, "//div[@class='org-people-profile-card__profile-info']//div[@class='ember-view lt-line-clamp lt-line-clamp--multi-line']").text
        print(linkedin_user_position)
        
        if(linkedin_user != "LinkedIn Member"):
            employee_card = driver.find_element(By.XPATH, "//div[@class='org-people-profile-card__profile-info']")
            employee_link = employee_card.find_element(By.TAG_NAME, 'a').click()
            print('entered the profile')
            driver.save_screenshot('LINKEDINPROFILE.png')
            driver.implicitly_wait(10)
            
            employee_name = driver.find_element(By.CLASS_NAME, "text-heading-xlarge inline t-24 v-align-middle break-words").text
            employee_position = driver.find_element(By.CLASS_NAME, "text-body-medium break-words").text
            
            experiences = driver.find_elements(By.XPATH, "//section[@class='artdeco-card pv-profile-card break-words mt2']//ul[@class='bYpMhDbGeHrPlJHJoKkUOYvSmZyJIyEvfrLc']//li[@class='artdeco-list__item MHGhNRxrFEYePDVElXiPzsGcfCtVjWRE UHceesMDBsAFbVbsOghsJALmURLeFDeg']")
            first_experience = experiences[0]
            first_position_start_date = first_experience.find_element(By.XPATH, "//div[@class='display-flex flex-column full-width align-self-center']//span[@class='t-14 t-normal t-black--light']//span[@class='pvs-entity__caption-wrapper']").text
            
            cur.execute('INSERT INTO tbl_LinkedIn_Company_Employee (Name, Position, StartDate) VALUES (%s, %s, %s)', (employee_name, employee_position, first_position_start_date.split('-')[0]))
            
            print(employee_name)
            print(employee_position)
            con.commit()
    
    def pagination():
        while True:
            save_employee()
            new_page_height = driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
            if new_page_height == previous_page_height:
                break
            previous_page_height = new_page_height

confirm_button = ctkinter.CTkButton(master=topview, command=run_query, text="OK", fg_color=("#DB3E39", "#821D1A")).place(relx=0.5, relyx=0.5, anchor='c')

Also, I wonder in an infinite scroll page I can just simply click on “show more” or keep scrolling until the end, and, AFTER that it goes to the first element of the loop until the end.

Thiết kế website giá rẻ

Danh mục

Why this for is getting only the first element? and how to do the automatic scroll?