Every time I run my code, the lengths of the lists come out different from the last run, and jobTitles and links are always longer than companyName and jobLocation. All four lists should be the same length. I'm not sure what is happening that makes them different lengths, and the gap isn't even the same size from one run to the next.
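To see how a single result card could add more than one entry to a list, I put together this standalone check with a fake card. The HTML below is made up to illustrate, not copied from a real Indeed page, so the exact structure is an assumption:

from bs4 import BeautifulSoup

# A made-up job card, only to check how find_all counts matches;
# the markup here is invented and may not match Indeed's real pages.
card_html = """
<div class="css-dekpa e37uo190">
  <a href="/rc/clk?jk=123"><span>Junior Developer</span><span>new</span></a>
</div>
"""
card = BeautifulSoup(card_html, 'html.parser').find('div')
print(len(card.find_all('span')))  # 2 spans from one card
print(len(card.find_all('a')))     # 1 link from the same card

With markup like that, one card would append two titles but only one link, so the counts could drift apart. Here is my full script: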
import hrequests
from bs4 import BeautifulSoup
import pandas as pd
import csv
####################
## INDEED SCRAPER ##
####################
# Creates the CSV in write mode
with open('Indeed_Jobs.csv', 'w', newline='') as file:
    writer = csv.writer(file)

# Creating lists for the scraped info
jobTitles = []
companyName = []
jobLocation = []
links = []

# Creates the list of search URLs
URLs = ["https://www.indeed.com/jobs?q=IT+Entry+Level&l=United+States&from=searchOnHP&vjk=55e3c7e5e7a919c9",
        "https://www.indeed.com/jobs?q=development+entry+level&l=United+States&from=searchOnDesktopSerp&vjk=996b270bd119f225",
        "https://www.indeed.com/jobs?q=user+experience+entry+level&l=United+States&from=searchOnDesktopSerp&vjk=4fc6a443fd7c11df",
        "https://www.indeed.com/jobs?q=ux%2Fui+experience+entry+level&l=United+States&from=searchOnDesktopSerp&vjk=254dcd8c33926527",
        "https://www.indeed.com/jobs?q=data+entry+level&l=United+States&from=searchOnDesktopSerp&vjk=bd4fd45f0feb91c2"]

for url in URLs:
    # Connecting to Indeed and reading the HTML
    target_url = url
    head = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            "Connection": "keep-alive",
            "Accept-Language": "en-US,en;q=0.9,lt;q=0.8,et;q=0.7,de;q=0.6",
            }
    resp = hrequests.get(target_url, headers=head)
    soup = BeautifulSoup(resp.text, 'html.parser')

    # Finds all job title cards in the results
    for div in soup.find_all('div', {'class': 'css-dekpa e37uo190'}):
        # Pulls the job title
        for span in div.find_all('span'):
            jobTitles.append(span.text.strip())
        # Pulls the link to the job
        for a in div.find_all('a'):
            links.append("https://www.indeed.com" + a.get('href'))

    # Pulls the company name
    for div in soup.find_all('div', {'class': 'company_location css-17fky0v e37uo190'}):
        for span in div.find_all('span', {'data-testid': 'company-name'}):
            companyName.append(span.text.strip())
        # Pulls the job location
        for divLocation in div.find_all('div', {'data-testid': 'text-location'}):
            jobLocation.append(divLocation.text.strip())
    # Changing the heading on the CSV depending on which URL was searched
    if url == "https://www.indeed.com/jobs?q=development+entry+level&l=United+States&from=searchOnDesktopSerp&vjk=996b270bd119f225":
        additional_row = ["Development - Entry Level"]
    elif url == "https://www.indeed.com/jobs?q=IT+Entry+Level&l=United+States&from=searchOnHP&vjk=55e3c7e5e7a919c9":
        additional_row = ["IT - Entry Level"]
    elif url == "https://www.indeed.com/jobs?q=user+experience+entry+level&l=United+States&from=searchOnDesktopSerp&vjk=4fc6a443fd7c11df":
        additional_row = ["User Experience - Entry Level"]
    elif url == "https://www.indeed.com/jobs?q=ux%2Fui+experience+entry+level&l=United+States&from=searchOnDesktopSerp&vjk=254dcd8c33926527":
        additional_row = ["UX/UI - Entry Level"]
    elif url == "https://www.indeed.com/jobs?q=data+entry+level&l=United+States&from=searchOnDesktopSerp&vjk=bd4fd45f0feb91c2":
        additional_row = ["Data - Entry Level"]
    else:
        additional_row = ["Title Not Found"]

    # Blank row to make the formatting nice
    blank_row = ['']

    print(len(jobTitles))
    print(len(companyName))
    print(len(jobLocation))
    print(len(links))
    #
    # # Open the CSV file in append mode
    # with open('Indeed_Jobs.csv', 'a', newline='') as file:
    #     writer = csv.writer(file)
    #     # Write the heading and blank rows for formatting
    #     writer.writerow(blank_row)
    #     writer.writerow(additional_row)
    #     writer.writerow(blank_row)
    #
    #     # Write the jobs to the CSV
    #     df = pd.DataFrame({'Job Title': jobTitles, 'Company Name': companyName, 'Location': jobLocation, 'Link': links})
    #     df.to_csv(file, mode='a', header=True, index=False)
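For reference, the commented-out part at the bottom is where the unequal lengths actually break things: pandas refuses to build a DataFrame from columns of different lengths. A tiny standalone reproduction, nothing Indeed-specific:

import pandas as pd

# pd.DataFrame raises ValueError when the column lists differ in length,
# which is why all four lists above have to line up.
try:
    pd.DataFrame({'Job Title': ['a', 'b', 'c'], 'Company Name': ['x', 'y']})
except ValueError as e:
    print(e)  # All arrays must be of the same length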