I am trying to create a Python script that navigates to a YouTube video, scrapes the transcript, and saves it locally.
The stages involved are: navigate to the video (URL) -> click “more” -> click “Show transcript” -> scrape the transcript and dump it into a JSON file.
Step 1: Click more (Completed)
Step 2: Click Show transcript (to be completed)
Step 3: Scrape transcript (to be completed).
The transcript window will then appear and load on the right side of the description window.
At the moment I can expand the description but can’t seem to find a way to click “Show transcript” and then scrape/save the transcript. This is what I need help with.
Here’s my code so far:
# Remote WebDriver endpoint passed as the first argument to Remote(...) when
# the browser session is opened. It is an empty string in this listing --
# presumably a placeholder for the real Scraping Browser address; confirm.
SBR_WEBDRIVER = ''
# Watch-page URL that the driver is pointed at with driver.get(URL).
URL = 'https://www.youtube.com/watch?v=yKXk7L2RzLI'
# Configure the root logger at INFO level so the logging.info(...) progress
# messages used throughout the script are emitted.
logging.basicConfig(level=logging.INFO)
def expand_description(driver):
    """Expand the video description by clicking the ``#expand`` element.

    Waits up to 10 seconds for the element to become clickable, clicks it
    and logs the outcome. Any exception is logged and swallowed, so the
    caller continues regardless of whether the click succeeded.

    Args:
        driver: The WebDriver session showing the watch page.
    """
    waiter = WebDriverWait(driver, 10)
    try:
        expand_locator = (By.CSS_SELECTOR, '#expand')
        expand_control = waiter.until(EC.element_to_be_clickable(expand_locator))
        expand_control.click()
        logging.info("Description expanded successfully")
    except Exception as exc:
        # Best effort: report the failure and let the caller carry on.
        logging.error("Error occurred while expanding description: %s", exc)
#TODO
def click_show_transcript(driver, timeout=10):
    """Click the "Show transcript" button and wait for the transcript to load.

    The button is located inside the expanded description, scrolled into the
    middle of the viewport and clicked. Instead of sleeping for a fixed
    period, the function then waits explicitly until at least one transcript
    segment is present in the DOM, so it returns as soon as the panel is ready
    and reports clearly when it never appears.

    Failures are logged rather than raised, matching the best-effort style of
    the other steps in this script.

    Args:
        driver: The WebDriver session showing the watch page, with the
            description already expanded.
        timeout: Maximum number of seconds to wait for the button to become
            clickable, and again for the transcript segments to appear.
    """
    # Imported here so this function does not rely on the file's import list
    # already containing TimeoutException.
    from selenium.common.exceptions import TimeoutException

    button_locator = (
        By.CSS_SELECTOR,
        '#primary-button.style-scope.ytd-video-description-transcript-section-renderer',
    )
    # NOTE(review): assumes each transcript row is rendered as a
    # <ytd-transcript-segment-renderer> element -- confirm against the live page.
    segment_locator = (By.CSS_SELECTOR, 'ytd-transcript-segment-renderer')
    wait = WebDriverWait(driver, timeout)

    try:
        button = wait.until(EC.element_to_be_clickable(button_locator))
        # Centre the button in the viewport rather than aligning it with the
        # top edge, where other page elements may overlap it.
        driver.execute_script(
            "arguments[0].scrollIntoView({block: 'center'});", button
        )
        # Re-check clickability after scrolling instead of sleeping blindly.
        button = wait.until(EC.element_to_be_clickable(button_locator))
        button.click()
        logging.info("Show transcript clicked successfully")
    except Exception as e:
        logging.error("Error occurred while clicking show transcript: %s", e)
        return

    try:
        # Explicit wait replaces the fixed 10-second sleep.
        wait.until(EC.presence_of_element_located(segment_locator))
        logging.info("Transcript panel loaded")
    except TimeoutException:
        logging.warning(
            "Transcript segments did not appear within %s seconds", timeout
        )
# TODO
# def scrape_transcript(driver):
# try:
# extract full transcript into variable
# return transcript
# except NoSuchElementException as e:
# logging.error("Error occurred while scraping transcript: %s", e)
def save_transcript_to_json(transcript, path='transcript_dump.json'):
    """Write *transcript* to a JSON file.

    The file is always written as UTF-8 and non-ASCII characters are stored
    as-is (not as ``\\uXXXX`` escapes), so transcripts in any language stay
    human-readable and the output does not depend on the platform's default
    encoding.

    Args:
        transcript: Any JSON-serialisable object holding the transcript.
        path: Destination file. Defaults to ``transcript_dump.json`` in the
            current working directory, which is what the function wrote to
            before this parameter existed.

    Raises:
        TypeError: If *transcript* contains objects ``json`` cannot serialise.
        OSError: If the file cannot be opened for writing.
    """
    with open(path, 'w', encoding='utf-8') as file:
        json.dump(transcript, file, indent=4, ensure_ascii=False)
    logging.info("Transcript saved to %s", path)
# Script entry: open a remote browser session, load the video page and run
# the scraping steps in order. The session is closed when the `with` exits.
logging.info('Connecting to Scraping Browser...')
browser_options = ChromeOptions()
with Remote(SBR_WEBDRIVER, options=browser_options) as driver:
    logging.info("Getting url: %s", URL)
    driver.get(URL)
    logging.info("Successfully connected to Scraping Browser...")
    try:
        logging.info("Waiting for the video to load...")
        # The title heading becoming visible is used as the "page ready" signal.
        title_locator = (By.CSS_SELECTOR, 'h1.ytd-watch-metadata')
        page_wait = WebDriverWait(driver, 10)
        page_wait.until(EC.visibility_of_element_located(title_locator))
        logging.info("Video loaded")

        expand_description(driver)
        click_show_transcript(driver)
        # transcript = scrape_transcript(driver)
        # save_transcript_to_json(transcript)
        logging.info("Scraping complete")
    except Exception as exc:
        logging.error("Error occurred during scraping: %s", exc)
Thanks.