As per the title, I'm building a Python video downloader script for sites other than YouTube (so without its pytube library). To get straight to it: using ChatGPT, here's what I currently have:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
from tqdm import tqdm
import requests
def extract_video_urls(url, driver_path='path/to/chromedriver'):
    """Return the media URLs of the <video> elements on a rendered page.

    The page is loaded in headless Chrome so that players inserted by
    JavaScript are present in the DOM before it is queried.

    Args:
        url: Address of the web page to inspect.
        driver_path: Location of the chromedriver executable handed to
            Selenium's ``Service``.

    Returns:
        A list of unique, non-empty URL strings in document order. Entries
        may be ``blob:`` URLs when the site feeds the player from in-browser
        data; those cannot be fetched with a plain HTTP request.
    """
    # Configure Chrome to run without a visible window.
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')

    webdriver_service = Service(driver_path)
    driver = webdriver.Chrome(service=webdriver_service, options=chrome_options)
    try:
        driver.get(url)
        # `video.src` alone is empty when the source comes from <source>
        # children or is chosen by the browser, so also read `currentSrc`
        # and every nested <source>. Empty and duplicate values are skipped.
        video_urls = driver.execute_script("""
            var videos = [];
            function add(candidate) {
                if (candidate && videos.indexOf(candidate) === -1) {
                    videos.push(candidate);
                }
            }
            document.querySelectorAll('video').forEach(function(video) {
                add(video.currentSrc);
                add(video.src);
                video.querySelectorAll('source').forEach(function(source) {
                    add(source.src);
                });
            });
            return videos;
        """)
        return video_urls
    finally:
        # Always shut the browser down, even if loading or scripting failed.
        driver.quit()
def download_video(url, filename, timeout=30):
    """Download a video from ``url`` and save it as ``filename``.

    Progress is shown with a tqdm bar. Request failures are reported on
    stdout rather than raised.

    Args:
        url: Direct HTTP(S) address of the video file.
        filename: Path of the file to write.
        timeout: Seconds to wait for the server to connect or send data
            before the request is abandoned.

    Returns:
        None.
    """
    # A blob: URL names data held inside the browser that created it, so
    # there is nothing for an HTTP client to request.
    if url.startswith('blob:'):
        print(
            f"Error downloading video: {url} is a blob URL, which refers to "
            "data inside the browser and cannot be downloaded directly."
        )
        return

    try:
        # The context managers release the connection, the file and the
        # progress bar even when the transfer fails part-way through.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # Raise for 4xx/5xx status codes
            total_size = int(response.headers.get('content-length', 0))
            chunk_size = 1024
            with open(filename, 'wb') as f, tqdm(
                total=total_size or None,  # None = size unknown
                unit='B',
                unit_scale=True,
                desc=filename,
            ) as progress_bar:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:
                        f.write(chunk)
                        progress_bar.update(len(chunk))
        print(f"\nDownloaded video: {filename}")
    except requests.exceptions.RequestException as e:
        print(f"Error downloading video: {e}")
def main():
    """Prompt for a page URL, list the videos found and download one.

    The user picks a video by its 1-based number; entering 0 exits without
    downloading. Any other input that is not a listed number is reported
    as invalid.
    """
    website_url = input("Enter website URL: ")
    video_urls = extract_video_urls(website_url)
    if not video_urls:
        print("No video URLs found on the provided webpage.")
        return

    print("Found video URLs:")
    for number, url in enumerate(video_urls, start=1):
        print(f"{number}. {url}")

    download_choice = input("Enter video number to download (or 0 to exit): ").strip()
    # isdecimal() only accepts characters int() can parse; isdigit() also
    # accepts e.g. superscript digits, which would make int() raise.
    if not download_choice.isdecimal():
        print("Invalid video number.")
        return

    video_number = int(download_choice)
    if video_number == 0:
        return
    if video_number > len(video_urls):
        print("Invalid video number.")
        return

    filename = f"video_{video_number}.mp4"  # Adjust filename format
    download_video(video_urls[video_number - 1], filename)
# Run the interactive downloader only when executed as a script, not on import.
if __name__ == "__main__":
    main()
-
But eventually I end up with an exception that's caught by the download_video function, like so:
Error downloading video: No connection adapters were found for 'blob:https://www.ted.com/45e459e3-974b-417c-8f05-0a92cac0f22e'
After lengthy chats with various LLMs, I learned that:
- Downloading a video from a blob URL can be a bit tricky because a blob URL does not refer to a direct file that can be downloaded; it refers to binary data stored in the browser’s memory.
- And that typically, to download a video from a blob URL, you need to extract the actual video file URL from the network requests made by the browser.
-
But eventually I end up with an exception that's caught by the download_video function, like so:
Error downloading video: No connection adapters were found for 'blob:https://www.ted.com/45e459e3-974b-417c-8f05-0a92cac0f22e'
After lengthy chats with various LLMs, I learned that:
- Downloading a video from a blob URL can be a bit tricky because a blob URL does not refer to a direct file that can be downloaded; it refers to binary data stored in the browser’s memory.
- And that typically, to download a video from a blob URL, you need to extract the actual video file URL from the network requests made by the browser.
I simply need a way, or an approach, to download a video from websites other than YouTube.