I have a Python script that scrapes data from CoinGecko. It works perfectly on my laptop but throws an error when I run it on my PC. The script is as follows:
<code>import urllib.request
import urllib.error
from bs4 import BeautifulSoup
import pandas as pd
import gzip
import brotli
import io
import time

# Function to get the page content with custom headers
def get_page_content(url):
    req = urllib.request.Request(url, headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1'
    })
    response = urllib.request.urlopen(req)
    # Handle different content encodings
    if response.info().get('Content-Encoding') == 'gzip':
        buf = io.BytesIO(response.read())
        data = gzip.GzipFile(fileobj=buf).read()
    elif response.info().get('Content-Encoding') == 'br':
        data = brotli.decompress(response.read())
    else:
        data = response.read()
    return data

# Function to extract table data from a given page URL
def extract_table_data(page_url):
    try:
        webpage = get_page_content(page_url)
        soup = BeautifulSoup(webpage, 'html.parser')
        div_element = soup.find('div', class_='tw-mb-6 lg:tw-mb-12')
        if div_element:
            html_table = div_element.find('table')
            if html_table:
                df = pd.read_html(str(html_table))[0]
                df = df.loc[:, df.columns[1:-1]]  # Adjust the columns as per your requirement
                return df
            else:
                print(f"No table found in the specified div for URL: {page_url}")
        else:
            print(f"Specified div element not found for URL: {page_url}")
    except Exception as e:
        print(f"An error occurred for URL {page_url}: {str(e)}")
    return None

# Base URL
base_url = 'https://www.coingecko.com/en/coins/1/markets/spot?page='

# DataFrame to collect all data
all_data = pd.DataFrame()

# Start page
page = 1
max_retries = 3
retry_delay = 5
max_consecutive_errors = 5
consecutive_errors = 0

while True:
    url = base_url + str(page)
    print(f"Processing {url}")
    retries = 0
    while retries < max_retries:
        try:
            df = extract_table_data(url)
            if df is not None:
                all_data = pd.concat([all_data, df], ignore_index=True)
                consecutive_errors = 0  # Reset consecutive errors counter
                break  # Successfully retrieved data, break out of the retry loop
            else:
                print(f"No data found on page {page}, stopping.")
                consecutive_errors += 1
                break
        except urllib.error.HTTPError as e:
            if e.code == 404:
                print(f"HTTP Error 404 on page {page}. Stopping.")
                consecutive_errors += 1
                break
            else:
                print(f"HTTP Error on page {page}: {e.code}. Retrying...")
                retries += 1
                time.sleep(retry_delay)
        except Exception as e:
            print(f"An error occurred on page {page}: {str(e)}. Retrying...")
            retries += 1
            time.sleep(retry_delay)
    if consecutive_errors >= max_consecutive_errors:
        print(f"Stopping due to {max_consecutive_errors} consecutive errors.")
        break
    page += 1

# Save the complete DataFrame to CSV in the specified path
save_path = r'C:\Users\hamid\Downloads\Crypto_Data_Table.csv'
all_data.to_csv(save_path, index=False)
print(f"All data saved to '{save_path}'")
</code>
When I run it on my PC, I receive the following error:
<code>Processing https://www.coingecko.com/en/coins/1/markets/spot?page=1
An error occurred for URL https://www.coingecko.com/en/coins/1/markets/spot?page=1: nonnumeric port: 'port'
No data found on page 1, stopping.
Processing https://www.coingecko.com/en/coins/1/markets/spot?page=2
An error occurred for URL https://www.coingecko.com/en/coins/1/markets/spot?page=2: nonnumeric port: 'port'
No data found on page 2, stopping.
...
Stopping due to 5 consecutive errors.
All data saved to 'C:\Users\hamid\Downloads\Crypto_Data_Table.csv'
</code>
I’m not using a proxy, and the same script works fine on my laptop. What could be causing this issue on my PC, and how can I resolve it? Thanks in advance.
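In case it is relevant: I understand that urllib can pick up proxy settings from environment variables or from the Windows system settings even when the script itself does not configure a proxy. Below is a small diagnostic sketch (the variable names are my own) that prints whatever proxy configuration urllib sees; I can run it on both machines and post the output if that would help.
<code>import os
import urllib.request

# Diagnostic sketch: show the proxy configuration urllib detects on this machine
# (environment variables and, on Windows, the registry-based system settings).
print("Proxies detected by urllib:", urllib.request.getproxies())

# Also list any proxy-related environment variables that happen to be set.
for name in ('HTTP_PROXY', 'HTTPS_PROXY', 'http_proxy', 'https_proxy', 'ALL_PROXY', 'NO_PROXY'):
    value = os.environ.get(name)
    if value is not None:
        print(f"{name} = {value!r}")
</code>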