While doing web crawling, I ran into a problem with BeautifulSoup’s find_all() function

Plain text
Copy to clipboard
Open code in new window
EnlighterJS 3 Syntax Highlighter
<code># Import JoinQuant Library
import jqdata
import pandas as pd
from jqdata import finance
from jqdata import *
import requests
from bs4 import BeautifulSoup
# Initialize functions, and setting benchmarks etc.
def initialize(context):
# Setting HS300(000300.XSHG) as benchmark
set_benchmark('000300.XSHG')
# Start active back-adjustment mode, i.e. real price.
set_option('use_real_price', True)
# Setting commissions for stock trading:0.03% when buying,0.03% plus 0.1% stamp duty when selling, with 5 RMB minimum per transaction cost.
set_order_cost(OrderCost(close_tax = 0.001, open_commission = 0.0003, close_commission = 0.0003, min_commission = 5), type = 'stock')
# Setting commissions for index futures trading: 0.0023% when buying & selling, 0.23% when closing today's positions.
set_order_cost(OrderCost(open_commission = 0.000023, close_commission = 0.000023, close_today_commission = 0.0023, min_commission = 0), type = 'index_futures')
# Filtering ST stocks
# Return the list of stocks after filtering (i.e. those non-ST stocks in a list)
def filter_st(stocks, day):
dataframe = get_extras('is_st', stocks, start_date = day, end_date = day, df = True)
non_st_list = []
for i in list(dataframe.columns):
if not dataframe[i].bool():
non_st_list.append(i)
return non_st_list
# Filtering paused stocks, including those which hit high_limit or low_limit when opening
# Return the list of stocks after filtering (i.e. those non-paused stocks in a list)
def filter_paused(stocks):
current_data = get_current_data(stocks)
return [stock for stock in stocks if not (
current_data[stock].paused or
current_data[stock].day_open == current_data[stock].high_limit or
current_data[stock].day_open == current_data[stock].low_limit
)]
# Call handle_data function once every unit time (if backtest per day, then call it once per day; if backtest by minute, then call it once per minute)
def handle_data(context, data):
# Get the current date in the form 20XX-XX-XX
current_date = context.current_dt.date()
# Get all stocks
all_stocks = list(get_all_securities(types = ['stock'], date = None).index)
# Filter ST stocks with the base of all_stocks in the previous step
non_st_stocks = filter_st(all_stocks, current_date)
# Filter paused stocks on top of non_st_stocks in the previous step
filter_paused(non_st_stocks)
# Set the stock list after 2 steps of filtering as target_list
target_list = filter_paused(non_st_stocks)
# Set the stocks with minimum 10% of market capital as target_list_10%
df1 = get_valuation(target_list, start_date = current_date, end_date = current_date, fields = 'market_cap', count = None)
sorted_df1 = df1.sort_values('market_cap', ascending = True)
target_list_10_percent = sorted_df1.iloc[0: int(len(sorted_df1.index) * 0.1)]
print(target_list_10_percent)
# Set the stocks with minimum 20% of market capital as target_list_20%
df2 = get_valuation(target_list, start_date = current_date, end_date = current_date, fields = 'market_cap', count = None)
sorted_df2 = df2.sort_values('market_cap', ascending = True)
target_list_20_percent = sorted_df2.iloc[0: int(len(sorted_df2.index) * 0.2)]
print(target_list_20_percent)
# Web Crawler
# Sent HTTP Request to get Internet content
url = 'https://data.eastmoney.com/executive/000001.html'
response = requests.get(url)
html_content = response.text
# Check if the request is successful
if response.status_code == 200:
# Use BeautifulSoup to Analyze Internet information and get the table
soup = BeautifulSoup(html_content, 'html.parser')
table = soup.find_all('table')
# Acquire the rows and columns of the table
rows = table.find_all('tr')
data = []
for row in rows:
cols = row.find_all('td')
row_data = []
for col in cols:
row_data.append(col.text.strip())
data.append(row_data)
else:
print("Failed to Retrieve the Webpage.")
# Set up DataFrame
dataframe = pd.DataFrame(data)
# Print DataFrame
print(dataframe)
</code>
<code># Import JoinQuant Library import jqdata import pandas as pd from jqdata import finance from jqdata import * import requests from bs4 import BeautifulSoup # Initialize functions, and setting benchmarks etc. def initialize(context): # Setting HS300(000300.XSHG) as benchmark set_benchmark('000300.XSHG') # Start active back-adjustment mode, i.e. real price. set_option('use_real_price', True) # Setting commissions for stock trading:0.03% when buying,0.03% plus 0.1% stamp duty when selling, with 5 RMB minimum per transaction cost. set_order_cost(OrderCost(close_tax = 0.001, open_commission = 0.0003, close_commission = 0.0003, min_commission = 5), type = 'stock') # Setting commissions for index futures trading: 0.0023% when buying & selling, 0.23% when closing today's positions. set_order_cost(OrderCost(open_commission = 0.000023, close_commission = 0.000023, close_today_commission = 0.0023, min_commission = 0), type = 'index_futures') # Filtering ST stocks # Return the list of stocks after filtering (i.e. those non-ST stocks in a list) def filter_st(stocks, day): dataframe = get_extras('is_st', stocks, start_date = day, end_date = day, df = True) non_st_list = [] for i in list(dataframe.columns): if not dataframe[i].bool(): non_st_list.append(i) return non_st_list # Filtering paused stocks, including those which hit high_limit or low_limit when opening # Return the list of stocks after filtering (i.e. 
those non-paused stocks in a list) def filter_paused(stocks): current_data = get_current_data(stocks) return [stock for stock in stocks if not ( current_data[stock].paused or current_data[stock].day_open == current_data[stock].high_limit or current_data[stock].day_open == current_data[stock].low_limit )] # Call handle_data function once every unit time (if backtest per day, then call it once per day; if backtest by minute, then call it once per minute) def handle_data(context, data): # Get the current date in the form 20XX-XX-XX current_date = context.current_dt.date() # Get all stocks all_stocks = list(get_all_securities(types = ['stock'], date = None).index) # Filter ST stocks with the base of all_stocks in the previous step non_st_stocks = filter_st(all_stocks, current_date) # Filter paused stocks on top of non_st_stocks in the previous step filter_paused(non_st_stocks) # Set the stock list after 2 steps of filtering as target_list target_list = filter_paused(non_st_stocks) # Set the stocks with minimum 10% of market capital as target_list_10% df1 = get_valuation(target_list, start_date = current_date, end_date = current_date, fields = 'market_cap', count = None) sorted_df1 = df1.sort_values('market_cap', ascending = True) target_list_10_percent = sorted_df1.iloc[0: int(len(sorted_df1.index) * 0.1)] print(target_list_10_percent) # Set the stocks with minimum 20% of market capital as target_list_20% df2 = get_valuation(target_list, start_date = current_date, end_date = current_date, fields = 'market_cap', count = None) sorted_df2 = df2.sort_values('market_cap', ascending = True) target_list_20_percent = sorted_df2.iloc[0: int(len(sorted_df2.index) * 0.2)] print(target_list_20_percent) # Web Crawler # Sent HTTP Request to get Internet content url = 'https://data.eastmoney.com/executive/000001.html' response = requests.get(url) html_content = response.text # Check if the request is successful if response.status_code == 200: # Use BeautifulSoup to Analyze Internet 
information and get the table soup = BeautifulSoup(html_content, 'html.parser') table = soup.find_all('table') # Acquire the rows and columns of the table rows = table.find_all('tr') data = [] for row in rows: cols = row.find_all('td') row_data = [] for col in cols: row_data.append(col.text.strip()) data.append(row_data) else: print("Failed to Retrieve the Webpage.") # Set up DataFrame dataframe = pd.DataFrame(data) # Print DataFrame print(dataframe) </code>
# Import JoinQuant Library
import jqdata
import pandas as pd
from jqdata import finance
from jqdata import *
import requests
from bs4 import BeautifulSoup

# Initialize functions, and setting benchmarks etc.
def initialize(context):
    # Setting HS300(000300.XSHG) as benchmark
    set_benchmark('000300.XSHG')
    # Start active back-adjustment mode, i.e. real price.
    set_option('use_real_price', True)
    # Setting commissions for stock trading:0.03% when buying,0.03% plus 0.1% stamp duty when selling, with 5 RMB minimum per transaction cost.
    set_order_cost(OrderCost(close_tax = 0.001, open_commission = 0.0003, close_commission = 0.0003, min_commission = 5), type = 'stock')
    # Setting commissions for index futures trading: 0.0023% when buying & selling, 0.23% when closing today's positions.
    set_order_cost(OrderCost(open_commission = 0.000023, close_commission = 0.000023, close_today_commission = 0.0023, min_commission = 0), type = 'index_futures')

# Filtering ST stocks
# Return the list of stocks after filtering (i.e. those non-ST stocks in a list)
def filter_st(stocks, day):
   dataframe = get_extras('is_st', stocks, start_date = day, end_date = day, df = True)
   non_st_list = []
   for i in list(dataframe.columns):
        if not dataframe[i].bool():
           non_st_list.append(i)

   return non_st_list

# Filtering paused stocks, including those which hit high_limit or low_limit when opening
# Return the list of stocks after filtering (i.e. those non-paused stocks in a list)
def filter_paused(stocks):
    current_data = get_current_data(stocks)
    return [stock for stock in stocks if not (
        current_data[stock].paused or
        current_data[stock].day_open == current_data[stock].high_limit or
        current_data[stock].day_open == current_data[stock].low_limit
        )]

# Call handle_data function once every unit time (if backtest per day, then call it once per  day; if backtest by minute, then call it once per minute)
def handle_data(context, data):
    # Get the current date in the form 20XX-XX-XX
    current_date = context.current_dt.date()
    # Get all stocks
    all_stocks = list(get_all_securities(types = ['stock'], date = None).index)
    # Filter ST stocks with the base of all_stocks in the previous step
    non_st_stocks = filter_st(all_stocks, current_date)
    # Filter paused stocks on top of non_st_stocks in the previous step
    filter_paused(non_st_stocks)
    # Set the stock list after 2 steps of filtering as target_list
    target_list = filter_paused(non_st_stocks)
    # Set the stocks with minimum 10% of market capital as target_list_10%
    df1 = get_valuation(target_list, start_date = current_date, end_date = current_date, fields = 'market_cap', count = None)
    sorted_df1 = df1.sort_values('market_cap', ascending = True)
    target_list_10_percent = sorted_df1.iloc[0: int(len(sorted_df1.index) * 0.1)]
    print(target_list_10_percent)
    # Set the stocks with minimum 20% of market capital as target_list_20%
    df2 = get_valuation(target_list, start_date = current_date, end_date = current_date, fields = 'market_cap', count = None)
    sorted_df2 = df2.sort_values('market_cap', ascending = True)
    target_list_20_percent = sorted_df2.iloc[0: int(len(sorted_df2.index) * 0.2)]
    print(target_list_20_percent)

    # Web Crawler
    # Sent HTTP Request to get Internet content
    url = 'https://data.eastmoney.com/executive/000001.html'
    response = requests.get(url)
    html_content = response.text

    # Check if the request is successful
    if response.status_code == 200:
        # Use BeautifulSoup to Analyze Internet information and get the table
        soup = BeautifulSoup(html_content, 'html.parser')
        table = soup.find_all('table')
        # Acquire the rows and columns of the table
        rows = table.find_all('tr')
        data = []
        for row in rows:
            cols = row.find_all('td')
            row_data = []
            for col in cols:
                row_data.append(col.text.strip())
            data.append(row_data)
    else:
        print("Failed to Retrieve the Webpage.")

    # Set up DataFrame
    dataframe = pd.DataFrame(data)
    # Print DataFrame
    print(dataframe)

Here’s the result:

Plain text
Copy to clipboard
Open code in new window
EnlighterJS 3 Syntax Highlighter
<code>Traceback (most recent call last):
File "/tmp/jqcore/jqboson/jqboson/core/entry.py", line 379, in _run
engine.start()
File "/tmp/jqcore/jqboson/jqboson/core/engine.py", line 231, in start
self._dispatcher.start()
File "/tmp/jqcore/jqboson/jqboson/core/dispatcher.py", line 280, in start
self._run_loop()
File "/tmp/jqcore/jqboson/jqboson/core/dispatcher.py", line 240, in _run_loop
self._loop.run()
File "/tmp/jqcore/jqboson/jqboson/core/loop/loop.py", line 107, in run
self._handle_queue()
File "/tmp/jqcore/jqboson/jqboson/core/loop/loop.py", line 153, in _handle_queue
message.callback(**message.callback_data)
File "/tmp/jqcore/jqboson/jqboson/core/mds/market_data_subscriber.py", line 228, in broadcast
consumer.send(market_data)
File "/tmp/jqcore/jqboson/jqboson/core/mds/market_data_consumer_manager.py", line 59, in consumer_gen
msg_callback()
File "/tmp/jqcore/jqboson/jqboson/core/mds/market_data_consumer_manager.py", line 52, in msg_callback
callback(market_data)
File "/tmp/jqcore/jqboson/jqboson/core/mds/market_data_consumer_manager.py", line 122, in wrapper
result = callback(*args, **kwargs)
File "/tmp/jqcore/jqboson/jqboson/core/strategy.py", line 474, in _wrapper
self._context.current_dt
File "/tmp/strategy/user_code.py", line 86, in handle_data
rows = table.find_all('tr')
File "bs4/element.py", line 1884, in __getattr__
"ResultSet object has no attribute '%s'. You're probably treating a list of items like a single item. Did you call find_all() when you meant to call find()?" % key
</code>
<code>Traceback (most recent call last): File "/tmp/jqcore/jqboson/jqboson/core/entry.py", line 379, in _run engine.start() File "/tmp/jqcore/jqboson/jqboson/core/engine.py", line 231, in start self._dispatcher.start() File "/tmp/jqcore/jqboson/jqboson/core/dispatcher.py", line 280, in start self._run_loop() File "/tmp/jqcore/jqboson/jqboson/core/dispatcher.py", line 240, in _run_loop self._loop.run() File "/tmp/jqcore/jqboson/jqboson/core/loop/loop.py", line 107, in run self._handle_queue() File "/tmp/jqcore/jqboson/jqboson/core/loop/loop.py", line 153, in _handle_queue message.callback(**message.callback_data) File "/tmp/jqcore/jqboson/jqboson/core/mds/market_data_subscriber.py", line 228, in broadcast consumer.send(market_data) File "/tmp/jqcore/jqboson/jqboson/core/mds/market_data_consumer_manager.py", line 59, in consumer_gen msg_callback() File "/tmp/jqcore/jqboson/jqboson/core/mds/market_data_consumer_manager.py", line 52, in msg_callback callback(market_data) File "/tmp/jqcore/jqboson/jqboson/core/mds/market_data_consumer_manager.py", line 122, in wrapper result = callback(*args, **kwargs) File "/tmp/jqcore/jqboson/jqboson/core/strategy.py", line 474, in _wrapper self._context.current_dt File "/tmp/strategy/user_code.py", line 86, in handle_data rows = table.find_all('tr') File "bs4/element.py", line 1884, in __getattr__ "ResultSet object has no attribute '%s'. You're probably treating a list of items like a single item. Did you call find_all() when you meant to call find()?" % key </code>
Traceback (most recent call last):
  File "/tmp/jqcore/jqboson/jqboson/core/entry.py", line 379, in _run
    engine.start()
  File "/tmp/jqcore/jqboson/jqboson/core/engine.py", line 231, in start
    self._dispatcher.start()
  File "/tmp/jqcore/jqboson/jqboson/core/dispatcher.py", line 280, in start
    self._run_loop()
  File "/tmp/jqcore/jqboson/jqboson/core/dispatcher.py", line 240, in _run_loop
    self._loop.run()
  File "/tmp/jqcore/jqboson/jqboson/core/loop/loop.py", line 107, in run
    self._handle_queue()
  File "/tmp/jqcore/jqboson/jqboson/core/loop/loop.py", line 153, in _handle_queue
    message.callback(**message.callback_data)
  File "/tmp/jqcore/jqboson/jqboson/core/mds/market_data_subscriber.py", line 228, in broadcast
    consumer.send(market_data)
  File "/tmp/jqcore/jqboson/jqboson/core/mds/market_data_consumer_manager.py", line 59, in  consumer_gen
    msg_callback()
  File "/tmp/jqcore/jqboson/jqboson/core/mds/market_data_consumer_manager.py", line 52, in msg_callback
    callback(market_data)
  File "/tmp/jqcore/jqboson/jqboson/core/mds/market_data_consumer_manager.py", line 122, in wrapper
    result = callback(*args, **kwargs)
  File "/tmp/jqcore/jqboson/jqboson/core/strategy.py", line 474, in _wrapper
    self._context.current_dt
  File "/tmp/strategy/user_code.py", line 86, in handle_data
    rows = table.find_all('tr')
  File "bs4/element.py", line 1884, in __getattr__
    "ResultSet object has no attribute '%s'. You're probably treating a list of items like a single item. Did you call find_all() when you meant to call find()?" % key

AttributeError: ResultSet object has no attribute ‘find_all’. You’re probably treating a list of items like a single item. Did you call find_all() when you meant to call find()?

It looks like I’m misusing BeautifulSoup’s find_all() function somehow. Can someone help me figure it out, please? Thanks.

Trang chủ Giới thiệu Sinh nhật bé trai Sinh nhật bé gái Tổ chức sự kiện Biểu diễn giải trí Dịch vụ khác Trang trí tiệc cưới Tổ chức khai trương Tư vấn dịch vụ Thư viện ảnh Tin tức - sự kiện Liên hệ Chú hề sinh nhật Trang trí YEAR END PARTY công ty Trang trí tất niên cuối năm Trang trí tất niên xu hướng mới nhất Trang trí sinh nhật bé trai Hải Đăng Trang trí sinh nhật bé Khánh Vân Trang trí sinh nhật Bích Ngân Trang trí sinh nhật bé Thanh Trang Thuê ông già Noel phát quà Biểu diễn xiếc khỉ Xiếc quay đĩa Dịch vụ tổ chức sự kiện 5 sao Thông tin về chúng tôi Dịch vụ sinh nhật bé trai Dịch vụ sinh nhật bé gái Sự kiện trọn gói Các tiết mục giải trí Dịch vụ bổ trợ Tiệc cưới sang trọng Dịch vụ khai trương Tư vấn tổ chức sự kiện Hình ảnh sự kiện Cập nhật tin tức Liên hệ ngay Thuê chú hề chuyên nghiệp Tiệc tất niên cho công ty Trang trí tiệc cuối năm Tiệc tất niên độc đáo Sinh nhật bé Hải Đăng Sinh nhật đáng yêu bé Khánh Vân Sinh nhật sang trọng Bích Ngân Tiệc sinh nhật bé Thanh Trang Dịch vụ ông già Noel Xiếc thú vui nhộn Biểu diễn xiếc quay đĩa Dịch vụ tổ chức tiệc uy tín Khám phá dịch vụ của chúng tôi Tiệc sinh nhật cho bé trai Trang trí tiệc cho bé gái Gói sự kiện chuyên nghiệp Chương trình giải trí hấp dẫn Dịch vụ hỗ trợ sự kiện Trang trí tiệc cưới đẹp Khởi đầu thành công với khai trương Chuyên gia tư vấn sự kiện Xem ảnh các sự kiện đẹp Tin mới về sự kiện Kết nối với đội ngũ chuyên gia Chú hề vui nhộn cho tiệc sinh nhật Ý tưởng tiệc cuối năm Tất niên độc đáo Trang trí tiệc hiện đại Tổ chức sinh nhật cho Hải Đăng Sinh nhật độc quyền Khánh Vân Phong cách tiệc Bích Ngân Trang trí tiệc bé Thanh Trang Thuê dịch vụ ông già Noel chuyên nghiệp Xem xiếc khỉ đặc sắc Xiếc quay đĩa thú vị
Trang chủ Giới thiệu Sinh nhật bé trai Sinh nhật bé gái Tổ chức sự kiện Biểu diễn giải trí Dịch vụ khác Trang trí tiệc cưới Tổ chức khai trương Tư vấn dịch vụ Thư viện ảnh Tin tức - sự kiện Liên hệ Chú hề sinh nhật Trang trí YEAR END PARTY công ty Trang trí tất niên cuối năm Trang trí tất niên xu hướng mới nhất Trang trí sinh nhật bé trai Hải Đăng Trang trí sinh nhật bé Khánh Vân Trang trí sinh nhật Bích Ngân Trang trí sinh nhật bé Thanh Trang Thuê ông già Noel phát quà Biểu diễn xiếc khỉ Xiếc quay đĩa
Thiết kế website Thiết kế website Thiết kế website Cách kháng tài khoản quảng cáo Mua bán Fanpage Facebook Dịch vụ SEO Tổ chức sinh nhật