I’m starting out in the world of data streaming with Apache Kafka. In this learning process I am doing a small data ingestion practice project using the Coincap API (https://docs.coincap.io/#89deffa0-ab03-4e0a-8d92-637a857d2c91)
I am using the Confluent Kafka library, which I want for a set of cryptocurrency to generate a topic with 3 partitions each. My main problem is that I cannot send the messages generated by the Coincap API to each topic, that is, the Bitcoin information to the Bitcoin topic, the Ethereum information to the Ethereum topic and so on.
I have 2 files: producer_api.py and pipeline_data.py
producer_api.py code
from confluent_kafka import Producer
from confluent_kafka.admin import AdminClient, NewTopic
import json
# Obtener las variables necesarias desde el config.json:
#with open('config.json', 'r') as config_file:
#config = json.load(config_file)
#topic_name = {config.get('topic_name')}
servers = "localhost:9092"
conf = {'bootstrap.servers': servers,
'partitioner': 'consistent_random'}
producer = Producer(conf)
# Definiendo funciones de Kafka
def configure_create_topics(topics:list, servers:str=servers) -> None:
'''Configura y crea los topics necesarios para el cluster kafka.'''
a = AdminClient(conf)
# Estableciendo la configuración de los topics
topics = [NewTopic(topic, num_partitions=3)
for topic in topics]
# Llamar create_topics para crear topics de manera asíncrona, un diccionario
# de <topic,future> es devuelto.
fs = a.create_topics(topics, request_timeout=15.0)
# Esperar a que finalice la operación.
# Todos los futuros terminaran en el mismo tiempo.
for topic, f in fs.items():
try:
f.result() # The result itself is None
print(f"Topic {topic} creado")
except Exception as e:
print(f"No se pudo crear el topic {topic}: {e}")
def send_message(message, name_topic:str, id:str) -> None:
'''Comienza a enviar mensajes y crea una clave de mensaje para cada mensaje.
Cada clave se crea tomando las dos primeras letras del topic en mayúscula y
la identificación de la crypto'''
try:
#producer.send(topic, value=message.encode('utf-8'))
#producer.produce(topic = name_topic, value=json.dumps(val), key=f"{name_topic[:3]}{id}".encode('utf-8'))
producer.produce(topic = name_topic, value=message.encode('utf-8'), key=f"{name_topic[:3]}{id}".encode('utf-8'))
producer.flush()
print(f"Produced: {message} to Kafka topic: {name_topic}")
except Exception as error:
print(f"Error: {error}")
pipeline_data.py code:
import time
import requests
import datetime
from producer_api import send_message
import threading
import datetime
tags = ["Bitcoin","Ethereum","Ripple","Litecoin","Cardano","Dash","Monero"]
cryptos = ["bitcoin","ethereum","ripple","litecoin","cardano","dash","monero"]
url_api = "https://api.coincap.io/v2/assets"
def producer_thread():
while True:
try:
response = requests.get(url_api)
if response.status_code == 200:
data = response.json()
#Verificar la estructura de la respuesta
if 'data' in data and isinstance(data['data'], list):
# Crear una lista de diccionarios con la información
rows = [{'datetime': str(datetime.datetime.now()), 'name': crypto['name'], 'symbol': crypto['symbol'], 'price' : crypto['priceUsd']} for crypto in data['data'] if crypto['id'] in cryptos]
#Enviar los datos a Kafka
for row in rows: #accedemos a cada elemento de la lista (en este caso cada elemento es un dictionario)
for k,v in row.items(): #acedemos a cada llave(k), valor(v) de cada diccionario
if v['name'] == 'bitcoin':
current_datetime = datetime.datetime.now()
name = row['name']
symbol = row['symbol']
price = row['price']
# Produce data to Kafka topic
message = f"{current_datetime}, {name}, {symbol}, {price}"
send_message(message, name_topic= "Bitcoin", id=symbol)
if v['name'] == 'ethereum':
current_datetime = datetime.datetime.now()
name = row['name']
symbol = row['symbol']
price = row['price']
# Produce data to Kafka topic
message = f"{current_datetime}, {name}, {symbol}, {price}"
send_message(message, name_topic= "Ethereum", id=symbol)
if v['name'] == 'ripple':
current_datetime = datetime.datetime.now()
name = row['name']
symbol = row['symbol']
price = row['price']
# Produce data to Kafka topic
message = f"{current_datetime}, {name}, {symbol}, {price}"
send_message(message, name_topic= "Ripple", id=symbol)
if v['name'] == 'litecoin':
current_datetime = datetime.datetime.now()
name = row['name']
symbol = row['symbol']
price = row['price']
# Produce data to Kafka topic
message = f"{current_datetime}, {name}, {symbol}, {price}"
send_message(message, name_topic= "Litecoin", id=symbol)
if v['name'] == 'cardano':
current_datetime = datetime.datetime.now()
name = row['name']
symbol = row['symbol']
price = row['price']
# Produce data to Kafka topic
message = f"{current_datetime}, {name}, {symbol}, {price}"
send_message(message, name_topic= "Cardano", id=symbol)
if v['name'] == 'dash':
current_datetime = datetime.datetime.now()
name = row['name']
symbol = row['symbol']
price = row['price']
# Produce data to Kafka topic
message = f"{current_datetime}, {name}, {symbol}, {price}"
send_message(message, name_topic= "Dash", id=symbol)
if v['name'] == 'monero':
current_datetime = datetime.datetime.now()
name = row['name']
symbol = row['symbol']
price = row['price']
# Produce data to Kafka topic
message = f"{current_datetime}, {name}, {symbol}, {price}"
send_message(message, name_topic= "Monero", id=symbol)
else:
print("No existe el nombre de la criptomoneda.")
"""
if k['name'] == 'bitcoin':
value = v['price'].values()
send_message(value, name_topic= "Bitcoin", id=data['data']['id'])
elif k['name'] == 'ethereum':
value = v['price'].values()
send_message(value, name_topic= "Ethereum", id=data['data']['id'])
elif k['name'] == 'ripple':
value = v['price'].values()
send_message(value, name_topic= "Ripple", id=data['data']['id'])
elif k['name'] == 'litecoin':
value = v['price'].values()
send_message(value, name_topic= "Litecoin", id=data['data']['id'])
elif k['name'] == 'cardano':
value = v['price'].values()
send_message(value, name_topic= "Cardano", id=data['data']['id'])
elif k['name'] == 'dash':
value = v['price'].values()
send_message(value, name_topic= "Dash", id=data['data']['id'])
elif k['name'] == 'monero':
value = v['price'].values()
send_message(value, name_topic= "Monero", id=data['data']['id'])
else:
print("No existe el nombre de la criptomoneda.")
"""
else:
print("La estructura de la respuesta no es la esperada.")
else:
print(f"Error al realizar la solicitud a la API. Código de estado: {response.status_code}")
time.sleep(30)
except Exception as e:
print(f"Error en producer_thread: {str(e)}")
# Crear hilos separados para el producer y el consumer
producer_thread = threading.Thread(target=producer_thread)
# Comenzar los hilos
producer_thread.start()
# Esperar por los hilos hasta finalizar (lo que nunca sucederá en este caso ya que se ejecutan infinitamente)
producer_thread.join()
Despite the above, I cannot connect to the Topics because it tells me that the Topic cannot be found. Any suggestions?