I’m having the following problem.
I’m running a Dagster instance on an EC2 machine, inside a Docker container. The container itself can access the bucket, but when the code runs inside Dagster, I get the following error:
botocore.errorfactory.NoSuchBucket: An error occurred (NoSuchBucket) when calling the PutObject operation: The specified bucket does not exist

  File "/usr/local/lib/python3.11/site-packages/dagster/_grpc/impl.py", line 375, in get_external_sensor_execution
    with SensorEvaluationContext(
  File "/usr/local/lib/python3.11/site-packages/dagster/_core/definitions/sensor_definition.py", line 223, in __exit__
    self._exit_stack.close()
  File "/usr/local/lib/python3.11/contextlib.py", line 597, in close
    self.__exit__(None, None, None)
  File "/usr/local/lib/python3.11/contextlib.py", line 589, in __exit__
    raise exc_details[1]
  File "/usr/local/lib/python3.11/contextlib.py", line 574, in __exit__
    if cb(*exc_details):
       ^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/dagster/_core/definitions/instigation_logger.py", line 118, in __exit__
    self._exit_stack.close()
  File "/usr/local/lib/python3.11/contextlib.py", line 597, in close
    self.__exit__(None, None, None)
  File "/usr/local/lib/python3.11/contextlib.py", line 589, in __exit__
    raise exc_details[1]
  File "/usr/local/lib/python3.11/contextlib.py", line 574, in __exit__
    if cb(*exc_details):
       ^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/contextlib.py", line 144, in __exit__
    next(self.gen)
  File "/usr/local/lib/python3.11/site-packages/dagster/_core/storage/cloud_storage_compute_log_manager.py", line 97, in open_log_stream
    self._on_capture_complete(log_key)
  File "/usr/local/lib/python3.11/site-packages/dagster/_core/storage/cloud_storage_compute_log_manager.py", line 101, in _on_capture_complete
    self.upload_to_cloud_storage(log_key, ComputeIOType.STDERR)
  File "/usr/local/lib/python3.11/site-packages/dagster_aws/s3/compute_log_manager.py", line 249, in upload_to_cloud_storage
    self._s3_session.upload_fileobj(data, self._s3_bucket, s3_key, ExtraArgs=extra_args)
  File "/usr/local/lib/python3.11/site-packages/boto3/s3/inject.py", line 642, in upload_fileobj
    return future.result()
           ^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/s3transfer/futures.py", line 103, in result
    return self._coordinator.result()
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/s3transfer/futures.py", line 266, in result
    raise self._exception
  File "/usr/local/lib/python3.11/site-packages/s3transfer/tasks.py", line 139, in __call__
    return self._execute_main(kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/s3transfer/tasks.py", line 162, in _execute_main
    return_value = self._main(**kwargs)
                   ^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/s3transfer/upload.py", line 764, in _main
    client.put_object(Bucket=bucket, Key=key, Body=body, **extra_args)
  File "/usr/local/lib/python3.11/site-packages/botocore/client.py", line 565, in _api_call
    return self._make_api_call(operation_name, kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/site-packages/botocore/client.py", line 1021, in _make_api_call
    raise error_class(parsed_response, operation_name)
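As far as I can tell from the traceback, the failing PutObject comes from dagster_aws's S3ComputeLogManager uploading the sensor's stderr logs, which is configured in the dagster.yaml that gets copied into the image. That block looks roughly like this (bucket and prefix below are placeholders, not my actual values):

compute_logs:
  module: dagster_aws.s3.compute_log_manager
  class: S3ComputeLogManager
  config:
    bucket: "my-compute-log-bucket"     # placeholder
    prefix: "dagster-compute-logs"      # placeholder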
Dagster itself is not an off-the-shelf image but a custom one; the Dockerfile looks like this:
FROM python:3.11.3-slim as python-base
# Create necessary directories
RUN mkdir -p /opt/dagster/dagster_home /opt/dagster/app /opt/dagster/app/dagster-ml-pipeline/ /opt/dagster/app/dagster-ml-pipeline/etl/
# Install necessary system packages
RUN apt-get update && apt-get install -y supervisor curl unzip
RUN curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && unzip awscliv2.zip && ./aws/install
# Install Python packages
RUN pip install dagster-webserver dagster-postgres dagster-aws kafka-python==2.0.2 boto3
# Set environment variables
ENV DAGSTER_HOME=/opt/dagster/dagster_home/
# Copy project files
COPY repo.py workspace.yaml /opt/dagster/app/
COPY dagster.yaml /opt/dagster/dagster_home/
COPY desrv_company_graph/src/services/north_data_sync_job.py /opt/dagster/app/dagster-ml-pipeline/etl/company_graph_etls/
COPY .env /opt/dagster/app/dagster-ml-pipeline/
COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf
WORKDIR /opt/dagster/app
# Expose port for Dagit
EXPOSE 3000
# Start supervisor to manage processes
CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
My Docker Compose file is the following:
version: '3.7'

services:
  # Kafka
  broker:
    container_name: kafka-broker
    image: confluentinc/confluent-local:7.4.1
    hostname: kafka-broker
    ports:
      - "8082:8082"
      - "9092:9092"
      - "29092:29092"
      - "29093:29093"
    environment:
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka-broker:9092,PLAINTEXT_HOST://10.130.139.46:29092
      KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:9092,PLAINTEXT_HOST://0.0.0.0:29092,CONTROLLER://0.0.0.0:29093
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT,CONTROLLER:PLAINTEXT
      KAFKA_CONTROLLER_LISTENER_NAMES: CONTROLLER
      KAFKA_CONTROLLER_QUORUM_VOTERS: 1@kafka-broker:29093
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1
      KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
      KAFKA_NODE_ID: 1
      KAFKA_PROCESS_ROLES: broker,controller
      KAFKA_LOG_DIRS: /var/lib/kafka/data
      KAFKA_MESSAGE_MAX_BYTES: 200000000        # Increased
      KAFKA_REPLICA_FETCH_MAX_BYTES: 200000000  # Increased
    volumes:
      - /var/lib/kafka/data
    networks:
      - kafka-network

  # Kafka UI
  kafka-ui:
    container_name: kafka-ui
    image: provectuslabs/kafka-ui:latest
    ports:
      - 9999:8080
    environment:
      KAFKA_CLUSTERS_0_NAME: local
      KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka-broker:9092
    networks:
      - kafka-network

  # Dagster webserver
  dagster-dagit:
    build:
      context: .
      dockerfile: Dockerfile
    environment:
      - DAGSTER_PG_PASSWORD=postgres
      - DAGSTER_PG_USERNAME=postgres
      - DAGSTER_PG_DB=postgres
      - DAGSTER_PG_HOST=dagster-postgres
      - DAGSTER_GRPC_TIMEOUT=120  # Increase timeout to 120 seconds
    ports:
      - 40001:3000
    networks:
      - kafka-network

  # Dagster daemon
  dagster-daemon:
    build:
      context: .
      dockerfile: Dockerfile
    environment:
      - DAGSTER_GRPC_TIMEOUT=120  # Increase timeout to 120 seconds
    networks:
      - kafka-network

  # Dagster Postgres
  dagster-postgres:
    image: postgres:13.3
    ports:
      - 5433:5432
    environment:
      - POSTGRES_PASSWORD=postgres
      - POSTGRES_DB=postgres
    volumes:
      - dagster-postgres:/var/lib/postgresql/data
    networks:
      - kafka-network

networks:
  kafka-network:
    name: kafka-network

volumes:
  dagster-postgres:
    driver: local
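One detail that may matter: neither Dagster service is given any AWS-specific environment variables, so boto3 inside those containers should be falling back to the EC2 instance profile. For reference, explicitly passing region and credentials through Compose would look roughly like this; this is only a sketch with placeholder values, not something the setup above currently does:

  dagster-daemon:
    build:
      context: .
      dockerfile: Dockerfile
    environment:
      - DAGSTER_GRPC_TIMEOUT=120
      - AWS_DEFAULT_REGION=eu-central-1                 # placeholder region
      - AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID}          # only if not relying on the instance profile
      - AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY}  # only if not relying on the instance profile
    networks:
      - kafka-network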
And this is the sensor code that is actually running:
import time

from dagster import RunRequest, SkipReason, sensor

# ml_consumer_job, create_kafka_consumer, and S3_FILES_TOPIC are defined elsewhere in this project


@sensor(job=ml_consumer_job)
def s3_files_sensor(context):
    context.log.info("Starting s3_files_sensor evaluation")
    last_offset = int(context.cursor) if context.cursor else 0
    consumer = create_kafka_consumer(S3_FILES_TOPIC)

    # Wait for the consumer to get partition assignments
    while not consumer.assignment():
        consumer.poll(timeout_ms=100)
        time.sleep(0.1)  # Small sleep to avoid a tight loop

    for tp in consumer.assignment():
        consumer.seek(tp, last_offset)

    max_messages = 5  # Process fewer messages to ensure the tick completes in time
    processed_messages = 0
    timeout_seconds = 50  # Stay below 60 seconds so the tick does not exceed its timeout
    start_time = time.time()

    try:
        for records in consumer.poll(timeout_ms=1000, max_records=max_messages).values():
            for record in records:
                if record.offset >= last_offset:
                    context.update_cursor(str(record.offset))
                    context.log.info(
                        f"Received message from S3_FILES_TOPIC: {record.value}"
                    )
                    yield RunRequest(
                        run_key=str(record.offset),
                        run_config={
                            "ops": {
                                "run_ml_consumer": {
                                    "inputs": {
                                        "message": record.value,
                                    }
                                }
                            }
                        },
                    )
                    processed_messages += 1
                if (time.time() - start_time) > timeout_seconds:
                    context.log.warning(
                        "Sensor execution nearing timeout, stopping processing"
                    )
                    consumer.commit()
                    return

        if processed_messages == 0:
            yield SkipReason("No new messages since the last run.")
        else:
            consumer.commit()
    except Exception as e:
        context.log.error(f"Error in s3_files_sensor: {e}")
        raise

    context.log.info("Completed s3_files_sensor evaluation")
And this is the consumer code that reads those messages, fetches the JSON from S3, and forwards it to ml-results-topic:

import json
import logging

import boto3
from botocore.exceptions import ClientError, NoCredentialsError, PartialCredentialsError
from kafka import KafkaConsumer, KafkaProducer

logging.basicConfig(level=logging.INFO)

s3_resource = boto3.resource('s3')

consumer = KafkaConsumer(
    's3-files-topic',
    bootstrap_servers='localhost:29092',
    auto_offset_reset='earliest'
)

producer = KafkaProducer(
    bootstrap_servers='localhost:29092',
    value_serializer=lambda v: json.dumps(v).encode('utf-8')
)


def bucket_exists(bucket_name):
    try:
        s3_resource.meta.client.head_bucket(Bucket=bucket_name)
        return True
    except ClientError as e:
        logging.error(f"Bucket {bucket_name} does not exist or you have no access. Error: {e}")
        return False


for message in consumer:
    try:
        result = json.loads(message.value.decode('utf-8'))
        s3_bucket = result['s3_bucket']
        s3_key = result['s3_key']
        logging.info(f"Processing file from bucket: {s3_bucket}, key: {s3_key}")

        # Check if the bucket exists
        if not bucket_exists(s3_bucket):
            logging.error(f"Bucket {s3_bucket} does not exist. Skipping message.")
            continue

        # Fetch the JSON file from S3
        s3_object = s3_resource.Object(bucket_name=s3_bucket, key=s3_key)
        json_content = json.loads(s3_object.get()['Body'].read().decode('utf-8'))

        # Prepare the message for the next topic
        forward_message = {
            's3_bucket': s3_bucket,
            's3_key': s3_key,
            'json_content': json_content
        }

        # Send the message to ml-results-topic
        producer.send('ml-results-topic', forward_message)
        logging.info(f"Forwarded message to ml-results-topic: {forward_message}")
    except NoCredentialsError:
        logging.error("AWS credentials not found.")
    except PartialCredentialsError as e:
        logging.error(f"Incomplete AWS credentials provided. Error: {e}")
    except Exception as e:
        logging.error(f"Error processing message from S3: {e}")
The logs from the Kafka sensor show the following:
INFO  Starting s3_files_sensor evaluation  3:41:52.151 PM
INFO  Received message from S3_FILES_TOPIC: {"s3_bucket": "company-graph-test-bucket", "s3_key": "testing_file_12345678.json"}  3:41:52.501 PM
INFO  Completed s3_files_sensor evaluation  3:41:52.530 PM
The sensor picks up the correct message, but for some reason it cannot reach the bucket.
The bucket policy allows Get, Put, List, and all other actions.
When I list the objects in the bucket directly from within the Docker container, it works without issue.
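The kind of check I run inside the container looks like this (a minimal sketch; the bucket name is the one from the sensor log above):

import boto3

BUCKET = "company-graph-test-bucket"

# Which identity is boto3 actually using inside the container?
sts = boto3.client("sts")
print(sts.get_caller_identity()["Arn"])

s3 = boto3.client("s3")
# Raises a ClientError (404) if this identity really cannot see the bucket
s3.head_bucket(Bucket=BUCKET)
# Confirm read access by listing a few objects
print(s3.list_objects_v2(Bucket=BUCKET, MaxKeys=5).get("KeyCount"))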
I've tried a ton of options for connecting to S3 with different clients, but with no success.
Any ideas here?