I have a Kubernetes cluster running on AWS EKS, and I’m using an Application Load Balancer (ALB) with Ingress to route traffic to different services in my cluster. I’ve configured paths in my Ingress resource to forward traffic to different services on various ports:
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: uptime-ingress-9
  annotations:
    alb.ingress.kubernetes.io/scheme: internet-facing
    alb.ingress.kubernetes.io/target-type: ip
spec:
  ingressClassName: alb
  rules:
    - http:
        paths:
          - path: /
            pathType: Prefix
            backend:
              service:
                name: uptime-webapp
                port:
                  number: 80
          - path: /api
            pathType: Prefix
            backend:
              service:
                name: uptime-server
                port:
                  number: 3000
          - path: /pyapi
            pathType: Prefix
            backend:
              service:
                name: uptime-ml-mw
                port:
                  number: 8000
          - path: /socket.io/
            pathType: Prefix
            backend:
              service:
                name: uptime-ml-mw-socket
                port:
                  number: 8002
However, I’m facing an issue where the ALB marks the targets behind ports other than 80 as unhealthy: the target groups for the /api, /pyapi, and /socket.io/ paths (ports 3000, 8000, and 8002) fail their health checks, while the / path (port 80) works fine.
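From what I understand, the AWS Load Balancer Controller creates one target group per backend Service and, by default, health-checks each target with an HTTP GET / on the traffic port; the check can be overridden per Service with annotations. I have not set any of these yet. A sketch of what an override would look like for the API service (the /healthz path and the app label are placeholders, not my actual values):

apiVersion: v1
kind: Service
metadata:
  name: uptime-server
  annotations:
    # Health check settings the AWS Load Balancer Controller applies
    # to the target group it creates for this backend
    alb.ingress.kubernetes.io/healthcheck-path: /healthz # placeholder path
    alb.ingress.kubernetes.io/healthcheck-port: traffic-port
    alb.ingress.kubernetes.io/success-codes: "200-399"
spec:
  selector:
    app: uptime-server # placeholder label
  ports:
    - port: 3000
      targetPort: 3000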
I’ve checked the following:

- Ingress Configuration: The Ingress resource appears to be configured correctly, with paths pointing to services on different ports.
- AWS Security Groups and Network ACLs: I’ve ensured that the security groups and network ACLs allow traffic on the required ports. My security group has ingress rules for ports 80 and 443, so traffic on the other ports (3000, 8000, 8002) may be blocked.
- ALB Controller: I’m using the AWS Load Balancer Controller, and I’ve verified that it is configured to forward traffic to the appropriate ports on my backend services.
- Service Health Checks: Health checks are configured for my backend services, but they may not be correctly monitoring the endpoints on ports other than 80.
- Logging and Monitoring: Both are enabled for the ALB and the backend services, but I haven’t found anything that helps diagnose the issue.
- Pod Readiness Probes: Readiness probes are configured for my backend services, but I’m not sure they correctly monitor the endpoints on ports other than 80 (a simplified sketch follows this list).
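For the last item, this is roughly the shape of probe I mean (a simplified sketch; the /healthz path and the image are placeholders rather than my exact manifest):

containers:
  - name: uptime-server
    image: uptime-server:latest # placeholder image
    ports:
      - containerPort: 3000
    readinessProbe:
      httpGet:
        path: /healthz # placeholder endpoint
        port: 3000     # must match the port the container actually listens on
      initialDelaySeconds: 5
      periodSeconds: 10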
I’m unsure what else to check or how to resolve this issue. Any insights or suggestions on how to troubleshoot and fix this problem would be greatly appreciated. Thank you!
Here is my Terraform script for reference:
# Create AWS VPC
resource "aws_vpc" "uptime_cluster_vpc" {
  cidr_block           = "192.168.0.0/16"
  enable_dns_support   = true
  enable_dns_hostnames = true
  tags = {
    Name = "Uptime VPC"
  }
}

# Create a new VPC for the ALB
resource "aws_vpc" "alb_vpc" {
  cidr_block           = "10.0.0.0/16" # Adjust CIDR block as needed
  enable_dns_support   = true
  enable_dns_hostnames = true
  tags = {
    Name = "ALB VPC"
  }
}

# Create Internet Gateway
resource "aws_internet_gateway" "uptime_igw" {
  vpc_id = aws_vpc.uptime_cluster_vpc.id
}

# Create Internet Gateway for ALB VPC
resource "aws_internet_gateway" "alb_igw" {
  vpc_id = aws_vpc.alb_vpc.id
}

# Create Route Table
resource "aws_route_table" "uptime_route_table" {
  vpc_id = aws_vpc.uptime_cluster_vpc.id
  route {
    cidr_block = "0.0.0.0/0"
    gateway_id = aws_internet_gateway.uptime_igw.id
  }
}

# Create AWS Security Group for Kubernetes cluster nodes
resource "aws_security_group" "uptime_cluster_sg" {
  name   = var.cluster_sg_name
  vpc_id = aws_vpc.uptime_cluster_vpc.id
  ingress {
    from_port   = 80
    to_port     = 80
    protocol    = "tcp"
    cidr_blocks = [aws_vpc.alb_vpc.cidr_block]
  }
  ingress {
    from_port   = 443
    to_port     = 443
    protocol    = "tcp"
    cidr_blocks = [aws_vpc.alb_vpc.cidr_block]
  }
  ingress {
    from_port = 0
    to_port   = 0
    protocol  = "-1"
    self      = true
  }
  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
  }
}

# Create AWS Subnets for Kubernetes cluster
resource "aws_subnet" "subnet_abc123" {
  vpc_id                  = aws_vpc.uptime_cluster_vpc.id
  cidr_block              = "192.168.0.0/17"
  availability_zone       = "ap-south-1a"
  map_public_ip_on_launch = true
  tags = {
    "kubernetes.io/role/internal-elb"      = "1"
    "kubernetes.io/cluster/uptime_cluster" = "shared"
    "Name"                                 = "eks-subnet-abc123"
  }
}

resource "aws_subnet" "subnet_def456" {
  vpc_id                  = aws_vpc.uptime_cluster_vpc.id
  cidr_block              = "192.168.128.0/17"
  availability_zone       = "ap-south-1b"
  map_public_ip_on_launch = true
  tags = {
    "kubernetes.io/role/internal-elb"      = "1"
    "kubernetes.io/cluster/uptime_cluster" = "shared"
    "Name"                                 = "eks-subnet-def456"
  }
}

# Create subnets for the ALB VPC
resource "aws_subnet" "alb_subnet_a" {
  vpc_id                  = aws_vpc.alb_vpc.id
  cidr_block              = "10.0.1.0/24" # Adjust CIDR block as needed
  availability_zone       = "ap-south-1a" # Adjust availability zone as needed
  map_public_ip_on_launch = true
  tags = {
    Name                                   = "ALB Subnet A"
    "kubernetes.io/role/elb"               = "1"
    "kubernetes.io/cluster/uptime_cluster" = "shared"
  }
}

resource "aws_subnet" "alb_subnet_b" {
  vpc_id                  = aws_vpc.alb_vpc.id
  cidr_block              = "10.0.2.0/24" # Adjust CIDR block as needed
  availability_zone       = "ap-south-1b" # Adjust availability zone as needed
  map_public_ip_on_launch = true
  tags = {
    Name                                   = "ALB Subnet B"
    "kubernetes.io/role/elb"               = "1"
    "kubernetes.io/cluster/uptime_cluster" = "shared"
  }
}
# Create VPC peering connection from the cluster VPC to the ALB VPC
resource "aws_vpc_peering_connection" "cluster_to_alb_peering" {
  peer_vpc_id = aws_vpc.alb_vpc.id
  vpc_id      = aws_vpc.uptime_cluster_vpc.id
  auto_accept = true
  tags = {
    Name = "Cluster to ALB Peering"
  }
}
resource "aws_vpc_peering_connection_options" "cluster_to_alb_peering" {
vpc_peering_connection_id = aws_vpc_peering_connection.cluster_to_alb_peering.id
accepter {
allow_remote_vpc_dns_resolution = true
}
}
# Create route table for ALB VPC
resource "aws_route_table" "alb_route_table" {
vpc_id = aws_vpc.alb_vpc.id
route {
cidr_block = "0.0.0.0/0"
gateway_id = aws_internet_gateway.alb_igw.id
}
}
# Associate Subnets with Route Table
resource "aws_route_table_association" "subnet_abc123_association" {
subnet_id = aws_subnet.subnet_abc123.id
route_table_id = aws_route_table.uptime_route_table.id
}
resource "aws_route_table_association" "subnet_def456_association" {
subnet_id = aws_subnet.subnet_def456.id
route_table_id = aws_route_table.uptime_route_table.id
}
# Associate Subnets with Route Table for ALB VPC
resource "aws_route_table_association" "alb_subnet_a_association" {
subnet_id = aws_subnet.alb_subnet_a.id
route_table_id = aws_route_table.alb_route_table.id
}
resource "aws_route_table_association" "alb_subnet_b_association" {
subnet_id = aws_subnet.alb_subnet_b.id
route_table_id = aws_route_table.alb_route_table.id
}
# Add Route to Route Table in Cluster VPC for ALB CIDR
resource "aws_route" "cluster_to_alb_route" {
route_table_id = aws_route_table.uptime_route_table.id
destination_cidr_block = aws_vpc.alb_vpc.cidr_block
vpc_peering_connection_id = aws_vpc_peering_connection.cluster_to_alb_peering.id
}
# Add Route to Route Table in ALB VPC for Cluster CIDR
resource "aws_route" "alb_to_cluster_route" {
route_table_id = aws_route_table.alb_route_table.id
destination_cidr_block = aws_vpc.uptime_cluster_vpc.cidr_block
vpc_peering_connection_id = aws_vpc_peering_connection.cluster_to_alb_peering.id
}
resource "aws_eks_cluster" "uptime_cluster" {
name = "uptime_cluster"
role_arn = aws_iam_role.my_eks_cluster_role.arn
vpc_config {
subnet_ids = [aws_subnet.subnet_abc123.id, aws_subnet.subnet_def456.id] # Specify your subnet IDs
security_group_ids = [aws_security_group.uptime_cluster_sg.id] # Specify your security group ID
endpoint_public_access = true
endpoint_private_access = true
}
tags = {
Environment = "Test"
}
}
resource "aws_iam_role" "my_eks_cluster_role" {
name = "my-eks-cluster-role"
assume_role_policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "eks.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
EOF
}
resource "aws_iam_role_policy_attachment" "eks_cluster_policy" {
role = aws_iam_role.my_eks_cluster_role.name
policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"
}
resource "aws_eks_node_group" "webapp_server" {
cluster_name = aws_eks_cluster.uptime_cluster.name
node_group_name = "webapp-server"
node_role_arn = aws_iam_role.my_node_group_role.arn
subnet_ids = [aws_subnet.subnet_abc123.id]
scaling_config {
desired_size = 1
min_size = 1
max_size = 2
}
instance_types = ["t2.small"]
labels = { "app-stack" = "webapp-server" }
tags = {
Environment = "Production"
}
}
resource "aws_eks_node_group" "connector" {
cluster_name = aws_eks_cluster.uptime_cluster.name
node_group_name = "connector"
node_role_arn = aws_iam_role.my_node_group_role.arn
subnet_ids = [aws_subnet.subnet_abc123.id]
scaling_config {
desired_size = 1
min_size = 1
max_size = 1
}
instance_types = ["t2.small"]
labels = { "app-stack" = "connector" }
tags = {
Environment = "Production"
}
}
resource "aws_eks_node_group" "db" {
cluster_name = aws_eks_cluster.uptime_cluster.name
node_group_name = "database"
node_role_arn = aws_iam_role.my_node_group_role.arn
subnet_ids = [aws_subnet.subnet_abc123.id]
scaling_config {
desired_size = 2
min_size = 2
max_size = 2
}
instance_types = ["t2.medium"]
labels = { "app-stack" = "db" }
tags = {
Environment = "Production"
}
}
resource "aws_eks_node_group" "ml_mw_v" {
cluster_name = aws_eks_cluster.uptime_cluster.name
node_group_name = "mlmwv"
node_role_arn = aws_iam_role.my_node_group_role.arn
subnet_ids = [aws_subnet.subnet_abc123.id]
scaling_config {
desired_size = 1
min_size = 1
max_size = 1
}
instance_types = ["t2.medium"]
labels = { "app-stack" = "ml-mw-v" }
tags = {
Environment = "Production"
}
}
resource "aws_eks_node_group" "ml_prediction_engine_kafka" {
cluster_name = aws_eks_cluster.uptime_cluster.name
node_group_name = "ml-prediction-engine-kafka"
node_role_arn = aws_iam_role.my_node_group_role.arn
subnet_ids = [aws_subnet.subnet_abc123.id]
scaling_config {
desired_size = 1
min_size = 1
max_size = 1
}
instance_types = ["t2.medium"]
labels = { "app-stack" = "ml-prediction-engine-kafka" }
tags = {
Environment = "Production"
}
}
resource "aws_iam_role" "my_node_group_role" {
name = "my-node-group-role"
assume_role_policy = <<EOF
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "ec2.amazonaws.com"
},
"Action": "sts:AssumeRole"
}
]
}
EOF
}
resource "aws_iam_role_policy_attachment" "eks_worker_node_policy" {
role = aws_iam_role.my_node_group_role.name
policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy"
}
resource "aws_iam_role_policy_attachment" "eks_cni_policy" {
role = aws_iam_role.my_node_group_role.name
policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy"
}
resource "aws_iam_role_policy_attachment" "ec2_container_registry_read_only" {
role = aws_iam_role.my_node_group_role.name
policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly"
}
resource "aws_iam_role_policy_attachment" "AmazonEBSCSIDriverPolicy" {
role = aws_iam_role.my_node_group_role.name
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy"
}
resource "aws_iam_role_policy_attachment" "ElasticLoadBalancingFullAccess" {
role = aws_iam_role.my_node_group_role.name
policy_arn = "arn:aws:iam::aws:policy/ElasticLoadBalancingFullAccess"
}
data "aws_caller_identity" "current" {}
resource "aws_iam_policy" "create_security_group_policy" {
name = "CreateSecurityGroupPolicy"
description = "Allows creating security groups in a specific VPC"
policy = jsonencode({
"Version": "2012-10-17",
"Statement": [
{
Effect = "Allow",
Action = [
"ec2:CreateSecurityGroup",
"ec2:DescribeSecurityGroups",
"ec2:DescribeSecurityGroupRules",
"ec2:DescribeTags",
"ec2:CreateTags",
"ec2:AuthorizeSecurityGroupIngress",
"ec2:RevokeSecurityGroupIngress",
"ec2:AuthorizeSecurityGroupEgress",
"ec2:RevokeSecurityGroupEgress",
"ec2:ModifySecurityGroupRules",
"ec2:UpdateSecurityGroupRuleDescriptionsIngress",
"ec2:UpdateSecurityGroupRuleDescriptionsEgress"
],
Resource = "*"
},
{
Effect = "Allow",
Action = [
"wafv2:GetWebACL",
"wafv2:GetWebACLForResource",
"wafv2:AssociateWebACL",
"wafv2:DisassociateWebACL",
"waf-regional:GetWebACLForResource",
"waf-regional:GetWebACL",
"waf-regional:AssociateWebACL",
"waf-regional:DisassociateWebACL"
],
Resource = "*"
}
]
})
}
resource "aws_iam_policy_attachment" "attach_create_security_group_policy" {
name = "AttachCreateSecurityGroupPolicy"
roles = [aws_iam_role.my_node_group_role.name] # Replace with your IAM role name
policy_arn = aws_iam_policy.create_security_group_policy.arn
}
# Create security group for ALB
resource "aws_security_group" "alb_security_group" {
name = "alb-security-group"
description = "Security group for ALB"
vpc_id = aws_vpc.alb_vpc.id
ingress {
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
ingress {
from_port = 443
to_port = 443
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"] # allow all outbound traffic
  }
  tags = {
    Name = "alb-security-group"
  }
}
# alb controller
resource "helm_release" "alb_controller" {
  name       = "alb-controller"
  chart      = "aws-load-balancer-controller"
  repository = "https://aws.github.io/eks-charts"
  version    = "1.4.6"

  # Set values for the ALB controller
  set {
    name  = "autoDiscoverAwsRegion"
    value = "true"
  }
  set {
    name  = "vpcId"
    value = aws_vpc.alb_vpc.id
  }
  set {
    name  = "clusterName"
    value = aws_eks_cluster.uptime_cluster.name
  }
  set {
    name  = "subnetTags.kubernetes.io/role/elb"
    value = "1"
  }

  # Define namespace for the ALB controller
  namespace = "kube-system"
}

data "aws_security_group" "uptime_cluster_sg" {
  id = aws_security_group.uptime_cluster_sg.id
}

data "aws_security_group" "alb_security_group" {
  id = aws_security_group.alb_security_group.id
}

resource "aws_security_group_rule" "allow_cluster_egress" {
  type              = "egress"
  from_port         = 80
  to_port           = 80
  protocol          = "tcp"
  security_group_id = aws_security_group.alb_security_group.id
  cidr_blocks       = [aws_vpc.uptime_cluster_vpc.cidr_block]
}

# Update Network ACL for VPC1 (Cluster VPC)
resource "aws_network_acl_rule" "allow_alb_traffic_inbound" {
  network_acl_id = aws_vpc.uptime_cluster_vpc.default_network_acl_id
  rule_number    = 200
  protocol       = "tcp"
  rule_action    = "allow"
  cidr_block     = aws_vpc.alb_vpc.cidr_block
  from_port      = 80 # Adjust port as needed
  to_port        = 80 # Adjust port as needed
  egress         = false
}

resource "aws_network_acl_rule" "allow_alb_traffic_outbound" {
  network_acl_id = aws_vpc.uptime_cluster_vpc.default_network_acl_id
  rule_number    = 200
  protocol       = "tcp"
  rule_action    = "allow"
  cidr_block     = aws_vpc.alb_vpc.cidr_block
  from_port      = 80 # Adjust port as needed
  to_port        = 80 # Adjust port as needed
  egress         = true
}

# Update Network ACL for VPC2 (ALB VPC)
resource "aws_network_acl_rule" "allow_cluster_traffic_inbound" {
  network_acl_id = aws_vpc.alb_vpc.default_network_acl_id
  rule_number    = 200
  protocol       = "tcp"
  rule_action    = "allow"
  cidr_block     = aws_vpc.uptime_cluster_vpc.cidr_block
  from_port      = 80 # Adjust port as needed
  to_port        = 80 # Adjust port as needed
  egress         = false
}

resource "aws_network_acl_rule" "allow_cluster_traffic_outbound" {
  network_acl_id = aws_vpc.alb_vpc.default_network_acl_id
  rule_number    = 200
  protocol       = "tcp"
  rule_action    = "allow"
  cidr_block     = aws_vpc.uptime_cluster_vpc.cidr_block
  from_port      = 80 # Adjust port as needed
  to_port        = 80 # Adjust port as needed
  egress         = true
}
I tried to address the issue by adding aws_network_acl_rule entries for ports 3000 and 8000, and by updating the security groups with ingress rules for those ports. This didn’t resolve it; in fact, after I added the rules for ports 3000 and 8000, requests to port 80 also stopped working, which suggests some interference or conflict between the configurations.

I’m not entirely sure why adding rules for ports 3000 and 8000 would affect port 80, since they are distinct ports with separate configurations. It’s possible that there’s an issue with how the rules are being applied or interpreted by the ALB and the Kubernetes Ingress.
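One theory I’m now considering: the Terraform AWS provider documents that a security group defined with inline ingress/egress blocks cannot be combined with standalone aws_security_group_rule resources on the same group; the two styles conflict and overwrite each other’s rules. My script does exactly that for the ALB security group (inline rules plus the standalone allow_cluster_egress rule). If that is the cause, moving the group entirely to standalone rules would look roughly like this (a sketch, not my current code):

# Sketch: define the group with no inline rules, then manage every rule
# as a standalone aws_security_group_rule. Mixing the two styles on one
# group makes Terraform repeatedly overwrite the rule set.
resource "aws_security_group" "alb_security_group" {
  name        = "alb-security-group"
  description = "Security group for ALB"
  vpc_id      = aws_vpc.alb_vpc.id
}

# Listener ports only; as I understand it, the backend ports (3000, 8000,
# 8002) belong on the node security group, not on the ALB's.
resource "aws_security_group_rule" "alb_listeners_in" {
  for_each          = toset(["80", "443"])
  type              = "ingress"
  from_port         = tonumber(each.key)
  to_port           = tonumber(each.key)
  protocol          = "tcp"
  cidr_blocks       = ["0.0.0.0/0"]
  security_group_id = aws_security_group.alb_security_group.id
}

resource "aws_security_group_rule" "alb_all_out" {
  type              = "egress"
  from_port         = 0
  to_port           = 0
  protocol          = "-1"
  cidr_blocks       = ["0.0.0.0/0"]
  security_group_id = aws_security_group.alb_security_group.id
}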
I’ve double-checked the syntax and configuration of the aws_network_acl_rule and security group rules to make sure they’re correct, but ports other than 80 are still being marked as unhealthy.

The following are the configurations I tried updating in my Terraform script.
Security Group for Cluster
resource "aws_security_group" "uptime_cluster_sg" {
name = var.cluster_sg_name
vpc_id = aws_vpc.uptime_cluster_vpc.id
ingress {
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = [aws_vpc.alb_vpc.cidr_block]
}
ingress {
from_port = 443
to_port = 443
protocol = "tcp"
cidr_blocks = [aws_vpc.alb_vpc.cidr_block]
}
ingress {
from_port = 8000
to_port = 8000
protocol = "tcp"
cidr_blocks = [aws_vpc.alb_vpc.cidr_block]
}
ingress {
from_port = 3000
to_port = 3000
protocol = "tcp"
cidr_blocks = [aws_vpc.alb_vpc.cidr_block]
}
ingress {
from_port = 0
to_port = 0
protocol = "-1"
self = true
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
}
Security Group for Load Balancer
resource "aws_security_group" "alb_security_group" {
name = "alb-security-group"
description = "Security group for ALB"
vpc_id = aws_vpc.alb_vpc.id
ingress {
from_port = 80
to_port = 80
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
ingress {
from_port = 443
to_port = 443
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
ingress {
from_port = 8000
to_port = 8000
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
ingress {
from_port = 3000
to_port = 3000
protocol = "tcp"
cidr_blocks = ["0.0.0.0/0"]
}
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
tags = {
Name = "alb-security-group"
}
}
Network ACL Rules
resource "aws_network_acl_rule" "allow_alb_traffic_inbound" {
network_acl_id = aws_vpc.uptime_cluster_vpc.default_network_acl_id
rule_number = 200
protocol = "tcp"
rule_action = "allow"
cidr_block = aws_vpc.alb_vpc.cidr_block
from_port = 80
to_port = 80
egress = false
}
resource "aws_network_acl_rule" "allow_alb_traffic_outbound" {
network_acl_id = aws_vpc.uptime_cluster_vpc.default_network_acl_id
rule_number = 200
protocol = "tcp"
rule_action = "allow"
cidr_block = aws_vpc.alb_vpc.cidr_block
from_port = 80
to_port = 80
egress = true
}
resource "aws_network_acl_rule" "allow_alb_traffic_inbound_8000" {
network_acl_id = aws_vpc.uptime_cluster_vpc.default_network_acl_id
rule_number = 201
protocol = "tcp"
rule_action = "allow"
cidr_block = aws_vpc.alb_vpc.cidr_block
from_port = 8000
to_port = 8000
egress = false
}
resource "aws_network_acl_rule" "allow_alb_traffic_outbound_8000" {
network_acl_id = aws_vpc.uptime_cluster_vpc.default_network_acl_id
rule_number = 201
protocol = "tcp"
rule_action = "allow"
cidr_block = aws_vpc.alb_vpc.cidr_block
from_port = 8000
to_port = 8000
egress = true
}
resource "aws_network_acl_rule" "allow_alb_traffic_inbound_3000" {
network_acl_id = aws_vpc.uptime_cluster_vpc.default_network_acl_id
rule_number = 202
protocol = "tcp"
rule_action = "allow"
cidr_block = aws_vpc.alb_vpc.cidr_block
from_port = 3000
to_port = 3000
egress = false
}
resource "aws_network_acl_rule" "allow_alb_traffic_outbound_3000" {
network_acl_id = aws_vpc.uptime_cluster_vpc.default_network_acl_id
rule_number = 202
protocol = "tcp"
rule_action = "allow"
cidr_block = aws_vpc.alb_vpc.cidr_block
from_port = 3000
to_port = 3000
egress = true
}