From a8806681250f60900dab5e997458e0be75ba4020 Mon Sep 17 00:00:00 2001 From: Rafael Fonseca Date: Mon, 12 Aug 2024 17:04:15 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9Belbv2:=20wait=20for=20LB=20active?= =?UTF-8?q?=20state=20instead=20of=20resolving=20DNS=20name?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using DNS name resolution as a way to check the load balancer is working can cause problems that are dependent on the host running CAPA. In some systems, the DNS resolution can fail with very large TTLs cached DNS responses, causing very long provisioning times. Instead of DNS resolution, let's use the AWS API to check for the load balancer "active" state. Waiting for resolvable DNS names should be left for the clients to do. --- controllers/awscluster_controller.go | 8 ------ .../awscluster_controller_unit_test.go | 25 ------------------- controllers/helpers_test.go | 3 +++ pkg/cloud/services/elb/loadbalancer.go | 11 ++++++++ pkg/cloud/services/elb/loadbalancer_test.go | 13 ++++++++++ 5 files changed, 27 insertions(+), 33 deletions(-) diff --git a/controllers/awscluster_controller.go b/controllers/awscluster_controller.go index aeac110728..717d061e7f 100644 --- a/controllers/awscluster_controller.go +++ b/controllers/awscluster_controller.go @@ -19,7 +19,6 @@ package controllers import ( "context" "fmt" - "net" "time" "github.com/google/go-cmp/cmp" @@ -288,13 +287,6 @@ func (r *AWSClusterReconciler) reconcileLoadBalancer(clusterScope *scope.Cluster return &retryAfterDuration, nil } - clusterScope.Debug("Looking up IP address for DNS", "dns", awsCluster.Status.Network.APIServerELB.DNSName) - if _, err := net.LookupIP(awsCluster.Status.Network.APIServerELB.DNSName); err != nil { - clusterScope.Error(err, "failed to get IP address for dns name", "dns", awsCluster.Status.Network.APIServerELB.DNSName) - conditions.MarkFalse(awsCluster, infrav1.LoadBalancerReadyCondition, infrav1.WaitForDNSNameResolveReason, clusterv1.ConditionSeverityInfo, "") - clusterScope.Info("Waiting on API server ELB DNS name to resolve") - return &retryAfterDuration, nil - } conditions.MarkTrue(awsCluster, infrav1.LoadBalancerReadyCondition) awsCluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{ diff --git a/controllers/awscluster_controller_unit_test.go b/controllers/awscluster_controller_unit_test.go index e282d2bf79..be59c0a678 100644 --- a/controllers/awscluster_controller_unit_test.go +++ b/controllers/awscluster_controller_unit_test.go @@ -395,31 +395,6 @@ func TestAWSClusterReconcileOperations(t *testing.T) { g.Expect(err).To(BeNil()) expectAWSClusterConditions(g, cs.AWSCluster, []conditionAssertion{{infrav1.LoadBalancerReadyCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitForDNSNameReason}}) }) - t.Run("Should fail AWSCluster create with LoadBalancer reconcile failure with WaitForDNSNameResolve condition as false", func(t *testing.T) { - g := NewWithT(t) - awsCluster := getAWSCluster("test", "test") - runningCluster := func() { - networkSvc.EXPECT().ReconcileNetwork().Return(nil) - sgSvc.EXPECT().ReconcileSecurityGroups().Return(nil) - ec2Svc.EXPECT().ReconcileBastion().Return(nil) - elbSvc.EXPECT().ReconcileLoadbalancers().Return(nil) - } - csClient := setup(t, &awsCluster) - defer teardown() - runningCluster() - cs, err := scope.NewClusterScope( - scope.ClusterScopeParams{ - Client: csClient, - Cluster: &clusterv1.Cluster{}, - AWSCluster: &awsCluster, - }, - ) - awsCluster.Status.Network.APIServerELB.DNSName = "test-apiserver.us-east-1.aws" - g.Expect(err).To(BeNil()) - _, err = reconciler.reconcileNormal(cs) - g.Expect(err).To(BeNil()) - expectAWSClusterConditions(g, cs.AWSCluster, []conditionAssertion{{infrav1.LoadBalancerReadyCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitForDNSNameResolveReason}}) - }) }) }) t.Run("Reconcile delete AWSCluster", func(t *testing.T) { diff --git a/controllers/helpers_test.go b/controllers/helpers_test.go index 09d4402af8..72be2f58b1 100644 --- a/controllers/helpers_test.go +++ b/controllers/helpers_test.go @@ -291,6 +291,9 @@ func mockedCreateLBV2Calls(t *testing.T, m *mocks.MockELBV2APIMockRecorder) { LoadBalancerArn: lbArn, SecurityGroups: aws.StringSlice([]string{"sg-apiserver-lb"}), })).MaxTimes(1) + m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{ + LoadBalancerArns: []*string{lbArn}, + })).MaxTimes(1) } func mockedDescribeTargetGroupsCall(t *testing.T, m *mocks.MockELBV2APIMockRecorder) { diff --git a/pkg/cloud/services/elb/loadbalancer.go b/pkg/cloud/services/elb/loadbalancer.go index 39a9f927be..a739e3706c 100644 --- a/pkg/cloud/services/elb/loadbalancer.go +++ b/pkg/cloud/services/elb/loadbalancer.go @@ -121,6 +121,17 @@ func (s *Service) reconcileV2LB(lbSpec *infrav1.AWSLoadBalancerSpec) error { return err } + wReq := &elbv2.DescribeLoadBalancersInput{ + LoadBalancerArns: aws.StringSlice([]string{lb.ARN}), + } + s.scope.Debug("Waiting for LB to become active", "api-server-lb-name", lb.Name) + waitStart := time.Now() + if err := s.ELBV2Client.WaitUntilLoadBalancerAvailableWithContext(context.TODO(), wReq); err != nil { + s.scope.Error(err, "failed to wait for LB to become available", "time", time.Since(waitStart)) + return err + } + s.scope.Debug("LB reports active state", "api-server-lb-name", lb.Name, "time", time.Since(waitStart)) + // set up the type for later processing lb.LoadBalancerType = lbSpec.LoadBalancerType if lb.IsManaged(s.scope.Name()) { diff --git a/pkg/cloud/services/elb/loadbalancer_test.go b/pkg/cloud/services/elb/loadbalancer_test.go index f2b4b1dbbe..212192bf13 100644 --- a/pkg/cloud/services/elb/loadbalancer_test.go +++ b/pkg/cloud/services/elb/loadbalancer_test.go @@ -2275,6 +2275,9 @@ func TestReconcileV2LB(t *testing.T) { }, nil, ) + m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{ + LoadBalancerArns: aws.StringSlice([]string{elbArn}), + })).Return(nil) }, check: func(t *testing.T, lb *infrav1.LoadBalancer, err error) { t.Helper() @@ -2476,6 +2479,10 @@ func TestReconcileV2LB(t *testing.T) { LoadBalancerArn: aws.String(elbArn), Subnets: []*string{}, }).Return(&elbv2.SetSubnetsOutput{}, nil) + + m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{ + LoadBalancerArns: aws.StringSlice([]string{elbArn}), + })).Return(nil) }, check: func(t *testing.T, lb *infrav1.LoadBalancer, err error) { t.Helper() @@ -2658,6 +2665,12 @@ func TestReconcileLoadbalancers(t *testing.T) { }, nil, ) + m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{ + LoadBalancerArns: aws.StringSlice([]string{elbArn}), + })).Return(nil) + m.WaitUntilLoadBalancerAvailableWithContext(gomock.Any(), gomock.Eq(&elbv2.DescribeLoadBalancersInput{ + LoadBalancerArns: aws.StringSlice([]string{secondElbArn}), + })).Return(nil) }, check: func(t *testing.T, firstLB *infrav1.LoadBalancer, secondLB *infrav1.LoadBalancer, err error) { t.Helper()