Skip to content

Commit

Permalink
Merge pull request #5039 from mtulio/OCPBUGS-36293-fix-byoip-eip
Browse files Browse the repository at this point in the history
🐛 ec2/byoip: fix EIP leak when creating machine
  • Loading branch information
k8s-ci-robot authored Sep 10, 2024
2 parents 686a3f5 + 4626a6a commit 3f3ce56
Show file tree
Hide file tree
Showing 8 changed files with 123 additions and 57 deletions.
19 changes: 11 additions & 8 deletions controllers/awsmachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -533,17 +533,20 @@ func (r *AWSMachineReconciler) reconcileNormal(_ context.Context, machineScope *
}

// BYO Public IPv4 Pool feature: allocates and associates an EIP to machine when PublicIP and
// cluster-wide Public IPv4 Pool configuration are set. The EIP must be associated after the instance
// is created and transictioned from Pending state.
// In the regular flow, if the instance have already a public IPv4 address (EIP) associated it will
// be released when a new is assigned, the createInstance() prevents that behavior by enforcing
// to not launch an instance with EIP, allowing ReconcileElasticIPFromPublicPool assigning
// a BYOIP without duplication.
// cluster-wide config Public IPv4 Pool configuration are set. The custom EIP is associated
// after the instance is created and transictioned to Running state.
// The CreateInstance() is enforcing to not assign public IP address when PublicIP is set with
// BYOIpv4 Pool, preventing a duplicated EIP creation.
if pool := machineScope.GetElasticIPPool(); pool != nil {
if err := ec2svc.ReconcileElasticIPFromPublicPool(pool, instance); err != nil {
machineScope.Error(err, "failed to associate elastic IP address")
requeue, err := ec2svc.ReconcileElasticIPFromPublicPool(pool, instance)
if err != nil {
machineScope.Error(err, "Failed to reconcile BYO Public IPv4")
return ctrl.Result{}, err
}
if requeue {
machineScope.Debug("Found instance in pending state while reconciling publicIpv4Pool, requeue", "instance", instance.ID)
return ctrl.Result{RequeueAfter: DefaultReconcilerRequeue}, nil
}
}

if feature.Gates.Enabled(feature.EventBridgeInstanceState) {
Expand Down
2 changes: 1 addition & 1 deletion controllers/awsmachine_controller_unit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1023,7 +1023,7 @@ func TestAWSMachineReconciler(t *testing.T) {
PublicIpv4Pool: aws.String("ipv4pool-ec2-0123456789abcdef0"),
PublicIpv4PoolFallBackOrder: ptr.To(infrav1.PublicIpv4PoolFallbackOrderAmazonPool),
}
ec2Svc.EXPECT().ReconcileElasticIPFromPublicPool(gomock.Any(), gomock.Any()).Return(nil).AnyTimes()
ec2Svc.EXPECT().ReconcileElasticIPFromPublicPool(gomock.Any(), gomock.Any()).Return(false, nil).AnyTimes()

_, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs, cs)
g.Expect(err).To(BeNil())
Expand Down
49 changes: 42 additions & 7 deletions pkg/cloud/services/ec2/eip.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
"k8s.io/utils/ptr"

infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record"
Expand All @@ -16,17 +17,51 @@ func getElasticIPRoleName(instanceID string) string {
}

// ReconcileElasticIPFromPublicPool reconciles the elastic IP from a custom Public IPv4 Pool.
func (s *Service) ReconcileElasticIPFromPublicPool(pool *infrav1.ElasticIPPool, instance *infrav1.Instance) error {
// TODO: check if the instance is in the state allowing EIP association.
// Expected instance states: pending or running
// https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-lifecycle.html
func (s *Service) ReconcileElasticIPFromPublicPool(pool *infrav1.ElasticIPPool, instance *infrav1.Instance) (bool, error) {
shouldRequeue := true
// Should not happen
if pool == nil {
return shouldRequeue, fmt.Errorf("unexpected behavior, pool must be set when reconcile ElasticIPPool")
}
iip := ptr.Deref(instance.PublicIP, "")
s.scope.Debug("Reconciling machine with custom Public IPv4 Pool", "instance-id", instance.ID, "instance-state", instance.State, "instance-public-ip", iip, "publicIpv4PoolID", pool.PublicIpv4Pool)

// Requeue when the instance is not ready to be associated.
if instance.State != infrav1.InstanceStateRunning {
s.scope.Debug("Unable to reconcile Elastic IP Pool for instance", "instance-id", instance.ID, "instance-state", instance.State)
return shouldRequeue, nil
}

// All done, must reconcile only when the instance is in running state.
shouldRequeue = false

// Prevent running association every reconciliation when it is already done.
addrs, err := s.netService.GetAddresses(getElasticIPRoleName(instance.ID))
if err != nil {
s.scope.Error(err, "error checking if addresses exists for Elastic IP Pool to machine", "eip-role", getElasticIPRoleName(instance.ID))
return shouldRequeue, err
}
if len(addrs.Addresses) > 0 {
if len(addrs.Addresses) != 1 {
return shouldRequeue, fmt.Errorf("unexpected number of EIPs allocated to the role. expected 1, got %d", len(addrs.Addresses))
}
addr := addrs.Addresses[0]
// address is already associated.
if addr.AssociationId != nil && addr.InstanceId != nil && *addr.InstanceId == instance.ID {
s.scope.Debug("Machine is already associated with an Elastic IP with custom Public IPv4 pool", "eip", addr.AllocationId, "eip-address", addr.PublicIp, "eip-associationID", addr.AssociationId, "eip-instance", addr.InstanceId)
return shouldRequeue, nil
}
}

// Get existing, or allocate an EIP, then Associate to the machine.
// Should requeue if any error is returned in the process.
if err := s.getAndAssociateAddressesToInstance(pool, getElasticIPRoleName(instance.ID), instance.ID); err != nil {
return fmt.Errorf("failed to reconcile Elastic IP: %w", err)
return true, fmt.Errorf("failed to reconcile Elastic IP: %w", err)
}
return nil
return shouldRequeue, nil
}

// ReleaseElasticIP releases a specific elastic IP based on the instance role.
// ReleaseElasticIP releases a specific Elastic IP based on the instance role.
func (s *Service) ReleaseElasticIP(instanceID string) error {
return s.netService.ReleaseAddressByRole(getElasticIPRoleName(instanceID))
}
Expand Down
22 changes: 7 additions & 15 deletions pkg/cloud/services/ec2/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -572,21 +572,13 @@ func (s *Service) runInstance(role string, i *infrav1.Instance) (*infrav1.Instan

input.NetworkInterfaces = netInterfaces
} else {
if ptr.Deref(i.PublicIPOnLaunch, false) {
input.NetworkInterfaces = []*ec2.InstanceNetworkInterfaceSpecification{
{
DeviceIndex: aws.Int64(0),
SubnetId: aws.String(i.SubnetID),
Groups: aws.StringSlice(i.SecurityGroupIDs),
AssociatePublicIpAddress: i.PublicIPOnLaunch,
},
}
} else {
input.SubnetId = aws.String(i.SubnetID)

if len(i.SecurityGroupIDs) > 0 {
input.SecurityGroupIds = aws.StringSlice(i.SecurityGroupIDs)
}
input.NetworkInterfaces = []*ec2.InstanceNetworkInterfaceSpecification{
{
DeviceIndex: aws.Int64(0),
SubnetId: aws.String(i.SubnetID),
Groups: aws.StringSlice(i.SecurityGroupIDs),
AssociatePublicIpAddress: i.PublicIPOnLaunch,
},
}
}

Expand Down
66 changes: 48 additions & 18 deletions pkg/cloud/services/ec2/instances_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -672,11 +672,16 @@ func TestCreateInstance(t *testing.T) {
}, nil)
m.
RunInstancesWithContext(context.TODO(), &ec2.RunInstancesInput{
ImageId: aws.String("abc"),
InstanceType: aws.String("m5.2xlarge"),
KeyName: aws.String("default"),
SecurityGroupIds: aws.StringSlice([]string{"2", "3"}),
SubnetId: aws.String("subnet-3"),
ImageId: aws.String("abc"),
InstanceType: aws.String("m5.2xlarge"),
KeyName: aws.String("default"),
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
{
DeviceIndex: aws.Int64(0),
SubnetId: aws.String("subnet-3"),
Groups: aws.StringSlice([]string{"2", "3"}),
},
},
TagSpecifications: []*ec2.TagSpecification{
{
ResourceType: aws.String("instance"),
Expand Down Expand Up @@ -920,11 +925,16 @@ func TestCreateInstance(t *testing.T) {
}, nil)
m.
RunInstancesWithContext(context.TODO(), &ec2.RunInstancesInput{
ImageId: aws.String("abc"),
InstanceType: aws.String("m5.2xlarge"),
KeyName: aws.String("default"),
SecurityGroupIds: aws.StringSlice([]string{"4", "3"}),
SubnetId: aws.String("subnet-5"),
ImageId: aws.String("abc"),
InstanceType: aws.String("m5.2xlarge"),
KeyName: aws.String("default"),
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
{
DeviceIndex: aws.Int64(0),
SubnetId: aws.String("subnet-5"),
Groups: aws.StringSlice([]string{"4", "3"}),
},
},
TagSpecifications: []*ec2.TagSpecification{
{
ResourceType: aws.String("instance"),
Expand Down Expand Up @@ -3346,8 +3356,13 @@ func TestCreateInstance(t *testing.T) {
Placement: &ec2.Placement{
Tenancy: &tenancy,
},
SecurityGroupIds: []*string{aws.String("2"), aws.String("3")},
SubnetId: aws.String("subnet-1"),
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
{
DeviceIndex: aws.Int64(0),
SubnetId: aws.String("subnet-1"),
Groups: []*string{aws.String("2"), aws.String("3")},
},
},
TagSpecifications: []*ec2.TagSpecification{
{
ResourceType: aws.String("instance"),
Expand Down Expand Up @@ -3555,8 +3570,13 @@ func TestCreateInstance(t *testing.T) {
Placement: &ec2.Placement{
GroupName: aws.String("placement-group1"),
},
SecurityGroupIds: []*string{aws.String("2"), aws.String("3")},
SubnetId: aws.String("subnet-1"),
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
{
DeviceIndex: aws.Int64(0),
SubnetId: aws.String("subnet-1"),
Groups: []*string{aws.String("2"), aws.String("3")},
},
},
TagSpecifications: []*ec2.TagSpecification{
{
ResourceType: aws.String("instance"),
Expand Down Expand Up @@ -3785,8 +3805,13 @@ func TestCreateInstance(t *testing.T) {
Tenancy: &tenancy,
GroupName: aws.String("placement-group1"),
},
SecurityGroupIds: []*string{aws.String("2"), aws.String("3")},
SubnetId: aws.String("subnet-1"),
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
{
DeviceIndex: aws.Int64(0),
SubnetId: aws.String("subnet-1"),
Groups: []*string{aws.String("2"), aws.String("3")},
},
},
TagSpecifications: []*ec2.TagSpecification{
{
ResourceType: aws.String("instance"),
Expand Down Expand Up @@ -3976,8 +4001,13 @@ func TestCreateInstance(t *testing.T) {
GroupName: aws.String("placement-group1"),
PartitionNumber: aws.Int64(2),
},
SecurityGroupIds: []*string{aws.String("2"), aws.String("3")},
SubnetId: aws.String("subnet-1"),
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
{
DeviceIndex: aws.Int64(0),
SubnetId: aws.String("subnet-1"),
Groups: aws.StringSlice([]string{"2", "3"}),
},
},
TagSpecifications: []*ec2.TagSpecification{
{
ResourceType: aws.String("instance"),
Expand Down
2 changes: 1 addition & 1 deletion pkg/cloud/services/interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ type EC2Interface interface {
DeleteBastion() error
ReconcileBastion() error
// ReconcileElasticIPFromPublicPool reconciles the elastic IP from a custom Public IPv4 Pool.
ReconcileElasticIPFromPublicPool(pool *infrav1.ElasticIPPool, instance *infrav1.Instance) error
ReconcileElasticIPFromPublicPool(pool *infrav1.ElasticIPPool, instance *infrav1.Instance) (bool, error)

// ReleaseElasticIP reconciles the elastic IP from a custom Public IPv4 Pool.
ReleaseElasticIP(instanceID string) error
Expand Down
7 changes: 4 additions & 3 deletions pkg/cloud/services/mock_services/ec2_interface_mock.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 9 additions & 4 deletions pkg/cloud/services/network/eips.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,17 @@ func (s *Service) GetOrAllocateAddresses(pool *infrav1.ElasticIPPool, num int, r
return s.getOrAllocateAddresses(num, role, pool)
}

// GetAddresses returns the address associated to a given role.
func (s *Service) GetAddresses(role string) (*ec2.DescribeAddressesOutput, error) {
return s.describeAddresses(role)
}

// ReleaseAddressByRole releases EIP addresses filtering by tag CAPA provider role.
func (s *Service) ReleaseAddressByRole(role string) error {
clusterFilter := []*ec2.Filter{filter.EC2.Cluster(s.scope.Name())}
clusterFilter = append(clusterFilter, filter.EC2.ProviderRole(role))

return s.releaseAddressesWithFilter(clusterFilter)
return s.releaseAddressesWithFilter([]*ec2.Filter{
filter.EC2.ClusterOwned(s.scope.Name()),
filter.EC2.ProviderRole(role),
})
}

// setByoPublicIpv4 check if the config has Public IPv4 Pool defined, then
Expand Down

0 comments on commit 3f3ce56

Please sign in to comment.