From 3b13a2ef666a26c54291ee09f5ee62fb5bd2b771 Mon Sep 17 00:00:00 2001 From: Michael Burman Date: Thu, 9 Jan 2025 15:49:24 +0200 Subject: [PATCH] cass-operator v1.23.0 (#1461) * cass-operator v1.23.0 API changes, such as SanitiziedName -> LabelResourceName Add updated CRD and exposed method Change name generation to follow the real naming from cass-operator Update tag to 1.23.0 and fix medusa tests Change DcPrefix() in the test Some resourceName prefix check fixes Change schemas.go check to use CassandraDatacenter Fix status updates and add a new status field, contextName for the Datacenter Change back slower polling intervals for the tests, or they start to fail. * Update CHANGELOG --- CHANGELOG/CHANGELOG-1.21.md | 5 +- .../v1alpha1/k8ssandracluster_types.go | 4 +- .../v1alpha1/k8ssandracluster_webhook.go | 22 ++++- .../crds/k8ssandra-operator-crds.yaml | 11 ++- .../cluster-scoped/kustomization.yaml | 2 +- .../ns-scoped/kustomization.yaml | 2 +- .../bases/k8ssandra.io_k8ssandraclusters.yaml | 11 ++- .../cassandra_telemetry_reconciler.go | 4 +- controllers/k8ssandra/cleanup.go | 80 ++++++++++--------- .../k8ssandra/contact_points_service.go | 3 +- controllers/k8ssandra/datacenters.go | 12 ++- controllers/k8ssandra/reaper.go | 8 +- controllers/k8ssandra/schemas.go | 32 ++++++-- controllers/k8ssandra/seeds.go | 4 +- controllers/k8ssandra/stargate.go | 9 +-- controllers/medusa/controllers_test.go | 12 +-- .../medusa/medusabackupjob_controller_test.go | 17 ++-- .../medusa/medusarestorejob_controller.go | 7 +- .../medusarestorejob_controller_test.go | 11 +-- controllers/reaper/reaper_controller_test.go | 17 ++-- go.mod | 2 +- go.sum | 4 +- pkg/cassandra/management.go | 9 +-- pkg/cassandra/tokens.go | 20 +++-- pkg/k8ssandra/util.go | 13 +-- pkg/k8ssandra/util_test.go | 34 -------- pkg/medusa/hostmap.go | 33 ++++---- pkg/medusa/hostmap_test.go | 46 ++++++++++- pkg/medusa/requests.go | 7 +- pkg/medusa/utils.go | 5 +- pkg/test/testenv.go | 2 +- test/e2e/auth_test.go | 27 ++++--- test/e2e/medusa_test.go | 15 ++-- test/e2e/per_node_config_test.go | 6 +- test/e2e/reaper_test.go | 19 ++--- test/e2e/stop_dc_test.go | 15 ++-- test/e2e/suite_test.go | 32 +++++--- test/framework/e2e_framework.go | 2 +- 38 files changed, 318 insertions(+), 246 deletions(-) delete mode 100644 pkg/k8ssandra/util_test.go diff --git a/CHANGELOG/CHANGELOG-1.21.md b/CHANGELOG/CHANGELOG-1.21.md index 74695fb43..3e2a9a65c 100644 --- a/CHANGELOG/CHANGELOG-1.21.md +++ b/CHANGELOG/CHANGELOG-1.21.md @@ -15,8 +15,11 @@ When cutting a new release, update the `unreleased` heading to the tag being gen ## unreleased +* [CHANGE] [#1450](https://github.com/k8ssandra/k8ssandra-operator/issues/1450) Update datacenter labels to use Kubernetes resource names for CassandraDatacenter, not the cleaned override name. Update to cass-operator 1.23.0 * [CHANGE] [#1441](https://github.com/k8ssandra/k8ssandra-operator/issues/1441) Use k8ssandra-client instead of k8ssandra-tools for CRD upgrades -* [BUGFIX] [#1383](https://github.com/k8ssandra/k8ssandra-operator/issues/1383) Do not create MedusaBackup if MedusaBakupJob did not fully succeed * [ENHANCEMENT] [#1667](https://github.com/k8ssahttps://github.com/k8ssandra/k8ssandra/issues/1667) Add `skipSchemaMigration` option to `K8ssandraCluster.spec.reaper` * [FEATURE] [#1034](https://github.com/k8ssandra/k8ssandra-operator/issues/1034) Add support for priorityClassName * [BUGFIX] [#1454](https://github.com/k8ssandra/k8ssandra-operator/issues/1454) Do not try to work out backup status if there are no pods +* [BUGFIX] [#1383](https://github.com/k8ssandra/k8ssandra-operator/issues/1383) Do not create MedusaBackup if MedusaBakupJob did not fully succeed +* [BUGFIX] [#1460](https://github.com/k8ssandra/k8ssandra-operator/issues/1460) Fix podName calculations in medusa's hostmap.go to account for unbalanced racks also +* [BUGFIX] [#1466](https://github.com/k8ssandra/k8ssandra-operator/issues/1466) Do not overwrite existing status fields or forget to write the changes. Also, add new ContextName for the Datacenter to know where it used to be. \ No newline at end of file diff --git a/apis/k8ssandra/v1alpha1/k8ssandracluster_types.go b/apis/k8ssandra/v1alpha1/k8ssandracluster_types.go index 3d71c44fa..e1de27388 100644 --- a/apis/k8ssandra/v1alpha1/k8ssandracluster_types.go +++ b/apis/k8ssandra/v1alpha1/k8ssandracluster_types.go @@ -143,6 +143,7 @@ type K8ssandraClusterCondition struct { // K8ssandraStatus defines the observed of a k8ssandra instance type K8ssandraStatus struct { + ContextName string `json:"contextName,omitempty"` DecommissionProgress DecommissionProgress `json:"decommissionProgress,omitempty"` Cassandra *cassdcapi.CassandraDatacenterStatus `json:"cassandra,omitempty"` Stargate *stargateapi.StargateStatus `json:"stargate,omitempty"` @@ -443,8 +444,7 @@ type DatacenterOptions struct { // The k8s service account to use for the Cassandra pods ServiceAccount string `json:"serviceAccount,omitempty"` - // DatacenterName allows to override the name of the Cassandra datacenter. Kubernetes objects will be named after a sanitized version of it if set, and if not metadata.name. In Cassandra the DC name will be overridden by this value. - // It may generate some confusion as objects created for the DC will have a different name than the CasandraDatacenter object itself. + // DatacenterName allows to override the name of the Cassandra datacenter. In Cassandra the DC name will be overridden by this value. // This setting can create conflicts if multiple DCs coexist in the same namespace if metadata.name for a DC with no override is set to the same value as the override name of another DC. // Use cautiously. // +optional diff --git a/apis/k8ssandra/v1alpha1/k8ssandracluster_webhook.go b/apis/k8ssandra/v1alpha1/k8ssandracluster_webhook.go index 92043e3c7..ec9e30a0e 100644 --- a/apis/k8ssandra/v1alpha1/k8ssandracluster_webhook.go +++ b/apis/k8ssandra/v1alpha1/k8ssandracluster_webhook.go @@ -21,6 +21,8 @@ import ( "strings" "github.com/Masterminds/semver/v3" + cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" + "github.com/k8ssandra/cass-operator/pkg/reconciliation" reaperapi "github.com/k8ssandra/k8ssandra-operator/apis/reaper/v1alpha1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -132,15 +134,31 @@ func (r *K8ssandraCluster) validateK8ssandraCluster() error { } func (r *K8ssandraCluster) validateStatefulsetNameSize() error { + clusterName := r.ObjectMeta.Name + if r.Spec.Cassandra.ClusterName != "" { + clusterName = r.Spec.Cassandra.ClusterName + } + for _, dc := range r.Spec.Cassandra.Datacenters { + realDc := &cassdcapi.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: dc.Meta.Name, + }, + Spec: cassdcapi.CassandraDatacenterSpec{ + ClusterName: clusterName, + }, + } + if len(dc.Racks) > 0 { for _, rack := range dc.Racks { - if len(r.SanitizedName()+"-"+dc.CassDcName()+"-"+rack.Name+"-sts-") > 60 { + stsName := reconciliation.NewNamespacedNameForStatefulSet(realDc, rack.Name) + if len(stsName.Name) > 60 { return fmt.Errorf("the name of the statefulset for rack %s in DC %s is too long", rack.Name, dc.CassDcName()) } } } else { - if len(r.SanitizedName()+"-"+dc.CassDcName()+"-default-sts-") > 60 { + stsName := reconciliation.NewNamespacedNameForStatefulSet(realDc, "default") + if len(stsName.Name) > 60 { return fmt.Errorf("the name of the statefulset for DC %s is too long", dc.CassDcName()) } } diff --git a/charts/k8ssandra-operator/crds/k8ssandra-operator-crds.yaml b/charts/k8ssandra-operator/crds/k8ssandra-operator-crds.yaml index c9f76a440..cfa308f26 100644 --- a/charts/k8ssandra-operator/crds/k8ssandra-operator-crds.yaml +++ b/charts/k8ssandra-operator/crds/k8ssandra-operator-crds.yaml @@ -2222,8 +2222,7 @@ spec: type: array datacenterName: description: |- - DatacenterName allows to override the name of the Cassandra datacenter. Kubernetes objects will be named after a sanitized version of it if set, and if not metadata.name. In Cassandra the DC name will be overridden by this value. - It may generate some confusion as objects created for the DC will have a different name than the CasandraDatacenter object itself. + DatacenterName allows to override the name of the Cassandra datacenter. In Cassandra the DC name will be overridden by this value. This setting can create conflicts if multiple DCs coexist in the same namespace if metadata.name for a DC with no override is set to the same value as the override name of another DC. Use cautiously. type: string @@ -4251,8 +4250,7 @@ spec: type: array datacenterName: description: |- - DatacenterName allows to override the name of the Cassandra datacenter. Kubernetes objects will be named after a sanitized version of it if set, and if not metadata.name. In Cassandra the DC name will be overridden by this value. - It may generate some confusion as objects created for the DC will have a different name than the CasandraDatacenter object itself. + DatacenterName allows to override the name of the Cassandra datacenter. In Cassandra the DC name will be overridden by this value. This setting can create conflicts if multiple DCs coexist in the same namespace if metadata.name for a DC with no override is set to the same value as the override name of another DC. Use cautiously. type: string @@ -31083,6 +31081,9 @@ spec: with the management API format: date-time type: string + metadataVersion: + format: int64 + type: integer nodeReplacements: items: type: string @@ -31183,6 +31184,8 @@ spec: format: date-time type: string type: object + contextName: + type: string decommissionProgress: type: string reaper: diff --git a/config/cass-operator/cluster-scoped/kustomization.yaml b/config/cass-operator/cluster-scoped/kustomization.yaml index aacf08d4c..bdce74290 100644 --- a/config/cass-operator/cluster-scoped/kustomization.yaml +++ b/config/cass-operator/cluster-scoped/kustomization.yaml @@ -2,4 +2,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: -- github.com/k8ssandra/cass-operator/config/deployments/cluster?ref=v1.22.4 +- github.com/k8ssandra/cass-operator/config/deployments/cluster?ref=v1.23.0 diff --git a/config/cass-operator/ns-scoped/kustomization.yaml b/config/cass-operator/ns-scoped/kustomization.yaml index 18fbf88f1..baca9eb1f 100644 --- a/config/cass-operator/ns-scoped/kustomization.yaml +++ b/config/cass-operator/ns-scoped/kustomization.yaml @@ -2,4 +2,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: -- github.com/k8ssandra/cass-operator/config/deployments/default?ref=v1.22.4 +- github.com/k8ssandra/cass-operator/config/deployments/default?ref=v1.23.0 diff --git a/config/crd/bases/k8ssandra.io_k8ssandraclusters.yaml b/config/crd/bases/k8ssandra.io_k8ssandraclusters.yaml index 2f7151c3d..9e05f1673 100644 --- a/config/crd/bases/k8ssandra.io_k8ssandraclusters.yaml +++ b/config/crd/bases/k8ssandra.io_k8ssandraclusters.yaml @@ -2160,8 +2160,7 @@ spec: type: array datacenterName: description: |- - DatacenterName allows to override the name of the Cassandra datacenter. Kubernetes objects will be named after a sanitized version of it if set, and if not metadata.name. In Cassandra the DC name will be overridden by this value. - It may generate some confusion as objects created for the DC will have a different name than the CasandraDatacenter object itself. + DatacenterName allows to override the name of the Cassandra datacenter. In Cassandra the DC name will be overridden by this value. This setting can create conflicts if multiple DCs coexist in the same namespace if metadata.name for a DC with no override is set to the same value as the override name of another DC. Use cautiously. type: string @@ -4189,8 +4188,7 @@ spec: type: array datacenterName: description: |- - DatacenterName allows to override the name of the Cassandra datacenter. Kubernetes objects will be named after a sanitized version of it if set, and if not metadata.name. In Cassandra the DC name will be overridden by this value. - It may generate some confusion as objects created for the DC will have a different name than the CasandraDatacenter object itself. + DatacenterName allows to override the name of the Cassandra datacenter. In Cassandra the DC name will be overridden by this value. This setting can create conflicts if multiple DCs coexist in the same namespace if metadata.name for a DC with no override is set to the same value as the override name of another DC. Use cautiously. type: string @@ -31021,6 +31019,9 @@ spec: with the management API format: date-time type: string + metadataVersion: + format: int64 + type: integer nodeReplacements: items: type: string @@ -31121,6 +31122,8 @@ spec: format: date-time type: string type: object + contextName: + type: string decommissionProgress: type: string reaper: diff --git a/controllers/k8ssandra/cassandra_telemetry_reconciler.go b/controllers/k8ssandra/cassandra_telemetry_reconciler.go index 1de74796f..0f8e3afff 100644 --- a/controllers/k8ssandra/cassandra_telemetry_reconciler.go +++ b/controllers/k8ssandra/cassandra_telemetry_reconciler.go @@ -37,9 +37,9 @@ func (r *K8ssandraClusterReconciler) reconcileCassandraDCTelemetry( cfg := telemetry.PrometheusResourcer{ MonitoringTargetNS: actualDc.Namespace, MonitoringTargetName: actualDc.Name, - ServiceMonitorName: cassdcapi.CleanupForKubernetes(kc.CassClusterName() + "-" + actualDc.DatacenterName() + "-" + "cass-servicemonitor"), + ServiceMonitorName: cassdcapi.CleanupForKubernetes(kc.CassClusterName() + "-" + actualDc.LabelResourceName() + "-" + "cass-servicemonitor"), Logger: logger, - CommonLabels: mustLabels(kc.Name, kc.Namespace, actualDc.DatacenterName(), commonLabels), + CommonLabels: mustLabels(kc.Name, kc.Namespace, actualDc.LabelResourceName(), commonLabels), } logger.Info("merged TelemetrySpec constructed", "mergedSpec", mergedSpec, "cluster", kc.Name) // Confirm telemetry config is valid (e.g. Prometheus is installed if it is requested.) diff --git a/controllers/k8ssandra/cleanup.go b/controllers/k8ssandra/cleanup.go index b3acbe102..af1ac704b 100644 --- a/controllers/k8ssandra/cleanup.go +++ b/controllers/k8ssandra/cleanup.go @@ -144,7 +144,7 @@ func (r *K8ssandraClusterReconciler) checkFinalizer(ctx context.Context, kc *api } func (r *K8ssandraClusterReconciler) checkDcDeletion(ctx context.Context, kc *api.K8ssandraCluster, logger logr.Logger) result.ReconcileResult { - dcName, dcNameOverride := k8ssandra.GetDatacenterForDecommission(kc) + dcName := k8ssandra.GetDatacenterForDecommission(kc) if dcName == "" { return result.Continue() } @@ -163,76 +163,78 @@ func (r *K8ssandraClusterReconciler) checkDcDeletion(ctx context.Context, kc *ap default: logger.Info("Proceeding with DC deletion", "DC", dcName) - cassDcName := dcName - if dcNameOverride != "" { - cassDcName = dcNameOverride - } - return r.deleteDc(ctx, kc, dcName, cassDcName, logger) + return r.deleteDc(ctx, kc, dcName, logger) } } -func (r *K8ssandraClusterReconciler) deleteDc(ctx context.Context, kc *api.K8ssandraCluster, dcName string, cassDcName string, logger logr.Logger) result.ReconcileResult { +func (r *K8ssandraClusterReconciler) deleteDc(ctx context.Context, kc *api.K8ssandraCluster, dcName string, logger logr.Logger) result.ReconcileResult { kcKey := utils.GetKey(kc) - stargate, remoteClient, err := r.findStargateForDeletion(ctx, kcKey, cassDcName, nil) + dcRemoteClient, err := r.ClientCache.GetRemoteClient(kc.Status.Datacenters[dcName].ContextName) + if err != nil { + return result.Error(err) + } + + dc, _, err := r.findDcForDeletion(ctx, kcKey, dcName, dcRemoteClient) + if err != nil { + return result.Error(err) + } + + if dc == nil { + // Deletion was already done + delete(kc.Status.Datacenters, dcName) + logger.Info("DC deletion finished", "DC", dcName) + return result.Continue() + } + + stargate, remoteClient, err := r.findStargateForDeletion(ctx, kcKey, dc.DatacenterName(), nil) if err != nil { return result.Error(err) } if stargate != nil { if err = remoteClient.Delete(ctx, stargate); err != nil && !errors.IsNotFound(err) { - return result.Error(fmt.Errorf("failed to delete Stargate for dc (%s): %v", cassDcName, err)) + return result.Error(fmt.Errorf("failed to delete Stargate for dc (%s): %v", dc.DatacenterName(), err)) } logger.Info("Deleted Stargate", "Stargate", utils.GetKey(stargate)) } - reaper, remoteClient, err := r.findReaperForDeletion(ctx, kcKey, cassDcName, remoteClient) + reaper, remoteClient, err := r.findReaperForDeletion(ctx, kcKey, dc.DatacenterName(), remoteClient) if err != nil { return result.Error(err) } if reaper != nil { if err = remoteClient.Delete(ctx, reaper); err != nil && !errors.IsNotFound(err) { - return result.Error(fmt.Errorf("failed to delete Reaper for dc (%s): %v", cassDcName, err)) + return result.Error(fmt.Errorf("failed to delete Reaper for dc (%s): %v", dc.DatacenterName(), err)) } logger.Info("Deleted Reaper", "Reaper", utils.GetKey(reaper)) } - dc, remoteClient, err := r.findDcForDeletion(ctx, kcKey, dcName, remoteClient) - if err != nil { + if err := r.deleteContactPointsService(ctx, kc, dc, logger); err != nil { return result.Error(err) } - if dc != nil { - if err := r.deleteContactPointsService(ctx, kc, dc, logger); err != nil { - return result.Error(err) - } - - if dc.GetConditionStatus(cassdcapi.DatacenterDecommission) == corev1.ConditionTrue { - logger.Info("CassandraDatacenter decommissioning in progress", "CassandraDatacenter", utils.GetKey(dc)) - // There is no need to requeue here. Reconciliation will be trigger by updates made by cass-operator. - return result.Done() - } - - if !annotations.HasAnnotationWithValue(dc, cassdcapi.DecommissionOnDeleteAnnotation, "true") { - patch := client.MergeFrom(dc.DeepCopy()) - annotations.AddAnnotation(dc, cassdcapi.DecommissionOnDeleteAnnotation, "true") - if err = remoteClient.Patch(ctx, dc, patch); err != nil { - return result.Error(fmt.Errorf("failed to add %s annotation to dc: %v", cassdcapi.DecommissionOnDeleteAnnotation, err)) - } - } - - if err = remoteClient.Delete(ctx, dc); err != nil && !errors.IsNotFound(err) { - return result.Error(fmt.Errorf("failed to delete CassandraDatacenter (%s): %v", dcName, err)) - } - logger.Info("Deleted CassandraDatacenter", "CassandraDatacenter", utils.GetKey(dc)) + if dc.GetConditionStatus(cassdcapi.DatacenterDecommission) == corev1.ConditionTrue { + logger.Info("CassandraDatacenter decommissioning in progress", "CassandraDatacenter", utils.GetKey(dc)) // There is no need to requeue here. Reconciliation will be trigger by updates made by cass-operator. return result.Done() } - delete(kc.Status.Datacenters, dcName) - logger.Info("DC deletion finished", "DC", dcName) - return result.Continue() + if !annotations.HasAnnotationWithValue(dc, cassdcapi.DecommissionOnDeleteAnnotation, "true") { + patch := client.MergeFrom(dc.DeepCopy()) + annotations.AddAnnotation(dc, cassdcapi.DecommissionOnDeleteAnnotation, "true") + if err = dcRemoteClient.Patch(ctx, dc, patch); err != nil { + return result.Error(fmt.Errorf("failed to add %s annotation to dc: %v", cassdcapi.DecommissionOnDeleteAnnotation, err)) + } + } + + if err = dcRemoteClient.Delete(ctx, dc); err != nil && !errors.IsNotFound(err) { + return result.Error(fmt.Errorf("failed to delete CassandraDatacenter (%s): %v", dcName, err)) + } + logger.Info("Deleted CassandraDatacenter", "CassandraDatacenter", utils.GetKey(dc)) + // There is no need to requeue here. Reconciliation will be trigger by updates made by cass-operator. + return result.Done() } func (r *K8ssandraClusterReconciler) findStargateForDeletion( diff --git a/controllers/k8ssandra/contact_points_service.go b/controllers/k8ssandra/contact_points_service.go index dfbdbf9b2..40f1b5662 100644 --- a/controllers/k8ssandra/contact_points_service.go +++ b/controllers/k8ssandra/contact_points_service.go @@ -2,6 +2,7 @@ package k8ssandra import ( "context" + "github.com/go-logr/logr" cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" api "github.com/k8ssandra/k8ssandra-operator/apis/k8ssandra/v1alpha1" @@ -82,7 +83,7 @@ func (r *K8ssandraClusterReconciler) loadAllPodsEndpoints( func contactPointsServiceKey(kc *api.K8ssandraCluster, dc *cassdcapi.CassandraDatacenter) client.ObjectKey { return types.NamespacedName{ Namespace: kc.Namespace, - Name: kc.SanitizedName() + "-" + dc.SanitizedName() + "-contact-points-service", + Name: kc.SanitizedName() + "-" + dc.LabelResourceName() + "-contact-points-service", } } diff --git a/controllers/k8ssandra/datacenters.go b/controllers/k8ssandra/datacenters.go index 5961fccef..2e3d45fcb 100644 --- a/controllers/k8ssandra/datacenters.go +++ b/controllers/k8ssandra/datacenters.go @@ -148,7 +148,7 @@ func (r *K8ssandraClusterReconciler) reconcileDatacenters(ctx context.Context, k return result.Error(fmt.Errorf("CassandraDatacenter %s has cluster name %s, but expected %s. Cluster name cannot be changed in an existing cluster", dcKey, actualDc.Spec.ClusterName, cassClusterName)), actualDcs } - r.setStatusForDatacenter(kc, actualDc) + r.setStatusForDatacenter(kc, actualDc, dcConfig.K8sContext) r.reconcileContactPointsService(ctx, kc, actualDc, remoteClient, dcLogger) @@ -309,7 +309,7 @@ func (r *K8ssandraClusterReconciler) reconcileDatacenters(ctx context.Context, k return result.Continue(), actualDcs } -func (r *K8ssandraClusterReconciler) setStatusForDatacenter(kc *api.K8ssandraCluster, dc *cassdcapi.CassandraDatacenter) { +func (r *K8ssandraClusterReconciler) setStatusForDatacenter(kc *api.K8ssandraCluster, dc *cassdcapi.CassandraDatacenter, targetContext string) { if len(kc.Status.Datacenters) == 0 { kc.Status.Datacenters = make(map[string]api.K8ssandraStatus, 0) } @@ -318,9 +318,15 @@ func (r *K8ssandraClusterReconciler) setStatusForDatacenter(kc *api.K8ssandraClu if found { dc.Status.DeepCopyInto(kdcStatus.Cassandra) + if kdcStatus.ContextName != targetContext { + // This is pretty fatal situation if it happens to actually change the context, but for updates from previous versions we need it + kdcStatus.ContextName = targetContext + } + kc.Status.Datacenters[dc.Name] = kdcStatus } else { kc.Status.Datacenters[dc.Name] = api.K8ssandraStatus{ - Cassandra: dc.Status.DeepCopy(), + ContextName: targetContext, + Cassandra: dc.Status.DeepCopy(), } } } diff --git a/controllers/k8ssandra/reaper.go b/controllers/k8ssandra/reaper.go index c50852ea6..81ebb5a02 100644 --- a/controllers/k8ssandra/reaper.go +++ b/controllers/k8ssandra/reaper.go @@ -19,6 +19,7 @@ package k8ssandra import ( "context" "fmt" + "github.com/go-logr/logr" cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" api "github.com/k8ssandra/k8ssandra-operator/apis/k8ssandra/v1alpha1" @@ -242,11 +243,8 @@ func (r *K8ssandraClusterReconciler) setStatusForReaper(kc *api.K8ssandraCluster func (r *K8ssandraClusterReconciler) removeReaperStatus(kc *api.K8ssandraCluster, dcName string) { if kdcStatus, found := kc.Status.Datacenters[dcName]; found { - kc.Status.Datacenters[dcName] = api.K8ssandraStatus{ - Reaper: nil, - Cassandra: kdcStatus.Cassandra.DeepCopy(), - Stargate: kdcStatus.Stargate.DeepCopy(), - } + kdcStatus.Reaper = nil + kc.Status.Datacenters[dcName] = kdcStatus } } diff --git a/controllers/k8ssandra/schemas.go b/controllers/k8ssandra/schemas.go index 94cce2088..042a16db3 100644 --- a/controllers/k8ssandra/schemas.go +++ b/controllers/k8ssandra/schemas.go @@ -56,18 +56,39 @@ func (r *K8ssandraClusterReconciler) checkSchemas( } } - decommCassDcName, dcNameOverride := k8ssandra.GetDatacenterForDecommission(kc) + decommCassDcName := k8ssandra.GetDatacenterForDecommission(kc) + + logger.Info("Checking if user keyspace replication needs to be updated", "decommissioning_dc", decommCassDcName) decommission := false + status := kc.Status.Datacenters[decommCassDcName] if decommCassDcName != "" { - decommission = kc.Status.Datacenters[decommCassDcName].DecommissionProgress == api.DecommUpdatingReplication + decommission = status.DecommissionProgress == api.DecommUpdatingReplication } - status := kc.Status.Datacenters[decommCassDcName] if decommission { + kcKey := utils.GetKey(kc) + logger.Info("Decommissioning DC", "dc", decommCassDcName, "context", status.ContextName) + + var dcRemoteClient client.Client + if status.ContextName == "" { + dcRemoteClient = remoteClient + } else { + dcRemoteClient, err = r.ClientCache.GetRemoteClient(status.ContextName) + if err != nil { + return result.Error(err) + } + } + + dc, _, err = r.findDcForDeletion(ctx, kcKey, decommCassDcName, dcRemoteClient) + if err != nil { + return result.Error(err) + } + decommDcName := decommCassDcName - if dcNameOverride != "" { - decommDcName = dcNameOverride + if dc.Spec.DatacenterName != "" { + decommDcName = dc.Spec.DatacenterName } + if recResult := r.checkUserKeyspacesReplicationForDecommission(kc, decommDcName, mgmtApi, logger); recResult.Completed() { return recResult } @@ -266,6 +287,7 @@ func (r *K8ssandraClusterReconciler) checkUserKeyspacesReplicationForDecommissio if err != nil { return result.Error(fmt.Errorf("failed to get replication for keyspace (%s): %v", ks, err)) } + logger.Info("checking keyspace replication", "keyspace", ks, "replication", replication, "decommissioning_dc", decommDc) if _, hasReplicas := replication[decommDc]; hasReplicas { return result.Error(fmt.Errorf("cannot decommission DC %s: keyspace %s still has replicas on it", decommDc, ks)) } diff --git a/controllers/k8ssandra/seeds.go b/controllers/k8ssandra/seeds.go index 217c99234..a870ef332 100644 --- a/controllers/k8ssandra/seeds.go +++ b/controllers/k8ssandra/seeds.go @@ -35,7 +35,7 @@ func (r *K8ssandraClusterReconciler) findSeeds(ctx context.Context, kc *api.K8ss list := &corev1.PodList{} selector := map[string]string{ cassdcapi.ClusterLabel: cassdcapi.CleanLabelValue(cassClusterName), - cassdcapi.DatacenterLabel: dcTemplate.CassDcName(), + cassdcapi.DatacenterLabel: dcTemplate.Meta.Name, cassdcapi.SeedNodeLabel: "true", } @@ -64,7 +64,7 @@ func (r *K8ssandraClusterReconciler) reconcileSeedsEndpoints( // Additional seed nodes should never be part of the current datacenter filteredSeeds := make([]corev1.Pod, 0) for _, seed := range seeds { - if seed.Labels[cassdcapi.DatacenterLabel] != dc.DatacenterName() { + if seed.Labels[cassdcapi.DatacenterLabel] != dc.Name { filteredSeeds = append(filteredSeeds, seed) } } diff --git a/controllers/k8ssandra/stargate.go b/controllers/k8ssandra/stargate.go index 9c1032dea..2caa83aa6 100644 --- a/controllers/k8ssandra/stargate.go +++ b/controllers/k8ssandra/stargate.go @@ -121,10 +121,10 @@ func (r *K8ssandraClusterReconciler) setStatusForStargate(kc *api.K8ssandraClust if found { if kdcStatus.Stargate == nil { kdcStatus.Stargate = stargate.Status.DeepCopy() - kc.Status.Datacenters[dcName] = kdcStatus } else { stargate.Status.DeepCopyInto(kdcStatus.Stargate) } + kc.Status.Datacenters[dcName] = kdcStatus } else { kc.Status.Datacenters[dcName] = api.K8ssandraStatus{ Stargate: stargate.Status.DeepCopy(), @@ -166,10 +166,7 @@ func (r *K8ssandraClusterReconciler) reconcileStargateAuthSchema( func (r *K8ssandraClusterReconciler) removeStargateStatus(kc *api.K8ssandraCluster, dcName string) { if kdcStatus, found := kc.Status.Datacenters[dcName]; found { - kc.Status.Datacenters[dcName] = api.K8ssandraStatus{ - Stargate: nil, - Cassandra: kdcStatus.Cassandra.DeepCopy(), - Reaper: kdcStatus.Reaper.DeepCopy(), - } + kdcStatus.Stargate = nil + kc.Status.Datacenters[dcName] = kdcStatus } } diff --git a/controllers/medusa/controllers_test.go b/controllers/medusa/controllers_test.go index ad715d69a..cb1a269cb 100644 --- a/controllers/medusa/controllers_test.go +++ b/controllers/medusa/controllers_test.go @@ -24,7 +24,7 @@ import ( const ( timeout = time.Second * 5 - interval = time.Millisecond * 500 + interval = time.Millisecond * 100 ) var ( @@ -38,9 +38,9 @@ func TestCassandraBackupRestore(t *testing.T) { ctx := testutils.TestSetup(t) ctx, cancel := context.WithCancel(ctx) - testEnv1 := setupMedusaBackupTestEnv(t, ctx) - defer testEnv1.Stop(t) - t.Run("TestMedusaBackupDatacenter", testEnv1.ControllerTest(ctx, testMedusaBackupDatacenter)) + testEnv := setupMedusaBackupTestEnv(t, ctx) + defer testEnv.Stop(t) + t.Run("TestMedusaBackupDatacenter", testEnv.ControllerTest(ctx, testMedusaBackupDatacenter)) testEnv2 := setupMedusaTaskTestEnv(t, ctx) defer testEnv2.Stop(t) @@ -48,16 +48,16 @@ func TestCassandraBackupRestore(t *testing.T) { testEnv3 := setupMedusaRestoreJobTestEnv(t, ctx) defer testEnv3.Stop(t) - defer cancel() t.Run("TestMedusaRestoreDatacenter", testEnv3.ControllerTest(ctx, testMedusaRestoreDatacenter)) t.Run("TestValidationErrorStopsRestore", testEnv3.ControllerTest(ctx, testValidationErrorStopsRestore)) testEnv4 := setupMedusaConfigurationTestEnv(t, ctx) defer testEnv4.Stop(t) - defer cancel() t.Run("TestMedusaConfiguration", testEnv4.ControllerTest(ctx, testMedusaConfiguration)) + // This cancel is called here to ensure the correct ordering for defer, as testEnv.Stop() calls must be done before the context is cancelled + defer cancel() } func setupMedusaBackupTestEnv(t *testing.T, ctx context.Context) *testutils.MultiClusterTestEnv { diff --git a/controllers/medusa/medusabackupjob_controller_test.go b/controllers/medusa/medusabackupjob_controller_test.go index 9a33684f5..44afb434d 100644 --- a/controllers/medusa/medusabackupjob_controller_test.go +++ b/controllers/medusa/medusabackupjob_controller_test.go @@ -243,6 +243,7 @@ func createAndVerifyMedusaBackup(dcKey framework.ClusterKey, dc *cassdcapi.Cassa }, } + t.Logf("creating MedusaBackupJob %s", backupKey) err := f.Create(ctx, backupKey, backup) require.NoError(err, "failed to create MedusaBackupJob") @@ -258,9 +259,13 @@ func createAndVerifyMedusaBackup(dcKey framework.ClusterKey, dc *cassdcapi.Cassa verifyBackupJobStarted(require.Never, t, dc, f, ctx, backupKey) } + // TODO That previous check does not actually verify that the MedusaBackup has yet been written (if it should be), so we need to wait a bit more + time.Sleep(500 * time.Millisecond) + t.Log("check for the MedusaBackup being created") medusaBackupKey := framework.NewClusterKey(dcKey.K8sContext, dcKey.Namespace, backupName) medusaBackup := &api.MedusaBackup{} + t.Logf("getting MedusaBackup %s", medusaBackupKey) err = f.Get(ctx, medusaBackupKey, medusaBackup) if err != nil { if errors.IsNotFound(err) { @@ -268,6 +273,7 @@ func createAndVerifyMedusaBackup(dcKey framework.ClusterKey, dc *cassdcapi.Cassa return false } } + t.Log("verify the MedusaBackup is correct") require.Equal(medusaBackup.Status.TotalNodes, dc.Spec.Size, "backup total nodes doesn't match dc nodes") require.Equal(medusaBackup.Status.FinishedNodes, dc.Spec.Size, "backup finished nodes doesn't match dc nodes") @@ -556,20 +562,17 @@ func createDatacenterPods(t *testing.T, f *framework.Framework, ctx context.Cont _ = f.CreateNamespace(dcKey.Namespace) for i := int32(0); i < dc.Spec.Size; i++ { pod := &corev1.Pod{} - podName := fmt.Sprintf("%s-%s-%d", dc.Spec.ClusterName, dc.DatacenterName(), i) + podName := fmt.Sprintf("%s-%s-%d", dc.Spec.ClusterName, dc.LabelResourceName(), i) podKey := framework.NewClusterKey(dcKey.K8sContext, dcKey.Namespace, podName) err := f.Get(ctx, podKey, pod) if err != nil { if errors.IsNotFound(err) { - t.Logf("pod %s-%s-%d not found", dc.Spec.ClusterName, dc.DatacenterName(), i) + t.Logf("pod %s-%s-%d not found: %s", dc.Spec.ClusterName, dc.LabelResourceName(), i, dc.Name) pod = &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Namespace: dc.Namespace, Name: podName, - Labels: map[string]string{ - cassdcapi.ClusterLabel: cassdcapi.CleanLabelValue(dc.Spec.ClusterName), - cassdcapi.DatacenterLabel: dc.DatacenterName(), - }, + Labels: dc.GetDatacenterLabels(), }, Spec: corev1.PodSpec{ Containers: []corev1.Container{ @@ -588,7 +591,7 @@ func createDatacenterPods(t *testing.T, f *framework.Framework, ctx context.Cont require.NoError(t, err, "failed to create datacenter pod") patch := client.MergeFrom(pod.DeepCopy()) - pod.Status.PodIP = getPodIpAddress(int(i), dc.DatacenterName()) + pod.Status.PodIP = getPodIpAddress(int(i), dc.LabelResourceName()) err = f.PatchStatus(ctx, pod, patch, podKey) require.NoError(t, err, "failed to patch datacenter pod status") diff --git a/controllers/medusa/medusarestorejob_controller.go b/controllers/medusa/medusarestorejob_controller.go index 0c85551e1..4a70cf263 100644 --- a/controllers/medusa/medusarestorejob_controller.go +++ b/controllers/medusa/medusarestorejob_controller.go @@ -20,11 +20,12 @@ import ( "context" "encoding/json" "fmt" - "github.com/go-logr/logr" - "github.com/k8ssandra/k8ssandra-operator/pkg/shared" "net" "time" + "github.com/go-logr/logr" + "github.com/k8ssandra/k8ssandra-operator/pkg/shared" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -237,7 +238,7 @@ func (r *MedusaRestoreJobReconciler) podTemplateSpecUpdateComplete(ctx context.C // StatefulSets are scaled back up. statefulsetList := &appsv1.StatefulSetList{} - labels := client.MatchingLabels{cassdcapi.ClusterLabel: cassdcapi.CleanLabelValue(req.Datacenter.Spec.ClusterName), cassdcapi.DatacenterLabel: req.Datacenter.DatacenterName()} + labels := client.MatchingLabels{cassdcapi.ClusterLabel: cassdcapi.CleanLabelValue(req.Datacenter.Spec.ClusterName), cassdcapi.DatacenterLabel: req.Datacenter.Name} if err := r.List(ctx, statefulsetList, labels); err != nil { req.Log.Error(err, "Failed to get StatefulSets") diff --git a/controllers/medusa/medusarestorejob_controller_test.go b/controllers/medusa/medusarestorejob_controller_test.go index bd602b2e9..af5da9734 100644 --- a/controllers/medusa/medusarestorejob_controller_test.go +++ b/controllers/medusa/medusarestorejob_controller_test.go @@ -2,11 +2,12 @@ package medusa import ( "context" - "k8s.io/apimachinery/pkg/api/errors" "sync" "testing" "time" + "k8s.io/apimachinery/pkg/api/errors" + cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" k8ss "github.com/k8ssandra/k8ssandra-operator/apis/k8ssandra/v1alpha1" api "github.com/k8ssandra/k8ssandra-operator/apis/medusa/v1alpha1" @@ -195,7 +196,7 @@ func testMedusaRestoreDatacenter(t *testing.T, ctx context.Context, f *framework }), timeout*5, interval, "timed out waiting for CassandraDatacenter stopped flag to be set") t.Log("delete datacenter pods to simulate shutdown") - err = f.DeleteAllOf(ctx, dc1Key.K8sContext, &corev1.Pod{}, client.InNamespace(namespace), client.MatchingLabels{cassdcapi.DatacenterLabel: "real-dc1"}) + err = f.DeleteAllOf(ctx, dc1Key.K8sContext, &corev1.Pod{}, client.InNamespace(namespace), client.MatchingLabels{cassdcapi.DatacenterLabel: "dc1"}) require.NoError(err, "failed to delete datacenter pods") restore = &api.MedusaRestoreJob{} @@ -463,15 +464,15 @@ func testValidationErrorStopsRestore(t *testing.T, ctx context.Context, f *frame backup.Status.TotalNodes = dc1.Spec.Size backup.Status.Nodes = []*api.MedusaBackupNode{ { - Datacenter: "real-dc1", + Datacenter: "dc1", Rack: "default", }, { - Datacenter: "real-dc1", + Datacenter: "dc1", Rack: "default", }, { - Datacenter: "real-dc1", + Datacenter: "dc1", Rack: "default", }, } diff --git a/controllers/reaper/reaper_controller_test.go b/controllers/reaper/reaper_controller_test.go index edb66e248..b41183e2b 100644 --- a/controllers/reaper/reaper_controller_test.go +++ b/controllers/reaper/reaper_controller_test.go @@ -2,11 +2,12 @@ package reaper import ( "context" - "k8s.io/apimachinery/pkg/api/resource" - "k8s.io/utils/ptr" "testing" "time" + "k8s.io/apimachinery/pkg/api/resource" + "k8s.io/utils/ptr" + cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" k8ssandraapi "github.com/k8ssandra/k8ssandra-operator/apis/k8ssandra/v1alpha1" reaperapi "github.com/k8ssandra/k8ssandra-operator/apis/reaper/v1alpha1" @@ -131,10 +132,7 @@ func beforeTest(t *testing.T, ctx context.Context, k8sClient client.Client, test ObjectMeta: metav1.ObjectMeta{ Name: "test-cassdc-pod1", Namespace: testNamespace, - Labels: map[string]string{ - cassdcapi.ClusterLabel: cassdcapi.CleanLabelValue(cassandraClusterName), - cassdcapi.DatacenterLabel: cassandraDatacenterName, - }, + Labels: cassdc.GetDatacenterLabels(), }, Spec: corev1.PodSpec{ Containers: []corev1.Container{{ @@ -162,11 +160,8 @@ func beforeTest(t *testing.T, ctx context.Context, k8sClient client.Client, test Namespace: testNamespace, }, Spec: corev1.ServiceSpec{ - Ports: []corev1.ServicePort{{Name: "mgmt-api-http", Port: int32(8080)}}, - Selector: map[string]string{ - cassdcapi.ClusterLabel: cassdcapi.CleanLabelValue(cassandraClusterName), - cassdcapi.DatacenterLabel: cassandraDatacenterName, - }, + Ports: []corev1.ServicePort{{Name: "mgmt-api-http", Port: int32(8080)}}, + Selector: cassdc.GetDatacenterLabels(), }, } err = k8sClient.Create(ctx, service) diff --git a/go.mod b/go.mod index f021b21d4..c8e554f1a 100644 --- a/go.mod +++ b/go.mod @@ -17,7 +17,7 @@ require ( github.com/go-logr/zapr v1.3.0 github.com/google/uuid v1.3.0 github.com/gruntwork-io/terratest v0.37.7 - github.com/k8ssandra/cass-operator v1.22.4 + github.com/k8ssandra/cass-operator v1.23.0 github.com/k8ssandra/reaper-client-go v0.3.1-0.20220114183114-6923e077c4f5 github.com/pkg/errors v0.9.1 github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.52.1 diff --git a/go.sum b/go.sum index 89b8485c9..de3cdf1dc 100644 --- a/go.sum +++ b/go.sum @@ -1438,8 +1438,8 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= -github.com/k8ssandra/cass-operator v1.22.4 h1:kcVVSvm6H50j+ZyOCBlYEfwaoxd755NjSQSBt3SfIpg= -github.com/k8ssandra/cass-operator v1.22.4/go.mod h1:KtROSvoTwB6eFzcRjOmTUAGkbDHnNtK4ayk4KDllHy4= +github.com/k8ssandra/cass-operator v1.23.0 h1:1870oko++z920dkoT7PbQjWtgAUsdSeJpywJEdrvD8I= +github.com/k8ssandra/cass-operator v1.23.0/go.mod h1:KtROSvoTwB6eFzcRjOmTUAGkbDHnNtK4ayk4KDllHy4= github.com/k8ssandra/reaper-client-go v0.3.1-0.20220114183114-6923e077c4f5 h1:Dq0VdM960G3AbhYwFuaebmsE08IzOYHYhngUfDmWaAc= github.com/k8ssandra/reaper-client-go v0.3.1-0.20220114183114-6923e077c4f5/go.mod h1:WsQymIaVT39xbcstZhdqynUS13AGzP2p6U9Hsk1oy5M= github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51/go.mod h1:CzGEWj7cYgsdH8dAjBGEr58BoE7ScuLd+fwFZ44+/x8= diff --git a/pkg/cassandra/management.go b/pkg/cassandra/management.go index 7a76d6a1a..9e8904f88 100644 --- a/pkg/cassandra/management.go +++ b/pkg/cassandra/management.go @@ -3,9 +3,10 @@ package cassandra import ( "context" "fmt" + "strconv" + "github.com/k8ssandra/k8ssandra-operator/pkg/errors" "github.com/k8ssandra/k8ssandra-operator/pkg/utils" - "strconv" "github.com/go-logr/logr" cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" @@ -134,9 +135,7 @@ func (r *defaultManagementApiFacade) CreateKeyspaceIfNotExists( func (r *defaultManagementApiFacade) fetchDatacenterPods() ([]corev1.Pod, error) { podList := &corev1.PodList{} - labels := client.MatchingLabels{ - cassdcapi.DatacenterLabel: cassdcapi.CleanLabelValue(r.dc.DatacenterName()), - cassdcapi.ClusterLabel: cassdcapi.CleanLabelValue(r.dc.Spec.ClusterName)} + labels := client.MatchingLabels(r.dc.GetDatacenterLabels()) if err := r.k8sClient.List(r.ctx, podList, labels); err != nil { return nil, err } else { @@ -303,7 +302,7 @@ func (r *defaultManagementApiFacade) EnsureKeyspaceReplication(keyspaceName stri if actualReplication, err := r.GetKeyspaceReplication(keyspaceName); err != nil { return err } else if CompareReplications(actualReplication, replication) { - r.logger.Info(fmt.Sprintf("Keyspace %s has desired replication", keyspaceName)) + r.logger.Info(fmt.Sprintf("Keyspace %s has desired replication: %v", keyspaceName, replication)) return nil } else { r.logger.Info(fmt.Sprintf("keyspace %s already exists in cluster %v but has wrong replication, altering it. Expected: %v / Got: %v", keyspaceName, r.dc.Spec.ClusterName, replication, actualReplication)) diff --git a/pkg/cassandra/tokens.go b/pkg/cassandra/tokens.go index 82a8aef96..6b7c1bf70 100644 --- a/pkg/cassandra/tokens.go +++ b/pkg/cassandra/tokens.go @@ -7,7 +7,9 @@ import ( "strings" cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" + "github.com/k8ssandra/cass-operator/pkg/reconciliation" "github.com/k8ssandra/k8ssandra-operator/pkg/utils" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // ComputeInitialTokens computes initial tokens for each DC, assign those tokens to the first RF @@ -146,18 +148,24 @@ func assignInitialTokens(dcConfigs []*DatacenterConfig, infos []*tokenAllocation racks = []cassdcapi.Rack{{Name: "default"}} } + dc := &cassdcapi.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: dcConfig.Meta.Name, + }, + Spec: cassdcapi.CassandraDatacenterSpec{ + ClusterName: dcConfig.Cluster, + }, + } + // First, generate RF pod names since we need to assign tokens to the RF first nodes // only. Note: we know that RF < dc.Size, that has been checked before, so we will never // generate a pod name that doesn't exist. var podNames []string for i := 0; i < infos[dcIndex].rf; i++ { - rackIndex := i % len(racks) + rack := racks[i%len(racks)] + stsName := reconciliation.NewNamespacedNameForStatefulSet(dc, rack.Name) podIndex := i / len(racks) - podName := fmt.Sprintf("%s-%s-%s-sts-%d", - cassdcapi.CleanupForKubernetes(dcConfig.Cluster), - dcConfig.CassDcName(), - cassdcapi.CleanupSubdomain(racks[rackIndex].Name), - podIndex) + podName := fmt.Sprintf("%s-%d", stsName.Name, podIndex) podNames = append(podNames, podName) } diff --git a/pkg/k8ssandra/util.go b/pkg/k8ssandra/util.go index 4f04876ce..53d1773fb 100644 --- a/pkg/k8ssandra/util.go +++ b/pkg/k8ssandra/util.go @@ -5,7 +5,7 @@ import ( "k8s.io/utils/strings/slices" ) -func GetDatacenterForDecommission(kc *api.K8ssandraCluster) (string, string) { +func GetDatacenterForDecommission(kc *api.K8ssandraCluster) string { dcNames := make([]string, 0) for _, dc := range kc.Spec.Cassandra.Datacenters { dcNames = append(dcNames, dc.Meta.Name) @@ -15,7 +15,7 @@ func GetDatacenterForDecommission(kc *api.K8ssandraCluster) (string, string) { for dcName, status := range kc.Status.Datacenters { if !slices.Contains(dcNames, dcName) { if status.DecommissionProgress != api.DecommNone { - return dcName, dcNameOverride(kc.Status.Datacenters[dcName].Cassandra.DatacenterName) + return dcName } } } @@ -23,16 +23,9 @@ func GetDatacenterForDecommission(kc *api.K8ssandraCluster) (string, string) { // No decommissions are in progress. Pick the first one we find. for dcName := range kc.Status.Datacenters { if !slices.Contains(dcNames, dcName) { - return dcName, dcNameOverride(kc.Status.Datacenters[dcName].Cassandra.DatacenterName) + return dcName } } - return "", "" -} - -func dcNameOverride(datacenterName *string) string { - if datacenterName != nil { - return *datacenterName - } return "" } diff --git a/pkg/k8ssandra/util_test.go b/pkg/k8ssandra/util_test.go deleted file mode 100644 index de6f4b749..000000000 --- a/pkg/k8ssandra/util_test.go +++ /dev/null @@ -1,34 +0,0 @@ -package k8ssandra - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestDcNameOverride(t *testing.T) { - assert := assert.New(t) - - t.Run("with non-nil string pointer", func(t *testing.T) { - datacenterName := "Test_Datacenter" - got := dcNameOverride(&datacenterName) - assert.Equal(datacenterName, got, "The two strings should be the same") - }) - - t.Run("with nil string pointer", func(t *testing.T) { - got := dcNameOverride(nil) - assert.Equal("", got, "Without a string pointer, the output should be an empty string") - }) - - t.Run("with empty string pointer", func(t *testing.T) { - datacenterName := "" - got := dcNameOverride(&datacenterName) - assert.Equal("", got, "With an empty string pointer, the output should be an empty string") - }) - - t.Run("with string containing spaces pointer", func(t *testing.T) { - datacenterName := " " - got := dcNameOverride(&datacenterName) - assert.Equal(datacenterName, got, "The two strings (with spaces) should be the same") - }) -} diff --git a/pkg/medusa/hostmap.go b/pkg/medusa/hostmap.go index 23b04af32..c4698f126 100644 --- a/pkg/medusa/hostmap.go +++ b/pkg/medusa/hostmap.go @@ -7,6 +7,7 @@ import ( "sort" cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" + "github.com/k8ssandra/cass-operator/pkg/reconciliation" medusaapi "github.com/k8ssandra/k8ssandra-operator/apis/medusa/v1alpha1" ) @@ -95,9 +96,6 @@ func getSourceRacksIPs(k8sRestore medusaapi.MedusaRestoreJob, client Client, ctx DC: i.Datacenter, } DNSOrIP := string(i.Host) - if err != nil { - return nil, err - } _, exists := out[location] if exists { out[location] = append(out[location], DNSOrIP) @@ -149,26 +147,25 @@ func sortLocations(m map[NodeLocation][]string) []rack { // getClusterRackFQDNs gets a map of racks to FQDNs from the current K8ssandraCluster k8s object. The CassDC does not exist yet, so we cannot refer to it for names. // We refer to the following code for how to calculate pod names: // https://github.com/k8ssandra/cass-operator/blob/master/pkg/reconciliation/construct_statefulset.go#L39 -func getTargetRackFQDNs(cassDC *cassdcapi.CassandraDatacenter) (map[NodeLocation][]string, error) { - racks := cassDC.GetRacks() +func getTargetRackFQDNs(dc *cassdcapi.CassandraDatacenter) (map[NodeLocation][]string, error) { + racks := dc.GetRacks() + rackNodeCounts := cassdcapi.SplitRacks(int(dc.Spec.Size), len(racks)) out := make(map[NodeLocation][]string) - for _, i := range racks { + for i := 0; i < len(racks); i++ { location := NodeLocation{ - DC: cassDC.SanitizedName(), - Rack: i.Name, + DC: dc.DatacenterName(), + Rack: racks[i].Name, + } + podCount := rackNodeCounts[i] + podNames := make([]string, 0, podCount) + stsName := reconciliation.NewNamespacedNameForStatefulSet(dc, racks[i].Name) + for j := 0; j < podCount; j++ { + podNames = append(podNames, fmt.Sprintf("%s-%d", stsName.Name, j)) } - sizePerRack := int(cassDC.Spec.Size) / len(racks) - out[location] = getPodNames(cassdcapi.CleanupForKubernetes(cassDC.Spec.ClusterName), cassDC.SanitizedName(), i.Name, sizePerRack) + out[location] = podNames } - return out, nil -} -func getPodNames(clusterName string, DCName string, rackName string, rackSize int) []string { - out := []string{} - for i := 0; i < rackSize; i++ { - out = append(out, fmt.Sprintf("%s-%s-%s-sts-%s", clusterName, DCName, rackName, fmt.Sprint(i))) - } - return out + return out, nil } // GetHostMap gets the hostmap for a given CassandraBackup from IP or DNS name sources to DNS named targets from the K8ssandraCluster and the backups returned by the Medusa gRPC client. diff --git a/pkg/medusa/hostmap_test.go b/pkg/medusa/hostmap_test.go index f1576335c..b997a3363 100644 --- a/pkg/medusa/hostmap_test.go +++ b/pkg/medusa/hostmap_test.go @@ -8,6 +8,7 @@ import ( pkgtest "github.com/k8ssandra/k8ssandra-operator/pkg/test" "github.com/stretchr/testify/assert" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" ) type mockgRPCClient struct{} @@ -99,6 +100,43 @@ func TestGetTargetRackFQDNs(t *testing.T) { }, } + result, err := getTargetRackFQDNs(cassDc) + assert.NoError(t, err, err) + expectedSourceRacks := map[NodeLocation][]string{ + {Rack: "default", DC: "test-dc2"}: {"test-cluster-dc2-default-sts-0", "test-cluster-dc2-default-sts-1", "test-cluster-dc2-default-sts-2"}, + } + assert.Equal(t, expectedSourceRacks, result) + cassDc.Spec.Racks = []cassdcapi.Rack{ + {Name: "rack1"}, + {Name: "rack2"}, + {Name: "rack3"}, + } + expectedSourceRacks = map[NodeLocation][]string{ + {Rack: "rack1", DC: "test-dc2"}: {"test-cluster-dc2-rack1-sts-0"}, + {Rack: "rack2", DC: "test-dc2"}: {"test-cluster-dc2-rack2-sts-0"}, + {Rack: "rack3", DC: "test-dc2"}: {"test-cluster-dc2-rack3-sts-0"}, + } + result, err = getTargetRackFQDNs(cassDc) + assert.NoError(t, err, err) + assert.Equal(t, expectedSourceRacks, result) +} + +func TestGetTargetRackFQDNsExistingOverride(t *testing.T) { + cassDc := &cassdcapi.CassandraDatacenter{ + ObjectMeta: metav1.ObjectMeta{ + Name: "dc2", + Namespace: "default", + }, + Spec: cassdcapi.CassandraDatacenterSpec{ + ClusterName: "test-cluster", + DatacenterName: "test-dc2", + Size: 3, + }, + Status: cassdcapi.CassandraDatacenterStatus{ + DatacenterName: ptr.To[string]("test-dc2"), + }, + } + result, err := getTargetRackFQDNs(cassDc) assert.NoError(t, err, err) expectedSourceRacks := map[NodeLocation][]string{ @@ -136,7 +174,7 @@ func TestGetTargetRackFQDNsOverrides(t *testing.T) { result, err := getTargetRackFQDNs(cassDc) assert.NoError(t, err, err) expectedSourceRacks := map[NodeLocation][]string{ - {Rack: "default", DC: "testdc2"}: {"testcluster-testdc2-default-sts-0", "testcluster-testdc2-default-sts-1", "testcluster-testdc2-default-sts-2"}, + {Rack: "default", DC: "Test DC2"}: {"testcluster-test-dc2-default-sts-0", "testcluster-test-dc2-default-sts-1", "testcluster-test-dc2-default-sts-2"}, } assert.Equal(t, expectedSourceRacks, result) cassDc.Spec.Racks = []cassdcapi.Rack{ @@ -145,9 +183,9 @@ func TestGetTargetRackFQDNsOverrides(t *testing.T) { {Name: "rack3"}, } expectedSourceRacks = map[NodeLocation][]string{ - {Rack: "rack1", DC: "testdc2"}: {"testcluster-testdc2-rack1-sts-0"}, - {Rack: "rack2", DC: "testdc2"}: {"testcluster-testdc2-rack2-sts-0"}, - {Rack: "rack3", DC: "testdc2"}: {"testcluster-testdc2-rack3-sts-0"}, + {Rack: "rack1", DC: "Test DC2"}: {"testcluster-test-dc2-rack1-sts-0"}, + {Rack: "rack2", DC: "Test DC2"}: {"testcluster-test-dc2-rack2-sts-0"}, + {Rack: "rack3", DC: "Test DC2"}: {"testcluster-test-dc2-rack3-sts-0"}, } result, err = getTargetRackFQDNs(cassDc) assert.NoError(t, err, err) diff --git a/pkg/medusa/requests.go b/pkg/medusa/requests.go index 63f34a1b6..a36119c08 100644 --- a/pkg/medusa/requests.go +++ b/pkg/medusa/requests.go @@ -2,7 +2,6 @@ package medusa import ( "context" - "time" "github.com/go-logr/logr" cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" @@ -63,7 +62,7 @@ func (f *factory) NewMedusaRestoreRequest(ctx context.Context, restoreKey types. return nil, &ctrl.Result{}, nil } f.Log.Error(err, "Failed to get MedusaRestoreJob") - return nil, &ctrl.Result{RequeueAfter: 10 * time.Second}, err + return nil, &ctrl.Result{}, err } backup := &api.MedusaBackup{} @@ -71,7 +70,7 @@ func (f *factory) NewMedusaRestoreRequest(ctx context.Context, restoreKey types. err = f.Get(ctx, backupKey, backup) if err != nil { f.Log.Error(err, "Failed to get MedusaBackup", "MedusaBackup", backupKey) - return nil, &ctrl.Result{RequeueAfter: 10 * time.Second}, err + return nil, &ctrl.Result{}, err } dc := &cassdcapi.CassandraDatacenter{} @@ -80,7 +79,7 @@ func (f *factory) NewMedusaRestoreRequest(ctx context.Context, restoreKey types. if err != nil { // TODO The datacenter does not have to exist for a remote restore f.Log.Error(err, "Failed to get CassandraDatacenter", "CassandraDatacenter", dcKey) - return nil, &ctrl.Result{RequeueAfter: 10 * time.Second}, err + return nil, &ctrl.Result{}, err } reqLogger := f.Log.WithValues( diff --git a/pkg/medusa/utils.go b/pkg/medusa/utils.go index 7e1968dc1..bf77fb203 100644 --- a/pkg/medusa/utils.go +++ b/pkg/medusa/utils.go @@ -13,10 +13,7 @@ import ( func GetCassandraDatacenterPods(ctx context.Context, cassdc *cassdcapi.CassandraDatacenter, r client.Reader, logger logr.Logger) ([]corev1.Pod, error) { podList := &corev1.PodList{} - labels := client.MatchingLabels{ - cassdcapi.ClusterLabel: cassdcapi.CleanLabelValue(cassdc.Spec.ClusterName), - cassdcapi.DatacenterLabel: cassdc.DatacenterName(), - } + labels := client.MatchingLabels(cassdc.GetDatacenterLabels()) if err := r.List(ctx, podList, labels, client.InNamespace(cassdc.Namespace)); err != nil { logger.Error(err, "failed to get pods for cassandradatacenter", "CassandraDatacenter", cassdc.DatacenterName()) return nil, err diff --git a/pkg/test/testenv.go b/pkg/test/testenv.go index 9c0e8adb2..86f37c85c 100644 --- a/pkg/test/testenv.go +++ b/pkg/test/testenv.go @@ -46,7 +46,7 @@ import ( const ( clustersToCreate = 3 clusterProtoName = "cluster-%d-%s" - cassOperatorVersion = "v1.22.4" + cassOperatorVersion = "v1.23.0" prometheusOperatorVersion = "v0.9.0" ) diff --git a/test/e2e/auth_test.go b/test/e2e/auth_test.go index 16e642a97..a485dacc0 100644 --- a/test/e2e/auth_test.go +++ b/test/e2e/auth_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "net/http" + "strings" "testing" "time" @@ -46,18 +47,19 @@ func multiDcAuthOnOff(t *testing.T, ctx context.Context, namespace string, f *fr stargateGrpcHostAndPort := ingressConfigs[f.DataPlaneContexts[0]].StargateGrpc stargateCqlHostAndPort := ingressConfigs[f.DataPlaneContexts[0]].StargateCql reaperRestHostAndPort := ingressConfigs[f.DataPlaneContexts[0]].ReaperRest - f.DeployStargateIngresses(t, f.DataPlaneContexts[0], namespace, fmt.Sprintf("%s-stargate-service", DcPrefix(t, f, dc1Key)), stargateRestHostAndPort, stargateGrpcHostAndPort) - f.DeployReaperIngresses(t, f.DataPlaneContexts[0], namespace, fmt.Sprintf("%s-reaper-service", DcPrefix(t, f, dc1Key)), reaperRestHostAndPort) - checkStargateApisReachable(t, ctx, f.DataPlaneContexts[0], namespace, DcPrefix(t, f, dc1Key), stargateRestHostAndPort, stargateGrpcHostAndPort, stargateCqlHostAndPort, "", "", false, f) + + f.DeployStargateIngresses(t, f.DataPlaneContexts[0], namespace, fmt.Sprintf("%s-service", stargate1Key.Name), stargateRestHostAndPort, stargateGrpcHostAndPort) + f.DeployReaperIngresses(t, f.DataPlaneContexts[0], namespace, fmt.Sprintf("%s-service", reaper1Key.Name), reaperRestHostAndPort) + checkStargateApisReachable(t, ctx, f.DataPlaneContexts[0], namespace, stargate1Key.Name, stargateRestHostAndPort, stargateGrpcHostAndPort, stargateCqlHostAndPort, "", "", false, f) checkReaperApiReachable(t, ctx, reaperRestHostAndPort) stargateRestHostAndPort = ingressConfigs[f.DataPlaneContexts[1]].StargateRest stargateGrpcHostAndPort = ingressConfigs[f.DataPlaneContexts[1]].StargateGrpc stargateCqlHostAndPort = ingressConfigs[f.DataPlaneContexts[1]].StargateCql reaperRestHostAndPort = ingressConfigs[f.DataPlaneContexts[1]].ReaperRest - f.DeployStargateIngresses(t, f.DataPlaneContexts[1], namespace, fmt.Sprintf("%s-stargate-service", DcPrefix(t, f, dc2Key)), stargateRestHostAndPort, stargateGrpcHostAndPort) - f.DeployReaperIngresses(t, f.DataPlaneContexts[1], namespace, fmt.Sprintf("%s-reaper-service", DcPrefix(t, f, dc2Key)), reaperRestHostAndPort) - checkStargateApisReachable(t, ctx, f.DataPlaneContexts[1], namespace, DcPrefix(t, f, dc2Key), stargateRestHostAndPort, stargateGrpcHostAndPort, stargateCqlHostAndPort, "", "", false, f) + f.DeployStargateIngresses(t, f.DataPlaneContexts[1], namespace, fmt.Sprintf("%s-service", stargate2Key.Name), stargateRestHostAndPort, stargateGrpcHostAndPort) + f.DeployReaperIngresses(t, f.DataPlaneContexts[1], namespace, fmt.Sprintf("%s-service", reaper2Key.Name), reaperRestHostAndPort) + checkStargateApisReachable(t, ctx, f.DataPlaneContexts[1], namespace, stargate2Key.Name, stargateRestHostAndPort, stargateGrpcHostAndPort, stargateCqlHostAndPort, "", "", false, f) checkReaperApiReachable(t, ctx, reaperRestHostAndPort) defer f.UndeployAllIngresses(t, f.DataPlaneContexts[0], namespace) @@ -72,7 +74,7 @@ func multiDcAuthOnOff(t *testing.T, ctx context.Context, namespace string, f *fr // turn auth on toggleAuthentication(t, f, ctx, kcKey, true) waitForAllComponentsReady(t, f, ctx, kcKey, dc1Key, dc2Key, stargate1Key, stargate2Key, reaper1Key, reaper2Key) - testAuthenticationEnabled(t, f, ctx, namespace, kcKey, reaperUiSecretKey, replication, pod1Name, pod2Name, DcPrefix(t, f, dc1Key), DcPrefix(t, f, dc2Key)) + testAuthenticationEnabled(t, f, ctx, namespace, kcKey, reaperUiSecretKey, replication, pod1Name, pod2Name, DcPrefix(t, f, dc1Key), DcPrefixOverride(t, f, dc2Key)) } func waitForAllComponentsReady( @@ -98,13 +100,16 @@ func waitForAllComponentsReady( // pod that has authentication enabled while we just turned it off. options1 := kubectl.Options{Namespace: kcKey.Namespace, Context: f.DataPlaneContexts[0]} options2 := kubectl.Options{Namespace: kcKey.Namespace, Context: f.DataPlaneContexts[1]} - err := kubectl.RolloutStatus(ctx, options1, "deployment", fmt.Sprintf("%s-default-stargate-deployment", DcPrefix(t, f, dc1Key))) + + stargate1Prefix, _ := strings.CutSuffix(stargate1Key.Name, "-stargate") + stargate2Prefix, _ := strings.CutSuffix(stargate2Key.Name, "-stargate") + err := kubectl.RolloutStatus(ctx, options1, "deployment", fmt.Sprintf("%s-default-stargate-deployment", stargate1Prefix)) assert.NoError(t, err) - err = kubectl.RolloutStatus(ctx, options1, "deployment", fmt.Sprintf("%s-reaper", DcPrefix(t, f, dc1Key))) + err = kubectl.RolloutStatus(ctx, options1, "deployment", reaper1Key.Name) assert.NoError(t, err) - err = kubectl.RolloutStatus(ctx, options2, "deployment", fmt.Sprintf("%s-default-stargate-deployment", DcPrefix(t, f, dc2Key))) + err = kubectl.RolloutStatus(ctx, options2, "deployment", fmt.Sprintf("%s-default-stargate-deployment", stargate2Prefix)) assert.NoError(t, err) - err = kubectl.RolloutStatus(ctx, options2, "deployment", fmt.Sprintf("%s-reaper", DcPrefix(t, f, dc2Key))) + err = kubectl.RolloutStatus(ctx, options2, "deployment", reaper2Key.Name) assert.NoError(t, err) } diff --git a/test/e2e/medusa_test.go b/test/e2e/medusa_test.go index 22140d5e2..334d7c468 100644 --- a/test/e2e/medusa_test.go +++ b/test/e2e/medusa_test.go @@ -3,11 +3,12 @@ package e2e import ( "context" "fmt" - "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/utils/ptr" "testing" "time" + "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/utils/ptr" + "github.com/stretchr/testify/assert" cassdcapi "github.com/k8ssandra/cass-operator/apis/cassandra/v1beta1" @@ -189,8 +190,8 @@ func checkPurgeCronJobExists(t *testing.T, ctx context.Context, namespace string t.Log("Checking that the purge Cron Job exists") // check that the cronjob exists cronJob := &batchv1.CronJob{} - err = f.Get(ctx, framework.NewClusterKey(dcKey.K8sContext, namespace, medusapkg.MedusaPurgeCronJobName(kc.SanitizedName(), dc1.SanitizedName())), cronJob) - require.NoErrorf(err, "Error getting the Medusa purge CronJob. ClusterName: %s, DatacenterName: %s", kc.SanitizedName(), dc1.SanitizedName()) + err = f.Get(ctx, framework.NewClusterKey(dcKey.K8sContext, namespace, medusapkg.MedusaPurgeCronJobName(kc.SanitizedName(), dc1.DatacenterName())), cronJob) + require.NoErrorf(err, "Error getting the Medusa purge CronJob. ClusterName: %s, DatacenterName: %s", kc.SanitizedName(), dc1.LabelResourceName()) require.Equal("k8ssandra-operator", cronJob.Spec.JobTemplate.Spec.Template.Spec.ServiceAccountName, "Service account name is not correct") // create a Job from the cronjob spec job := &batchv1.Job{ @@ -201,7 +202,7 @@ func checkPurgeCronJobExists(t *testing.T, ctx context.Context, namespace string Spec: cronJob.Spec.JobTemplate.Spec, } err = f.Create(ctx, dcKey, job) - require.NoErrorf(err, "Error creating the Medusa purge Job. ClusterName: %s, DataceneterName: %s, Namespace: %s, JobName: test-purge-job", kc.SanitizedName(), dc1.SanitizedName(), namespace) + require.NoErrorf(err, "Error creating the Medusa purge Job. ClusterName: %s, DataceneterName: %s, Namespace: %s, JobName: test-purge-job", kc.SanitizedName(), dc1.LabelResourceName(), namespace) // ensure the job run was successful require.Eventually(func() bool { updated := &batchv1.Job{} @@ -223,7 +224,7 @@ func checkNoPurgeCronJob(t *testing.T, ctx context.Context, namespace string, dc require.NoError(err, "Error getting the CassandraDatacenter") // ensure the cronjob was not created cronJob := &batchv1.CronJob{} - err = f.Get(ctx, framework.NewClusterKey(dcKey.K8sContext, namespace, medusapkg.MedusaPurgeCronJobName(kc.SanitizedName(), dc1.SanitizedName())), cronJob) + err = f.Get(ctx, framework.NewClusterKey(dcKey.K8sContext, namespace, medusapkg.MedusaPurgeCronJobName(kc.SanitizedName(), dc1.DatacenterName())), cronJob) require.Error(err, "Cronjob should not exist") } @@ -238,7 +239,7 @@ func checkPurgeCronJobDeleted(t *testing.T, ctx context.Context, namespace strin require.Eventually(func() bool { // ensure the cronjob was deleted cronJob := &batchv1.CronJob{} - err = f.Get(ctx, framework.NewClusterKey(dcKey.K8sContext, namespace, medusapkg.MedusaPurgeCronJobName(kc.SanitizedName(), dc1.SanitizedName())), cronJob) + err = f.Get(ctx, framework.NewClusterKey(dcKey.K8sContext, namespace, medusapkg.MedusaPurgeCronJobName(kc.SanitizedName(), dc1.DatacenterName())), cronJob) return errors.IsNotFound(err) }, polling.medusaBackupDone.timeout, polling.medusaBackupDone.interval, "Medusa purge CronJob wasn't deleted within timeout") } diff --git a/test/e2e/per_node_config_test.go b/test/e2e/per_node_config_test.go index 81c544b61..206fc6090 100644 --- a/test/e2e/per_node_config_test.go +++ b/test/e2e/per_node_config_test.go @@ -29,15 +29,17 @@ func multiDcInitialTokens(t *testing.T, ctx context.Context, namespace string, f checkDatacenterReady(t, ctx, dc1Key, f) assertCassandraDatacenterK8cStatusReady(ctx, t, f, kcKey, dc1Key.Name) dc1Prefix := DcPrefix(t, f, dc1Key) + configKey1Prefix := DcPrefixOverride(t, f, dc1Key) checkDatacenterReady(t, ctx, dc2Key, f) assertCassandraDatacenterK8cStatusReady(ctx, t, f, kcKey, dc2Key.Name) dc2Prefix := DcPrefix(t, f, dc2Key) + configKey2Prefix := DcPrefixOverride(t, f, dc2Key) t.Log("check that the ConfigMaps were created") - perNodeConfigMapKey1 := framework.NewClusterKey(f.DataPlaneContexts[0], namespace, fmt.Sprintf("%s-per-node-config", dc1Prefix)) - perNodeConfigMapKey2 := framework.NewClusterKey(f.DataPlaneContexts[1], namespace, fmt.Sprintf("%s-per-node-config", dc2Prefix)) + perNodeConfigMapKey1 := framework.NewClusterKey(f.DataPlaneContexts[0], namespace, fmt.Sprintf("%s-per-node-config", configKey1Prefix)) + perNodeConfigMapKey2 := framework.NewClusterKey(f.DataPlaneContexts[1], namespace, fmt.Sprintf("%s-per-node-config", configKey2Prefix)) assert.Eventually(t, func() bool { return f.Get(ctx, perNodeConfigMapKey1, &corev1.ConfigMap{}) == nil && diff --git a/test/e2e/reaper_test.go b/test/e2e/reaper_test.go index 89df3e6b9..cf1a8c7c0 100644 --- a/test/e2e/reaper_test.go +++ b/test/e2e/reaper_test.go @@ -157,13 +157,14 @@ func createMultiReaper(t *testing.T, ctx context.Context, namespace string, f *f dc1Prefix := DcPrefix(t, f, dc1Key) dc2Prefix := DcPrefix(t, f, dc2Key) + reaperStargate2Prefix := DcPrefixOverride(t, f, dc2Key) reaper1Key := framework.ClusterKey{K8sContext: f.DataPlaneContexts[0], NamespacedName: types.NamespacedName{Namespace: namespace, Name: dc1Prefix + "-reaper"}} - reaper2Key := framework.ClusterKey{K8sContext: f.DataPlaneContexts[1], NamespacedName: types.NamespacedName{Namespace: namespace, Name: dc2Prefix + "-reaper"}} + reaper2Key := framework.ClusterKey{K8sContext: f.DataPlaneContexts[1], NamespacedName: types.NamespacedName{Namespace: namespace, Name: reaperStargate2Prefix + "-reaper"}} stargate1Key := framework.ClusterKey{K8sContext: f.DataPlaneContexts[0], NamespacedName: types.NamespacedName{Namespace: namespace, Name: dc1Prefix + "-stargate"}} - stargate2Key := framework.ClusterKey{K8sContext: f.DataPlaneContexts[1], NamespacedName: types.NamespacedName{Namespace: namespace, Name: dc2Prefix + "-stargate"}} + stargate2Key := framework.ClusterKey{K8sContext: f.DataPlaneContexts[1], NamespacedName: types.NamespacedName{Namespace: namespace, Name: reaperStargate2Prefix + "-stargate"}} - t.Logf("check Stargate auth keyspace created in both clusters. DC prefixes: %s / %s ", dc1Prefix, dc2Prefix) + t.Logf("check Stargate auth keyspace created in both clusters. DC prefixes: %s / %s ", dc1Prefix, reaperStargate2Prefix) checkKeyspaceExists(t, f, ctx, f.DataPlaneContexts[0], namespace, kc.SanitizedName(), dc1Prefix+"-default-sts-0", stargate.AuthKeyspace) checkKeyspaceExists(t, f, ctx, f.DataPlaneContexts[1], namespace, kc.SanitizedName(), dc2Prefix+"-default-sts-0", stargate.AuthKeyspace) @@ -207,9 +208,9 @@ func createMultiReaper(t *testing.T, ctx context.Context, namespace string, f *f stargateGrpcHostAndPort = ingressConfigs[f.DataPlaneContexts[1]].StargateGrpc stargateCqlHostAndPort = ingressConfigs[f.DataPlaneContexts[1]].StargateCql reaperRestHostAndPort = ingressConfigs[f.DataPlaneContexts[1]].ReaperRest - f.DeployStargateIngresses(t, f.DataPlaneContexts[1], namespace, dc2Prefix+"-stargate-service", stargateRestHostAndPort, stargateGrpcHostAndPort) - f.DeployReaperIngresses(t, f.DataPlaneContexts[1], namespace, dc2Prefix+"-reaper-service", reaperRestHostAndPort) - checkStargateApisReachable(t, ctx, f.DataPlaneContexts[1], namespace, dc2Prefix, stargateRestHostAndPort, stargateGrpcHostAndPort, stargateCqlHostAndPort, username, password, false, f) + f.DeployStargateIngresses(t, f.DataPlaneContexts[1], namespace, reaperStargate2Prefix+"-stargate-service", stargateRestHostAndPort, stargateGrpcHostAndPort) + f.DeployReaperIngresses(t, f.DataPlaneContexts[1], namespace, reaperStargate2Prefix+"-reaper-service", reaperRestHostAndPort) + checkStargateApisReachable(t, ctx, f.DataPlaneContexts[1], namespace, reaperStargate2Prefix, stargateRestHostAndPort, stargateGrpcHostAndPort, stargateCqlHostAndPort, username, password, false, f) checkReaperApiReachable(t, ctx, reaperRestHostAndPort) defer f.UndeployAllIngresses(t, f.DataPlaneContexts[0], namespace) @@ -241,10 +242,10 @@ func createMultiReaperWithEncryption(t *testing.T, ctx context.Context, namespac checkDatacenterReady(t, ctx, dc2Key, f) dc1Prefix := DcPrefix(t, f, dc1Key) - dc2Prefix := DcPrefix(t, f, dc2Key) + reaper2Prefix := DcPrefixOverride(t, f, dc2Key) reaper1Key := framework.ClusterKey{K8sContext: f.DataPlaneContexts[0], NamespacedName: types.NamespacedName{Namespace: namespace, Name: dc1Prefix + "-reaper"}} - reaper2Key := framework.ClusterKey{K8sContext: f.DataPlaneContexts[1], NamespacedName: types.NamespacedName{Namespace: namespace, Name: dc2Prefix + "-reaper"}} + reaper2Key := framework.ClusterKey{K8sContext: f.DataPlaneContexts[1], NamespacedName: types.NamespacedName{Namespace: namespace, Name: reaper2Prefix + "-reaper"}} checkReaperReady(t, f, ctx, reaper1Key) checkReaperK8cStatusReady(t, f, ctx, kcKey, dc1Key) @@ -257,7 +258,7 @@ func createMultiReaperWithEncryption(t *testing.T, ctx context.Context, namespac f.DeployReaperIngresses(t, f.DataPlaneContexts[0], namespace, dc1Prefix+"-reaper-service", reaperRestHostAndPort) checkReaperApiReachable(t, ctx, reaperRestHostAndPort) reaperRestHostAndPort = ingressConfigs[f.DataPlaneContexts[1]].ReaperRest - f.DeployReaperIngresses(t, f.DataPlaneContexts[1], namespace, dc2Prefix+"-reaper-service", reaperRestHostAndPort) + f.DeployReaperIngresses(t, f.DataPlaneContexts[1], namespace, reaper2Prefix+"-reaper-service", reaperRestHostAndPort) checkReaperApiReachable(t, ctx, reaperRestHostAndPort) defer f.UndeployAllIngresses(t, f.DataPlaneContexts[0], namespace) diff --git a/test/e2e/stop_dc_test.go b/test/e2e/stop_dc_test.go index dbbcdf9d8..d981acf02 100644 --- a/test/e2e/stop_dc_test.go +++ b/test/e2e/stop_dc_test.go @@ -35,10 +35,11 @@ func stopAndRestartDc(t *testing.T, ctx context.Context, namespace string, f *fr dc1Prefix := DcPrefix(t, f, dc1Key) dc2Prefix := DcPrefix(t, f, dc2Key) + reaperStargate2Prefix := DcPrefixOverride(t, f, dc2Key) sg1Key := framework.NewClusterKey(f.DataPlaneContexts[0], namespace, fmt.Sprintf("%s-stargate", dc1Prefix)) reaper1Key := framework.NewClusterKey(f.DataPlaneContexts[0], namespace, fmt.Sprintf("%s-reaper", dc1Prefix)) - sg2Key := framework.NewClusterKey(f.DataPlaneContexts[1], namespace, fmt.Sprintf("%s-stargate", dc2Prefix)) - reaper2Key := framework.NewClusterKey(f.DataPlaneContexts[1], namespace, fmt.Sprintf("%s-reaper", dc2Prefix)) + sg2Key := framework.NewClusterKey(f.DataPlaneContexts[1], namespace, fmt.Sprintf("%s-stargate", reaperStargate2Prefix)) + reaper2Key := framework.NewClusterKey(f.DataPlaneContexts[1], namespace, fmt.Sprintf("%s-reaper", reaperStargate2Prefix)) checkStargateReady(t, f, ctx, sg1Key) checkReaperReady(t, f, ctx, reaper1Key) @@ -59,10 +60,10 @@ func stopAndRestartDc(t *testing.T, ctx context.Context, namespace string, f *fr stargateGrpcHostAndPort := ingressConfigs[f.DataPlaneContexts[1]].StargateGrpc stargateCqlHostAndPort := ingressConfigs[f.DataPlaneContexts[1]].StargateCql reaperRestHostAndPort := ingressConfigs[f.DataPlaneContexts[1]].ReaperRest - f.DeployStargateIngresses(t, f.DataPlaneContexts[1], namespace, fmt.Sprintf("%s-stargate-service", dc2Prefix), stargateRestHostAndPort, stargateGrpcHostAndPort) - f.DeployReaperIngresses(t, f.DataPlaneContexts[1], namespace, fmt.Sprintf("%s-reaper-service", dc2Prefix), reaperRestHostAndPort) + f.DeployStargateIngresses(t, f.DataPlaneContexts[1], namespace, fmt.Sprintf("%s-stargate-service", reaperStargate2Prefix), stargateRestHostAndPort, stargateGrpcHostAndPort) + f.DeployReaperIngresses(t, f.DataPlaneContexts[1], namespace, fmt.Sprintf("%s-reaper-service", reaperStargate2Prefix), reaperRestHostAndPort) defer f.UndeployAllIngresses(t, f.DataPlaneContexts[1], namespace) - checkStargateApisReachable(t, ctx, f.DataPlaneContexts[1], namespace, dc2Prefix, stargateRestHostAndPort, stargateGrpcHostAndPort, stargateCqlHostAndPort, username, password, false, f) + checkStargateApisReachable(t, ctx, f.DataPlaneContexts[1], namespace, reaperStargate2Prefix, stargateRestHostAndPort, stargateGrpcHostAndPort, stargateCqlHostAndPort, username, password, false, f) checkReaperApiReachable(t, ctx, reaperRestHostAndPort) pod1Name := fmt.Sprintf("%s-default-sts-0", dc1Prefix) @@ -71,7 +72,7 @@ func stopAndRestartDc(t *testing.T, ctx context.Context, namespace string, f *fr checkKeyspaceReplicationsUnaltered(t, f, ctx, f.DataPlaneContexts[1], namespace, pod2Name, DcName(t, f, dc1Key), DcName(t, f, dc2Key)) t.Run("TestApisDc1Stopped", func(t *testing.T) { - testStargateApis(t, f, ctx, f.DataPlaneContexts[1], namespace, dc2Prefix, username, password, false, map[string]int{DcName(t, f, dc2Key): 1}) + testStargateApis(t, f, ctx, f.DataPlaneContexts[1], namespace, reaperStargate2Prefix, username, password, false, map[string]int{DcName(t, f, dc2Key): 1}) uiKey := framework.NewClusterKey(f.DataPlaneContexts[1], namespace, reaper.DefaultUiSecretName("cluster1")) uiUsername, uiPassword := retrieveCredentials(t, f, ctx, uiKey) connectReaperApi(t, ctx, f.DataPlaneContexts[1], "cluster1", uiUsername, uiPassword) @@ -119,7 +120,7 @@ func stopAndRestartDc(t *testing.T, ctx context.Context, namespace string, f *fr t.Run("TestApisDcsRestarted", func(t *testing.T) { testStargateApis(t, f, ctx, f.DataPlaneContexts[0], namespace, dc1Prefix, username, password, false, map[string]int{DcName(t, f, dc1Key): 1, DcName(t, f, dc2Key): 1}) - testStargateApis(t, f, ctx, f.DataPlaneContexts[1], namespace, dc2Prefix, username, password, false, map[string]int{DcName(t, f, dc1Key): 1, DcName(t, f, dc2Key): 1}) + testStargateApis(t, f, ctx, f.DataPlaneContexts[1], namespace, reaperStargate2Prefix, username, password, false, map[string]int{DcName(t, f, dc1Key): 1, DcName(t, f, dc2Key): 1}) uiKey := framework.NewClusterKey(f.DataPlaneContexts[0], namespace, reaper.DefaultUiSecretName("cluster1")) uiUsername, uiPassword := retrieveCredentials(t, f, ctx, uiKey) testReaperApi(t, ctx, f.DataPlaneContexts[0], "cluster1", reaperapi.DefaultKeyspace, uiUsername, uiPassword) diff --git a/test/e2e/suite_test.go b/test/e2e/suite_test.go index 5a9b98d98..50703189e 100644 --- a/test/e2e/suite_test.go +++ b/test/e2e/suite_test.go @@ -803,7 +803,7 @@ func createSingleDatacenterCluster(t *testing.T, ctx context.Context, namespace // Check that the Cassandra cluster name override is passed to the cassdc without being modified checkCassandraClusterName(t, ctx, k8ssandra, dcKey, f) assertCassandraDatacenterK8cStatusReady(ctx, t, f, kcKey, dcKey.Name) - dcPrefix := DcPrefix(t, f, dcKey) + dcPrefix := DcPrefixOverride(t, f, dcKey) require.NoError(checkMetricsFiltersAbsence(t, ctx, f, dcKey)) require.NoError(checkInjectedContainersPresence(t, ctx, f, dcKey)) require.NoError(checkInjectedVolumePresence(t, ctx, f, dcKey, 4)) @@ -1242,7 +1242,7 @@ func addDcToCluster(t *testing.T, ctx context.Context, namespace string, f *fram K8sContext: f.DataPlaneContexts[1], NamespacedName: types.NamespacedName{ Namespace: namespace, - Name: DcPrefix(t, f, dc2Key) + "-stargate", + Name: DcPrefixOverride(t, f, dc2Key) + "-stargate", }, } checkStargateReady(t, f, ctx, sg2Key) @@ -1251,7 +1251,7 @@ func addDcToCluster(t *testing.T, ctx context.Context, namespace string, f *fram K8sContext: f.DataPlaneContexts[1], NamespacedName: types.NamespacedName{ Namespace: namespace, - Name: DcPrefix(t, f, dc2Key) + "-reaper", + Name: DcPrefixOverride(t, f, dc2Key) + "-reaper", }, } checkReaperReady(t, f, ctx, reaper2Key) @@ -1457,7 +1457,7 @@ func removeDcFromCluster(t *testing.T, ctx context.Context, namespace string, f K8sContext: f.DataPlaneContexts[1], NamespacedName: types.NamespacedName{ Namespace: namespace, - Name: DcPrefix(t, f, dc2Key) + "-stargate", + Name: DcPrefixOverride(t, f, dc2Key) + "-stargate", }, } checkStargateReady(t, f, ctx, sg2Key) @@ -1466,7 +1466,7 @@ func removeDcFromCluster(t *testing.T, ctx context.Context, namespace string, f K8sContext: f.DataPlaneContexts[1], NamespacedName: types.NamespacedName{ Namespace: namespace, - Name: DcPrefix(t, f, dc2Key) + "-reaper", + Name: DcPrefixOverride(t, f, dc2Key) + "-reaper", }, } checkReaperReady(t, f, ctx, reaper2Key) @@ -1493,7 +1493,7 @@ func removeDcFromCluster(t *testing.T, ctx context.Context, namespace string, f err = f.Client.Get(ctx, kcKey, kc) require.NoError(err, "failed to get K8ssandraCluster %s", kcKey) return kc.Status.Error != "None" && strings.Contains(kc.Status.Error, fmt.Sprintf("cannot decommission DC %s", dc2Name)) - }, 5*time.Minute, 5*time.Second, "timed out waiting for an error on dc2 removal") + }, 5*time.Minute, 1*time.Second, "timed out waiting for an error on dc2 removal") t.Log("alter keyspaces to remove replicas from DC2") _, err = f.ExecuteCql(ctx, f.DataPlaneContexts[0], namespace, kc.SanitizedName(), DcPrefix(t, f, dc1Key)+"-default-sts-0", @@ -1625,7 +1625,8 @@ func checkStargateApisWithMultiDcCluster(t *testing.T, ctx context.Context, name return kdcStatus.Stargate.IsReady() }, polling.k8ssandraClusterStatus.timeout, polling.k8ssandraClusterStatus.interval) - dc2Prefix := DcPrefix(t, f, dc2Key) + dc2Prefix := DcPrefixOverride(t, f, dc2Key) + dc2PodPrefix := DcPrefix(t, f, dc2Key) stargateKey = framework.ClusterKey{K8sContext: f.DataPlaneContexts[1], NamespacedName: types.NamespacedName{Namespace: namespace, Name: dc2Prefix + "-stargate"}} checkStargateReady(t, f, ctx, stargateKey) @@ -1667,7 +1668,7 @@ func checkStargateApisWithMultiDcCluster(t *testing.T, ctx context.Context, name assert.NoError(t, err, "timed out waiting for nodetool status check against "+pod) t.Log("check nodes in dc2 see nodes in dc1") - pod = dc2Prefix + "-rack1-sts-0" + pod = dc2PodPrefix + "-rack1-sts-0" checkNodeToolStatus(t, f, f.DataPlaneContexts[1], namespace, pod, count, 0, "-u", username, "-pw", password) assert.NoError(t, err, "timed out waiting for nodetool status check against "+pod) @@ -1743,7 +1744,7 @@ func checkStargateApisWithMultiDcEncryptedCluster(t *testing.T, ctx context.Cont return kdcStatus.Stargate.IsReady() }, polling.k8ssandraClusterStatus.timeout, polling.k8ssandraClusterStatus.interval) - dc2Prefix := DcPrefix(t, f, dc2Key) + dc2Prefix := DcPrefixOverride(t, f, dc2Key) stargateKey = framework.ClusterKey{K8sContext: f.DataPlaneContexts[1], NamespacedName: types.NamespacedName{Namespace: namespace, Name: dc2Prefix + "-stargate"}} checkStargateReady(t, f, ctx, stargateKey) @@ -2137,6 +2138,17 @@ func configureZeroLog() { } func DcPrefix( + t *testing.T, + f *framework.E2eFramework, + dcKey framework.ClusterKey) string { + // Get the cassdc object + cassdc := &cassdcapi.CassandraDatacenter{} + err := f.Get(context.Background(), dcKey, cassdc) + require.NoError(t, err) + return framework.CleanupForKubernetes(fmt.Sprintf("%s-%s", cassdc.Spec.ClusterName, cassdc.Name)) +} + +func DcPrefixOverride( t *testing.T, f *framework.E2eFramework, dcKey framework.ClusterKey) string { @@ -2239,7 +2251,7 @@ func checkInjectedVolumePresence(t *testing.T, ctx context.Context, f *framework return fmt.Errorf("cannot find busybox injected container in pod template spec") } - cassandraPods, err := f.GetCassandraDatacenterPods(t, ctx, dcKey, cassdc.DatacenterName()) + cassandraPods, err := f.GetCassandraDatacenterPods(t, ctx, dcKey, cassdc.Name) require.NoError(t, err, "failed listing Cassandra pods") cassandraIndex, cassandraFound := findContainerInPod(t, cassandraPods[0], "cassandra") require.True(t, cassandraFound, "cannot find cassandra container in cassandra pod") diff --git a/test/framework/e2e_framework.go b/test/framework/e2e_framework.go index c5890dce4..f26300c89 100644 --- a/test/framework/e2e_framework.go +++ b/test/framework/e2e_framework.go @@ -626,7 +626,7 @@ func (f *E2eFramework) DumpClusterInfo(test string, namespaces ...string) error // Dump all objects that we need to investigate failures as a flat list and as yaml manifests for _, objectType := range []string{"K8ssandraCluster", "CassandraDatacenter", "Stargate", "Reaper", "StatefulSet", "Secrets", - "ReplicatedSecret", "ClientConfig", "CassandraTask", "MedusaBackup", "MedusaBackupJob", "MedusaRestoreJob", "MedusaTask"} { + "ReplicatedSecret", "ClientConfig", "CassandraTask", "MedusaBackup", "MedusaBackupJob", "MedusaRestoreJob", "MedusaTask", "ConfigMaps"} { if err := os.MkdirAll(fmt.Sprintf("%s/%s/objects/%s", outputDir, namespace, objectType), 0755); err != nil { return err }