Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add orphan checkpoint retention policy #60

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,12 @@ jobs:

- name: Test Max Checkpoints Set to 1
run: sudo -E bats -f "test_max_checkpoints_set_to_1" ./test/run_tests.bats

- name: Test Max Total Checkpoint Size
run: sudo -E bats -f "test_max_total_checkpoint_size" ./test/run_tests.bats

- name: Test Max Checkpoint Size
run: sudo -E bats -f "test_max_checkpoint_size" ./test/run_tests.bats

- name: Test orphan retention
run: sudo -E bats -f "test_orphan_retention_policy" ./test/run_tests.bats
40 changes: 28 additions & 12 deletions api/v1/checkpointrestoreoperator_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,27 +34,41 @@ type CheckpointRestoreOperatorSpec struct {
}

// GlobalPolicySpec defines the global checkpoint retention limits: whether
// orphan checkpoints are retained, the maximum number of checkpoints per
// namespace, pod, and container, the maximum size of a single checkpoint, and
// the maximum total checkpoint size per namespace, pod, and container.
// Every field is an optional pointer and is omitted from JSON when nil.
//
// The project documentation describes the size limits as MB and RetainOrphan
// as defaulting to true when unset — TODO confirm against the controller.
//
// NOTE(review): the Go field MaxCheckpointsPerNamespaces is plural while its
// JSON key is the singular "maxCheckpointsPerNamespace".
type GlobalPolicySpec struct {
MaxCheckpointsPerNamespaces *int `json:"maxCheckpointsPerNamespace,omitempty"`
MaxCheckpointsPerPod *int `json:"maxCheckpointsPerPod,omitempty"`
MaxCheckpointsPerContainer *int `json:"maxCheckpointsPerContainer,omitempty"`
RetainOrphan *bool `json:"retainOrphan,omitempty"`
MaxCheckpointsPerNamespaces *int `json:"maxCheckpointsPerNamespace,omitempty"`
MaxCheckpointsPerPod *int `json:"maxCheckpointsPerPod,omitempty"`
MaxCheckpointsPerContainer *int `json:"maxCheckpointsPerContainer,omitempty"`
MaxCheckpointSize *int `json:"maxCheckpointSize,omitempty"`
MaxTotalSizePerNamespace *int `json:"maxTotalSizePerNamespace,omitempty"`
MaxTotalSizePerPod *int `json:"maxTotalSizePerPod,omitempty"`
MaxTotalSizePerContainer *int `json:"maxTotalSizePerContainer,omitempty"`
}

// ContainerPolicySpec defines a checkpoint retention policy for a single
// container, identified by its namespace, pod name, and container name.
// It carries an orphan-retention flag, a maximum checkpoint count, a maximum
// size for a single checkpoint, and a maximum total checkpoint size.
// All fields are optional and omitted from JSON when empty or nil.
//
// The project documentation describes the size limits as MB and RetainOrphan
// as defaulting to true when unset — TODO confirm against the controller.
type ContainerPolicySpec struct {
Namespace string `json:"namespace,omitempty"`
Pod string `json:"pod,omitempty"`
Container string `json:"container,omitempty"`
MaxCheckpoints *int64 `json:"maxCheckpoints,omitempty"`
Namespace string `json:"namespace,omitempty"`
Pod string `json:"pod,omitempty"`
Container string `json:"container,omitempty"`
RetainOrphan *bool `json:"retainOrphan,omitempty"`
MaxCheckpoints *int `json:"maxCheckpoints,omitempty"`
MaxCheckpointSize *int `json:"maxCheckpointSize,omitempty"`
MaxTotalSize *int `json:"maxTotalSize,omitempty"`
}

// PodPolicySpec defines a checkpoint retention policy for a single pod,
// identified by its namespace and pod name. It carries an orphan-retention
// flag, a maximum checkpoint count, a maximum size for a single checkpoint,
// and a maximum total checkpoint size.
// All fields are optional and omitted from JSON when empty or nil.
//
// The project documentation describes the size limits as MB and RetainOrphan
// as defaulting to true when unset — TODO confirm against the controller.
type PodPolicySpec struct {
Namespace string `json:"namespace,omitempty"`
Pod string `json:"pod,omitempty"`
MaxCheckpoints *int64 `json:"maxCheckpoints,omitempty"`
Namespace string `json:"namespace,omitempty"`
Pod string `json:"pod,omitempty"`
RetainOrphan *bool `json:"retainOrphan,omitempty"`
MaxCheckpoints *int `json:"maxCheckpoints,omitempty"`
MaxCheckpointSize *int `json:"maxCheckpointSize,omitempty"`
MaxTotalSize *int `json:"maxTotalSize,omitempty"`
}

// NamespacePolicySpec defines a checkpoint retention policy for a single
// namespace, identified by name. It carries an orphan-retention flag, a
// maximum checkpoint count, a maximum size for a single checkpoint, and a
// maximum total checkpoint size.
// All fields are optional and omitted from JSON when empty or nil.
//
// The project documentation describes the size limits as MB and RetainOrphan
// as defaulting to true when unset — TODO confirm against the controller.
type NamespacePolicySpec struct {
Namespace string `json:"namespace,omitempty"`
MaxCheckpoints *int64 `json:"maxCheckpoints,omitempty"`
Namespace string `json:"namespace,omitempty"`
RetainOrphan *bool `json:"retainOrphan,omitempty"`
MaxCheckpoints *int `json:"maxCheckpoints,omitempty"`
MaxCheckpointSize *int `json:"maxCheckpointSize,omitempty"`
MaxTotalSize *int `json:"maxTotalSize,omitempty"`
}

// CheckpointRestoreOperatorStatus defines the observed state of CheckpointRestoreOperator
Expand All @@ -64,6 +78,8 @@ type CheckpointRestoreOperatorStatus struct {

//+kubebuilder:object:root=true
//+kubebuilder:subresource:status
//+kubebuilder:rbac:groups="",resources=pods,verbs=get;list;watch
//+kubebuilder:rbac:groups="",resources=namespaces,verbs=get;list;watch

// CheckpointRestoreOperator is the Schema for the checkpointrestoreoperators API
type CheckpointRestoreOperator struct {
Expand Down
76 changes: 73 additions & 3 deletions api/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 28 additions & 3 deletions config/crd/bases/criu.org_checkpointrestoreoperators.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,44 +52,69 @@ spec:
properties:
container:
type: string
maxCheckpointSize:
type: integer
maxCheckpoints:
format: int64
type: integer
maxTotalSize:
type: integer
namespace:
type: string
pod:
type: string
retainOrphan:
type: boolean
type: object
type: array
globalPolicy:
properties:
maxCheckpointSize:
type: integer
maxCheckpointsPerContainer:
type: integer
maxCheckpointsPerNamespace:
type: integer
maxCheckpointsPerPod:
type: integer
maxTotalSizePerContainer:
type: integer
maxTotalSizePerNamespace:
type: integer
maxTotalSizePerPod:
type: integer
retainOrphan:
type: boolean
type: object
namespacePolicies:
items:
properties:
maxCheckpointSize:
type: integer
maxCheckpoints:
format: int64
type: integer
maxTotalSize:
type: integer
namespace:
type: string
retainOrphan:
type: boolean
type: object
type: array
podPolicies:
items:
properties:
maxCheckpointSize:
type: integer
maxCheckpoints:
format: int64
type: integer
maxTotalSize:
type: integer
namespace:
type: string
pod:
type: string
retainOrphan:
type: boolean
type: object
type: array
type: object
Expand Down
16 changes: 16 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,22 @@ kind: ClusterRole
metadata:
name: manager-role
rules:
- apiGroups:
- ""
resources:
- namespaces
verbs:
- get
- list
- watch
- apiGroups:
- ""
resources:
- pods
verbs:
- get
- list
- watch
- apiGroups:
- criu.org
resources:
Expand Down
12 changes: 12 additions & 0 deletions config/samples/_v1_checkpointrestoreoperator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,30 @@ spec:
checkpointDirectory: /var/lib/kubelet/checkpoints
applyPoliciesImmediately: false
globalPolicy:
retainOrphan: true
maxCheckpointsPerNamespace: 50
maxCheckpointsPerPod: 30
maxCheckpointsPerContainer: 10
maxCheckpointSize: 10
maxTotalSizePerNamespace: 1000
maxTotalSizePerPod: 500
maxTotalSizePerContainer: 100
# containerPolicies:
# - namespace: <namespace>
# pod: <pod_name>
# container: <container_name>
# retainOrphan: false
# maxCheckpoints: 5
# maxCheckpointSize: 10
# maxTotalSize: 100
# podPolicies:
# - namespace: <namespace>
# pod: <pod_name>
# maxCheckpoints: 10
# maxCheckpointSize: 10
# maxTotalSize: 500
# namespacePolicies:
# - namespace: <namespace>
# maxCheckpoints: 15
# maxCheckpointSize: 10
# maxTotalSize: 1000
22 changes: 21 additions & 1 deletion docs/retention_policy.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ spec:
checkpointDirectory: /var/lib/kubelet/checkpoints
applyPoliciesImmediately: false
globalPolicy:
retainOrphan: true
maxCheckpointsPerNamespace: 50
maxCheckpointsPerPod: 30
maxCheckpointsPerContainer: 10
Expand All @@ -35,14 +36,19 @@ spec:
# - namespace: <namespace>
# pod: <pod_name>
# container: <container_name>
# retainOrphan: false # Setting this to false deletes all orphan checkpoints
# maxCheckpoints: 5
# maxCheckpointSize: 6 # Maximum size of a single checkpoint in MB
# maxTotalSize: 20 # Maximum total size of checkpoints for the container in MB
# podPolicies:
# - namespace: <namespace>
# pod: <pod_name>
# maxCheckpoints: 10
# maxCheckpointSize: 8 # Maximum size of a single checkpoint in MB
# maxTotalSize: 50 # Maximum total size of checkpoints for the pod in MB
# namespacePolicies:
# - namespace: <namespace>
# maxCheckpoints: 15`
# maxCheckpoints: 15
```
A sample configuration file is available [here](/config/samples/_v1_checkpointrestoreoperator.yaml).

Expand All @@ -51,21 +57,35 @@ A sample configuration file is available [here](/config/samples/_v1_checkpointre
- `checkpointDirectory`: Specifies the directory where checkpoints are stored.
- `applyPoliciesImmediately`: If set to `true`, the policies are applied immediately. If `false` (the default), they are applied the next time a new checkpoint is created.
- `globalPolicy`: Defines global checkpoint retention limits.
- `retainOrphan`: If set to `true` (default), orphan checkpoints (checkpoints whose associated resources have been deleted) will be retained. If set to `false`, orphan checkpoints will be automatically deleted. This is particularly useful for transient checkpoints used to recover from errors by replacing 'container restart' with 'container restore'.
- `maxCheckpointsPerNamespace`: Maximum number of checkpoints per namespace.
- `maxCheckpointsPerPod`: Maximum number of checkpoints per pod.
- `maxCheckpointsPerContainer`: Maximum number of checkpoints per container.
- `maxCheckpointSize`: Maximum size of a single checkpoint in MB.
- `maxTotalSizePerNamespace`: Maximum total size of checkpoints per namespace in MB.
- `maxTotalSizePerPod`: Maximum total size of checkpoints per pod in MB.
- `maxTotalSizePerContainer`: Maximum total size of checkpoints per container in MB.
- `containerPolicies` (optional): Specific retention policies for containers.
- `namespace`: Namespace of the container.
- `pod`: Pod name of the container.
- `container`: Container name.
- `retainOrphan`: If set to `true` (default), orphan checkpoints for this container will be retained. If set to `false`, orphan checkpoints will be deleted.
- `maxCheckpoints`: Maximum number of checkpoints for the container.
- `maxCheckpointSize`: Maximum size of a single checkpoint in MB.
- `maxTotalSize`: Maximum total size of checkpoints for the container in MB.
- `podPolicies` (optional): Specific retention policies for pods.
- `namespace`: Namespace of the pod.
- `pod`: Pod name.
- `retainOrphan`: If set to `true` (default), orphan checkpoints for this pod will be retained. If set to `false`, orphan checkpoints will be deleted.
- `maxCheckpoints`: Maximum number of checkpoints for the pod.
- `maxCheckpointSize`: Maximum size of a single checkpoint in MB.
- `maxTotalSize`: Maximum total size of checkpoints for the pod in MB.
- `namespacePolicies` (optional): Specific retention policies for namespaces.
- `namespace`: Namespace name.
- `retainOrphan`: If set to `true` (default), orphan checkpoints for this namespace will be retained. If set to `false`, orphan checkpoints will be deleted.
- `maxCheckpoints`: Maximum number of checkpoints for the namespace.
- `maxCheckpointSize`: Maximum size of a single checkpoint in MB.
- `maxTotalSize`: Maximum total size of checkpoints for the namespace in MB.

## Policy Hierarchy and Application

Expand Down
Loading
Loading