From b29058770d1d15935f6f0387c7004e4fe4b67a94 Mon Sep 17 00:00:00 2001 From: andyzhangx Date: Sat, 4 Jan 2025 03:29:18 +0000 Subject: [PATCH] fix: switch to data plane API call when create snapshot is throttled fix fix --- pkg/azurefile/azurefile.go | 2 +- pkg/azurefile/controllerserver.go | 8 ++++++ pkg/azurefile/utils.go | 13 +++++++--- pkg/azurefile/utils_test.go | 42 +++++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 5 deletions(-) diff --git a/pkg/azurefile/azurefile.go b/pkg/azurefile/azurefile.go index 41b43beecc..21f76ecc4e 100644 --- a/pkg/azurefile/azurefile.go +++ b/pkg/azurefile/azurefile.go @@ -967,7 +967,7 @@ func (d *Driver) DeleteFileShare(ctx context.Context, subsID, resourceGroup, acc if isRetriableError(err) { klog.Warningf("DeleteFileShare(%s) on account(%s) failed with error(%v), waiting for retrying", shareName, accountName, err) - if strings.Contains(strings.ToLower(err.Error()), strings.ToLower(tooManyRequests)) { + if isThrottlingError(err) { klog.Warningf("switch to use data plane API instead for account %s since it's throttled", accountName) d.dataPlaneAPIAccountCache.Set(accountName, "") return true, err diff --git a/pkg/azurefile/controllerserver.go b/pkg/azurefile/controllerserver.go index 00cfda0e3d..6c5cd00b0b 100644 --- a/pkg/azurefile/controllerserver.go +++ b/pkg/azurefile/controllerserver.go @@ -877,6 +877,10 @@ func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequ } } + if !useDataPlaneAPI { + useDataPlaneAPI = d.useDataPlaneAPI(sourceVolumeID, accountName) + } + mc := metrics.NewMetricContext(azureFileCSIDriverName, "controller_create_snapshot", rgName, subsID, d.Name) isOperationSucceeded := false defer func() { @@ -926,6 +930,10 @@ func (d *Driver) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshotRequ } else { snapshotShare, err := d.cloud.FileClient.WithSubscriptionID(subsID).CreateFileShare(ctx, rgName, accountName, &fileclient.ShareOptions{Name: fileShareName, Metadata: map[string]*string{snapshotNameKey: &snapshotName}}, snapshotsExpand) if err != nil { + if isThrottlingError(err) { + klog.Warningf("switch to use data plane API instead for account %s since it's throttled", accountName) + d.dataPlaneAPIAccountCache.Set(accountName, "") + } return nil, status.Errorf(codes.Internal, "create snapshot from(%s) failed with %v, accountName: %q", sourceVolumeID, err, accountName) } diff --git a/pkg/azurefile/utils.go b/pkg/azurefile/utils.go index 5afd8908c1..ca085ce106 100644 --- a/pkg/azurefile/utils.go +++ b/pkg/azurefile/utils.go @@ -135,11 +135,16 @@ func isRetriableError(err error) bool { return false } -func sleepIfThrottled(err error, defaultSleepSec int) { - if err == nil { - return +func isThrottlingError(err error) bool { + if err != nil { + errMsg := strings.ToLower(err.Error()) + return strings.Contains(errMsg, strings.ToLower(tooManyRequests)) || strings.Contains(errMsg, clientThrottled) } - if strings.Contains(strings.ToLower(err.Error()), strings.ToLower(tooManyRequests)) || strings.Contains(strings.ToLower(err.Error()), clientThrottled) { + return false +} + +func sleepIfThrottled(err error, defaultSleepSec int) { + if isThrottlingError(err) { retryAfter := getRetryAfterSeconds(err) if retryAfter == 0 { retryAfter = defaultSleepSec diff --git a/pkg/azurefile/utils_test.go b/pkg/azurefile/utils_test.go index 48109e65a5..9b87dd6a59 100644 --- a/pkg/azurefile/utils_test.go +++ b/pkg/azurefile/utils_test.go @@ -749,3 +749,45 @@ func TestIsReadOnlyFromCapability(t *testing.T) { } } } + +func TestIsThrottlingError(t *testing.T) { + tests := []struct { + desc string + err error + expected bool + }{ + { + desc: "nil error", + err: nil, + expected: false, + }, + { + desc: "no match", + err: errors.New("no match"), + expected: false, + }, + { + desc: "match error message", + err: errors.New("could not list storage accounts for account type Premium_LRS: Retriable: true, RetryAfter: 217s, HTTPStatusCode: 0, RawError: azure cloud provider throttled for operation StorageAccountListByResourceGroup with reason \"client throttled\""), + expected: true, + }, + { + desc: "match error message exceeds 1200s", + err: errors.New("could not list storage accounts for account type Premium_LRS: Retriable: true, RetryAfter: 2170s, HTTPStatusCode: 0, RawError: azure cloud provider throttled for operation StorageAccountListByResourceGroup with reason \"client throttled\""), + expected: true, + }, + { + desc: "match error message with TooManyRequests throttling", + err: errors.New("could not list storage accounts for account type Premium_LRS: Retriable: true, RetryAfter: 2170s, HTTPStatusCode: 429, RawError: azure cloud provider throttled for operation StorageAccountListByResourceGroup with reason \"TooManyRequests\""), + expected: true, + }, + } + + for _, test := range tests { + result := isThrottlingError(test.err) + if result != test.expected { + t.Errorf("desc: (%s), input: err(%v), IsThrottlingError returned with bool(%t), not equal to expected(%t)", + test.desc, test.err, result, test.expected) + } + } +}