Skip to content

Commit e2e5bcf

Browse files
authored
Handling CMK AccessDenied errors (#5420)
* Handling CMK Errors Signed-off-by: Alan Protasio <[email protected]> * lint Signed-off-by: Alan Protasio <[email protected]> * add test on BucketStores Signed-off-by: Alan Protasio <[email protected]> * fixing race Signed-off-by: Alan Protasio <[email protected]> * lint Signed-off-by: Alan Protasio <[email protected]> * Implementing error handling on labels apis Signed-off-by: Alan Protasio <[email protected]> * handling errros from thanos SG Signed-off-by: Alan Protasio <[email protected]> * creating IsOneOfTheExpectedErrors func Signed-off-by: Alan Protasio <[email protected]> --------- Signed-off-by: Alan Protasio <[email protected]>
1 parent 1dcb72d commit e2e5bcf

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+767
-38
lines changed

Diff for: go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ require (
5151
github.com/sony/gobreaker v0.5.0
5252
github.com/spf13/afero v1.9.5
5353
github.com/stretchr/testify v1.8.4
54-
github.com/thanos-io/objstore v0.0.0-20230522103316-23ebe2eacadd
54+
github.com/thanos-io/objstore v0.0.0-20230629211010-ff1b35b9841a
5555
github.com/thanos-io/promql-engine v0.0.0-20230526105742-791d78b260ea
5656
github.com/thanos-io/thanos v0.31.1-0.20230627154113-7cfaf3fe2d43
5757
github.com/uber/jaeger-client-go v2.30.0+incompatible

Diff for: go.sum

+2-2
Original file line numberDiff line numberDiff line change
@@ -1160,8 +1160,8 @@ github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXl
11601160
github.com/tencentyun/cos-go-sdk-v5 v0.7.40 h1:W6vDGKCHe4wBACI1d2UgE6+50sJFhRWU4O8IB2ozzxM=
11611161
github.com/thanos-community/galaxycache v0.0.0-20211122094458-3a32041a1f1e h1:f1Zsv7OAU9iQhZwigp50Yl38W10g/vd5NC8Rdk1Jzng=
11621162
github.com/thanos-community/galaxycache v0.0.0-20211122094458-3a32041a1f1e/go.mod h1:jXcofnrSln/cLI6/dhlBxPQZEEQHVPCcFaH75M+nSzM=
1163-
github.com/thanos-io/objstore v0.0.0-20230522103316-23ebe2eacadd h1:asQ0HomkaUXZuR3J7daBEusMS++3hkYsYM6u8gpmPWM=
1164-
github.com/thanos-io/objstore v0.0.0-20230522103316-23ebe2eacadd/go.mod h1:5V7lzXuaxwt6XFQoA/zJrhdnQrxq1+r0bwQ1iYOq3gM=
1163+
github.com/thanos-io/objstore v0.0.0-20230629211010-ff1b35b9841a h1:tXcVeuval1nzdHn1JXqaBmyjuEUcpDI9huPrUF04nR4=
1164+
github.com/thanos-io/objstore v0.0.0-20230629211010-ff1b35b9841a/go.mod h1:5V7lzXuaxwt6XFQoA/zJrhdnQrxq1+r0bwQ1iYOq3gM=
11651165
github.com/thanos-io/promql-engine v0.0.0-20230526105742-791d78b260ea h1:kzK8sBn2+mo3NAxP+XjAjAqr1hwfxxFUy5CybaBkjAI=
11661166
github.com/thanos-io/promql-engine v0.0.0-20230526105742-791d78b260ea/go.mod h1:eIgPaXWgOhNAv6CPPrgu09r0AtT7byBTZy+7WkX0D18=
11671167
github.com/thanos-io/thanos v0.31.1-0.20230627154113-7cfaf3fe2d43 h1:UHyTPGdDHAoNHuSce5cJ2vEi6g1v8D5ZFBWZ61uTHSM=

Diff for: pkg/compactor/blocks_cleaner.go

+4
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,10 @@ func (c *BlocksCleaner) cleanUser(ctx context.Context, userID string, firstRun b
325325
idx, err := bucketindex.ReadIndex(ctx, c.bucketClient, userID, c.cfgProvider, c.logger)
326326
if errors.Is(err, bucketindex.ErrIndexCorrupted) {
327327
level.Warn(userLogger).Log("msg", "found a corrupted bucket index, recreating it")
328+
} else if errors.Is(err, bucket.ErrCustomerManagedKeyAccessDenied) {
329+
// Give up cleaning if we get access denied
330+
level.Warn(userLogger).Log("msg", err.Error())
331+
return nil
328332
} else if err != nil && !errors.Is(err, bucketindex.ErrIndexNotFound) {
329333
return err
330334
}

Diff for: pkg/compactor/blocks_cleaner_test.go

+32-17
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package compactor
33
import (
44
"context"
55
"crypto/rand"
6-
"errors"
76
"fmt"
87
"path"
98
"strings"
@@ -17,14 +16,12 @@ import (
1716
prom_testutil "github.com/prometheus/client_golang/prometheus/testutil"
1817
"github.com/stretchr/testify/assert"
1918
"github.com/stretchr/testify/require"
20-
"github.com/thanos-io/objstore"
2119
"github.com/thanos-io/thanos/pkg/block"
2220
"github.com/thanos-io/thanos/pkg/block/metadata"
2321

2422
"github.com/cortexproject/cortex/pkg/storage/tsdb"
2523
"github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex"
2624
cortex_testutil "github.com/cortexproject/cortex/pkg/storage/tsdb/testutil"
27-
"github.com/cortexproject/cortex/pkg/util"
2825
"github.com/cortexproject/cortex/pkg/util/services"
2926
)
3027

@@ -57,6 +54,37 @@ func TestBlocksCleaner(t *testing.T) {
5754
}
5855
}
5956

57+
func TestBlockCleaner_KeyPermissionDenied(t *testing.T) {
58+
const userID = "user-1"
59+
60+
bucketClient, _ := cortex_testutil.PrepareFilesystemBucket(t)
61+
bucketClient = bucketindex.BucketWithGlobalMarkers(bucketClient)
62+
63+
// Create blocks.
64+
ctx := context.Background()
65+
deletionDelay := 12 * time.Hour
66+
bucketClient = &cortex_testutil.MockBucketFailure{
67+
Bucket: bucketClient,
68+
GetFailures: map[string]error{
69+
path.Join(userID, "bucket-index.json.gz"): cortex_testutil.ErrKeyAccessDeniedError,
70+
},
71+
}
72+
73+
cfg := BlocksCleanerConfig{
74+
DeletionDelay: deletionDelay,
75+
CleanupInterval: time.Minute,
76+
CleanupConcurrency: 1,
77+
}
78+
79+
logger := log.NewNopLogger()
80+
scanner := tsdb.NewUsersScanner(bucketClient, tsdb.AllUsers, logger)
81+
cfgProvider := newMockConfigProvider()
82+
83+
cleaner := NewBlocksCleaner(cfg, bucketClient, scanner, cfgProvider, logger, nil)
84+
err := cleaner.cleanUser(ctx, userID, true)
85+
require.NoError(t, err)
86+
}
87+
6088
func testBlocksCleanerWithOptions(t *testing.T, options testBlocksCleanerOptions) {
6189
bucketClient, _ := cortex_testutil.PrepareFilesystemBucket(t)
6290

@@ -254,7 +282,7 @@ func TestBlocksCleaner_ShouldContinueOnBlockDeletionFailure(t *testing.T) {
254282
createDeletionMark(t, bucketClient, userID, block4, now.Add(-deletionDelay).Add(-time.Hour))
255283

256284
// To emulate a failure deleting a block, we wrap the bucket client in a mocked one.
257-
bucketClient = &mockBucketFailure{
285+
bucketClient = &cortex_testutil.MockBucketFailure{
258286
Bucket: bucketClient,
259287
DeleteFailures: []string{path.Join(userID, block3.String(), metadata.MetaFilename)},
260288
}
@@ -658,19 +686,6 @@ func TestBlocksCleaner_ShouldRemoveBlocksOutsideRetentionPeriod(t *testing.T) {
658686
}
659687
}
660688

661-
type mockBucketFailure struct {
662-
objstore.Bucket
663-
664-
DeleteFailures []string
665-
}
666-
667-
func (m *mockBucketFailure) Delete(ctx context.Context, name string) error {
668-
if util.StringsContain(m.DeleteFailures, name) {
669-
return errors.New("mocked delete failure")
670-
}
671-
return m.Bucket.Delete(ctx, name)
672-
}
673-
674689
type mockConfigProvider struct {
675690
userRetentionPeriods map[string]time.Duration
676691
}

Diff for: pkg/querier/blocks_finder_bucket_index.go

+7
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import (
1010
"github.com/prometheus/client_golang/prometheus"
1111
"github.com/thanos-io/objstore"
1212

13+
"github.com/cortexproject/cortex/pkg/util/validation"
14+
1315
"github.com/cortexproject/cortex/pkg/storage/bucket"
1416
"github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex"
1517
"github.com/cortexproject/cortex/pkg/util/services"
@@ -62,6 +64,11 @@ func (f *BucketIndexBlocksFinder) GetBlocks(ctx context.Context, userID string,
6264
// so the bucket index hasn't been created yet.
6365
return nil, nil, nil
6466
}
67+
68+
if errors.Is(err, bucket.ErrCustomerManagedKeyAccessDenied) {
69+
return nil, nil, validation.AccessDeniedError(err.Error())
70+
}
71+
6572
if err != nil {
6673
return nil, nil, err
6774
}

Diff for: pkg/querier/blocks_finder_bucket_index_test.go

+20
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ import (
1313
"github.com/stretchr/testify/require"
1414
"github.com/thanos-io/objstore"
1515

16+
"github.com/cortexproject/cortex/pkg/util/validation"
17+
1618
"github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex"
1719
cortex_testutil "github.com/cortexproject/cortex/pkg/storage/tsdb/testutil"
1820
"github.com/cortexproject/cortex/pkg/util/services"
@@ -241,3 +243,21 @@ func prepareBucketIndexBlocksFinder(t testing.TB, bkt objstore.Bucket) *BucketIn
241243

242244
return finder
243245
}
246+
247+
func TestBucketIndexBlocksFinder_GetBlocks_KeyPermissionDenied(t *testing.T) {
248+
const userID = "user-1"
249+
bkt, _ := cortex_testutil.PrepareFilesystemBucket(t)
250+
251+
bkt = &cortex_testutil.MockBucketFailure{
252+
Bucket: bkt,
253+
GetFailures: map[string]error{
254+
path.Join(userID, "bucket-index.json.gz"): cortex_testutil.ErrKeyAccessDeniedError,
255+
},
256+
}
257+
258+
finder := prepareBucketIndexBlocksFinder(t, bkt)
259+
260+
_, _, err := finder.GetBlocks(context.Background(), userID, 0, 100)
261+
expected := validation.AccessDeniedError("error")
262+
require.IsType(t, expected, err)
263+
}

Diff for: pkg/querier/error_translate_queryable.go

+3
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ func TranslateToPromqlAPIError(err error) error {
3939
case validation.LimitError:
4040
// This will be returned with status code 422 by Prometheus API.
4141
return err
42+
case validation.AccessDeniedError:
43+
// This will be returned with status code 422 by Prometheus API.
44+
return err
4245
default:
4346
if errors.Is(err, context.Canceled) {
4447
return err // 422

Diff for: pkg/querier/error_translate_queryable_test.go

+6
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,12 @@ func TestApiStatusCodes(t *testing.T) {
4343
expectedCode: 422,
4444
},
4545

46+
{
47+
err: validation.AccessDeniedError("access denied"),
48+
expectedString: "access denied",
49+
expectedCode: 422,
50+
},
51+
4652
{
4753
err: promql.ErrTooManySamples("query execution"),
4854
expectedString: "too many samples",

Diff for: pkg/storage/bucket/client.go

+2
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,8 @@ var (
4040
SupportedBackends = []string{S3, GCS, Azure, Swift, Filesystem}
4141

4242
ErrUnsupportedStorageBackend = errors.New("unsupported storage backend")
43+
44+
ErrCustomerManagedKeyAccessDenied = errors.New("access denied: customer key")
4345
)
4446

4547
// Config holds configuration for accessing long-term storage.

Diff for: pkg/storage/bucket/client_mock.go

+9-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@ import (
1212
"github.com/thanos-io/objstore"
1313
)
1414

15-
var errObjectDoesNotExist = errors.New("object does not exist")
15+
var (
16+
errObjectDoesNotExist = errors.New("object does not exist")
17+
errKeyPermissionDenied = errors.New("object key permission denied")
18+
)
1619

1720
// ClientMock mocks objstore.Bucket
1821
type ClientMock struct {
@@ -175,6 +178,11 @@ func (m *ClientMock) IsObjNotFoundErr(err error) bool {
175178
return err == errObjectDoesNotExist
176179
}
177180

181+
// IsCustomerManagedKeyError mocks objstore.Bucket.IsCustomerManagedKeyError()
182+
func (m *ClientMock) IsCustomerManagedKeyError(err error) bool {
183+
return err == errKeyPermissionDenied
184+
}
185+
178186
// ObjectSize mocks objstore.Bucket.Attributes()
179187
func (m *ClientMock) Attributes(ctx context.Context, name string) (objstore.ObjectAttributes, error) {
180188
args := m.Called(ctx, name)

Diff for: pkg/storage/bucket/prefixed_bucket_client.go

+7-2
Original file line numberDiff line numberDiff line change
@@ -73,18 +73,23 @@ func (b *PrefixedBucketClient) IsObjNotFoundErr(err error) bool {
7373
return b.bucket.IsObjNotFoundErr(err)
7474
}
7575

76+
// IsCustomerManagedKeyError returns true if the permissions for key used to encrypt the object was revoked.
77+
func (b *PrefixedBucketClient) IsCustomerManagedKeyError(err error) bool {
78+
return b.bucket.IsCustomerManagedKeyError(err)
79+
}
80+
7681
// Attributes returns attributes of the specified object.
7782
func (b *PrefixedBucketClient) Attributes(ctx context.Context, name string) (objstore.ObjectAttributes, error) {
7883
return b.bucket.Attributes(ctx, b.fullName(name))
7984
}
8085

81-
// WithExpectedErrs allows to specify a filter that marks certain errors as expected, so it will not increment
86+
// ReaderWithExpectedErrs allows to specify a filter that marks certain errors as expected, so it will not increment
8287
// thanos_objstore_bucket_operation_failures_total metric.
8388
func (b *PrefixedBucketClient) ReaderWithExpectedErrs(fn objstore.IsOpFailureExpectedFunc) objstore.BucketReader {
8489
return b.WithExpectedErrs(fn)
8590
}
8691

87-
// ReaderWithExpectedErrs allows to specify a filter that marks certain errors as expected, so it will not increment
92+
// WithExpectedErrs allows to specify a filter that marks certain errors as expected, so it will not increment
8893
// thanos_objstore_bucket_operation_failures_total metric.
8994
func (b *PrefixedBucketClient) WithExpectedErrs(fn objstore.IsOpFailureExpectedFunc) objstore.Bucket {
9095
if ib, ok := b.bucket.(objstore.InstrumentedBucket); ok {

Diff for: pkg/storage/bucket/s3/bucket_client.go

+5-1
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ func (b *BucketWithRetries) retry(ctx context.Context, f func() error, operation
115115
if lastErr == nil {
116116
return nil
117117
}
118-
if b.bucket.IsObjNotFoundErr(lastErr) {
118+
if b.bucket.IsObjNotFoundErr(lastErr) || b.bucket.IsCustomerManagedKeyError(lastErr) {
119119
return lastErr
120120
}
121121
retries.Wait()
@@ -194,6 +194,10 @@ func (b *BucketWithRetries) IsObjNotFoundErr(err error) bool {
194194
return b.bucket.IsObjNotFoundErr(err)
195195
}
196196

197+
func (b *BucketWithRetries) IsCustomerManagedKeyError(err error) bool {
198+
return b.bucket.IsCustomerManagedKeyError(err)
199+
}
200+
197201
func (b *BucketWithRetries) Close() error {
198202
return b.bucket.Close()
199203
}

Diff for: pkg/storage/bucket/s3/bucket_client_test.go

+55-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package s3
33
import (
44
"bytes"
55
"context"
6+
"errors"
67
"fmt"
78
"io"
89
"testing"
@@ -13,6 +14,49 @@ import (
1314
"github.com/thanos-io/objstore"
1415
)
1516

17+
var (
18+
errNotFound = errors.New("not found")
19+
errKeyDenied = errors.New("key denied")
20+
)
21+
22+
func TestBucketWithRetries_ShouldRetry(t *testing.T) {
23+
t.Parallel()
24+
25+
cases := map[string]struct {
26+
err error
27+
retryCount int
28+
}{
29+
"should not retry on not found": {
30+
err: errNotFound,
31+
retryCount: 1,
32+
},
33+
"should not retry on key access denied": {
34+
err: errKeyDenied,
35+
retryCount: 1,
36+
},
37+
}
38+
39+
for name, tc := range cases {
40+
t.Run(name, func(*testing.T) {
41+
m := mockBucket{
42+
FailCount: 3,
43+
errToReturn: tc.err,
44+
}
45+
46+
b := BucketWithRetries{
47+
logger: log.NewNopLogger(),
48+
bucket: &m,
49+
operationRetries: 5,
50+
retryMinBackoff: 10 * time.Millisecond,
51+
retryMaxBackoff: time.Second,
52+
}
53+
54+
_, _ = b.Get(context.Background(), "something")
55+
require.Equal(t, 1, m.calledCount)
56+
})
57+
}
58+
}
59+
1660
func TestBucketWithRetries_UploadSeekable(t *testing.T) {
1761
t.Parallel()
1862

@@ -102,6 +146,9 @@ func (f *fakeReader) Read(p []byte) (n int, err error) {
102146
type mockBucket struct {
103147
FailCount int
104148
uploadedContent []byte
149+
errToReturn error
150+
151+
calledCount int
105152
}
106153

107154
// Upload mocks objstore.Bucket.Upload()
@@ -135,7 +182,8 @@ func (m *mockBucket) Iter(ctx context.Context, dir string, f func(string) error,
135182

136183
// Get mocks objstore.Bucket.Get()
137184
func (m *mockBucket) Get(ctx context.Context, name string) (io.ReadCloser, error) {
138-
return nil, nil
185+
m.calledCount++
186+
return nil, m.errToReturn
139187
}
140188

141189
// GetRange mocks objstore.Bucket.GetRange()
@@ -150,7 +198,12 @@ func (m *mockBucket) Exists(ctx context.Context, name string) (bool, error) {
150198

151199
// IsObjNotFoundErr mocks objstore.Bucket.IsObjNotFoundErr()
152200
func (m *mockBucket) IsObjNotFoundErr(err error) bool {
153-
return false
201+
return err == errNotFound
202+
}
203+
204+
// IsCustomerManagedKeyError mocks objstore.Bucket.IsCustomerManagedKeyError()
205+
func (m *mockBucket) IsCustomerManagedKeyError(err error) bool {
206+
return err == errKeyDenied
154207
}
155208

156209
// ObjectSize mocks objstore.Bucket.Attributes()

Diff for: pkg/storage/bucket/sse_bucket_client.go

+4
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,10 @@ func (b *SSEBucketClient) IsObjNotFoundErr(err error) bool {
119119
return b.bucket.IsObjNotFoundErr(err)
120120
}
121121

122+
func (b *SSEBucketClient) IsCustomerManagedKeyError(err error) bool {
123+
return b.bucket.IsCustomerManagedKeyError(err)
124+
}
125+
122126
// Attributes implements objstore.Bucket.
123127
func (b *SSEBucketClient) Attributes(ctx context.Context, name string) (objstore.ObjectAttributes, error) {
124128
return b.bucket.Attributes(ctx, name)

Diff for: pkg/storage/tsdb/bucketindex/loader.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,8 @@ func (l *Loader) GetIndex(ctx context.Context, userID string) (*Index, error) {
115115

116116
if errors.Is(err, ErrIndexNotFound) {
117117
level.Warn(l.logger).Log("msg", "bucket index not found", "user", userID)
118+
} else if errors.Is(err, bucket.ErrCustomerManagedKeyAccessDenied) {
119+
level.Warn(l.logger).Log("msg", "key access denied when reading bucket index", "user", userID)
118120
} else {
119121
// We don't track ErrIndexNotFound as failure because it's a legit case (eg. a tenant just
120122
// started to remote write and its blocks haven't uploaded to storage yet).
@@ -196,7 +198,7 @@ func (l *Loader) updateCachedIndex(ctx context.Context, userID string) {
196198
l.loadAttempts.Inc()
197199
startTime := time.Now()
198200
idx, err := ReadIndex(readCtx, l.bkt, userID, l.cfgProvider, l.logger)
199-
if err != nil && !errors.Is(err, ErrIndexNotFound) {
201+
if err != nil && !errors.Is(err, ErrIndexNotFound) && !errors.Is(err, bucket.ErrCustomerManagedKeyAccessDenied) {
200202
l.loadFailures.Inc()
201203
level.Warn(l.logger).Log("msg", "unable to update bucket index", "user", userID, "err", err)
202204
return

0 commit comments

Comments
 (0)