Skip to content

Commit c564e7a

Browse files
Jagpreet Singh Tamber (jagpreetstamber)
authored and committed
Sanitize URLs for bucket fetch error messages.
Co-authored-by: Jagpreet Singh Tamber <[email protected]> Signed-off-by: Stefan Prodan <[email protected]>
1 parent 9a0f9af commit c564e7a

File tree

3 files changed

+187
-2
lines changed

3 files changed

+187
-2
lines changed

internal/controller/bucket_controller.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -728,7 +728,7 @@ func fetchEtagIndex(ctx context.Context, provider BucketProvider, obj *bucketv1.
728728
path := filepath.Join(tempDir, sourceignore.IgnoreFile)
729729
if _, err := provider.FGetObject(ctxTimeout, obj.Spec.BucketName, sourceignore.IgnoreFile, path); err != nil {
730730
if !provider.ObjectIsNotFound(err) {
731-
return err
731+
return fmt.Errorf("failed to get Etag for '%s' object: %w", sourceignore.IgnoreFile, serror.SanitizeError(err))
732732
}
733733
}
734734
ps, err := sourceignore.ReadIgnoreFile(path, nil)
@@ -792,7 +792,7 @@ func fetchIndexFiles(ctx context.Context, provider BucketProvider, obj *bucketv1
792792
index.Delete(k)
793793
return nil
794794
}
795-
return fmt.Errorf("failed to get '%s' object: %w", k, err)
795+
return fmt.Errorf("failed to get '%s' object: %w", k, serror.SanitizeError(err))
796796
}
797797
if t != etag {
798798
index.Add(k, etag)

internal/error/sanitized.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
package error
2+
3+
import (
4+
"fmt"
5+
"net/url"
6+
"regexp"
7+
)
8+
9+
// SanitizedError is an error value that carries nothing but a message
// string. It keeps no reference to any other error, so the message is the
// only information it exposes.
type SanitizedError struct {
	// err is the text returned verbatim by Error.
	err string
}

// Error implements the error interface by returning the stored message.
func (s SanitizedError) Error() string {
	message := s.err
	return message
}
16+
17+
// SanitizeError extracts all URLs from the error message
18+
// and replaces them with the URL without the query string.
19+
func SanitizeError(err error) SanitizedError {
20+
errorMessage := err.Error()
21+
for _, u := range extractURLs(errorMessage) {
22+
urlWithoutQueryString, err := removeQueryString(u)
23+
if err == nil {
24+
re, err := regexp.Compile(fmt.Sprintf("%s*", regexp.QuoteMeta(u)))
25+
if err == nil {
26+
errorMessage = re.ReplaceAllString(errorMessage, urlWithoutQueryString)
27+
}
28+
}
29+
}
30+
31+
return SanitizedError{errorMessage}
32+
}
33+
34+
// removeQueryString parses urlStr with url.Parse and returns it re-serialized
// without its query component. Input that has no query string, including a
// plain word that parses as a relative reference, comes back without a query
// as well. The parse error is returned, with an empty string, when urlStr
// cannot be parsed.
func removeQueryString(urlStr string) (string, error) {
	u, err := url.Parse(urlStr)
	if err != nil {
		return "", err
	}

	// Drop the query. ForceQuery has to be cleared too: url.Parse sets it for a
	// URL that ends in a bare "?", and String would otherwise keep that "?".
	u.RawQuery = ""
	u.ForceQuery = false
	return u.String(), nil
}
46+
47+
// urlRegex matches an http or https URL and everything after it up to the
// next whitespace character. This is a deliberately simple pattern: trailing
// punctuation that directly follows a URL (for example a closing quote or a
// colon) is part of the match. It is compiled once at package initialization
// rather than on every extractURLs call.
var urlRegex = regexp.MustCompile(`https?://[^\s]+`)

// extractURLs returns every substring of logMessage matched by urlRegex, in
// order of appearance. When nothing matches it returns an empty, non-nil
// slice.
func extractURLs(logMessage string) []string {
	// Collect all matches in the message, not just the first one.
	matches := urlRegex.FindAllString(logMessage, -1)
	if len(matches) == 0 {
		// FindAllString reports "no match" as nil; callers get an empty slice.
		return []string{}
	}

	return matches
}

internal/error/sanitized_test.go

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
package error
2+
3+
import (
4+
"errors"
5+
"testing"
6+
7+
. "github.com/onsi/gomega"
8+
)
9+
10+
func Test_extractURLs(t *testing.T) {
11+
12+
tests := []struct {
13+
name string
14+
logMessage string
15+
wantUrls []string
16+
}{
17+
{
18+
name: "Log Contains single URL",
19+
logMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml?se=2024-05-01T16%3A28%3A26Z&sig=Signature&sp=rl&sr=c&st=2024-02-01T16%3A28%3A26Z&sv=2022-11-02\": dial tcp 20.60.53.129:443: connect: connection refused",
20+
wantUrls: []string{"https://blobstorage.blob.core.windows.net/container/index.yaml?se=2024-05-01T16%3A28%3A26Z&sig=Signature&sp=rl&sr=c&st=2024-02-01T16%3A28%3A26Z&sv=2022-11-02\":"},
21+
},
22+
{
23+
name: "Log Contains multiple URL",
24+
logMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml?abc=es https://blobstorage1.blob.core.windows.net/container/index.yaml?abc=no : dial tcp 20.60.53.129:443: connect: connection refused",
25+
wantUrls: []string{
26+
"https://blobstorage.blob.core.windows.net/container/index.yaml?abc=es",
27+
"https://blobstorage1.blob.core.windows.net/container/index.yaml?abc=no",
28+
},
29+
},
30+
{
31+
name: "Log Contains No URL",
32+
logMessage: "Log message without URL",
33+
wantUrls: []string{},
34+
},
35+
}
36+
37+
for _, tt := range tests {
38+
t.Run(tt.name, func(t *testing.T) {
39+
g := NewWithT(t)
40+
41+
urls := extractURLs(tt.logMessage)
42+
43+
g.Expect(len(urls)).To(Equal(len(tt.wantUrls)))
44+
for i := range tt.wantUrls {
45+
g.Expect(urls[i]).To(Equal(tt.wantUrls[i]))
46+
}
47+
})
48+
}
49+
}
50+
51+
func Test_removeQueryString(t *testing.T) {
52+
53+
tests := []struct {
54+
name string
55+
urlStr string
56+
wantUrl string
57+
}{
58+
{
59+
name: "URL with query string",
60+
urlStr: "https://blobstorage.blob.core.windows.net/container/index.yaml?se=2024-05-01T16%3A28%3A26Z&sig=Signature&sp=rl&sr=c&st=2024-02-01T16%3A28%3A26Z&sv=2022-11-02",
61+
wantUrl: "https://blobstorage.blob.core.windows.net/container/index.yaml",
62+
},
63+
{
64+
name: "URL without query string",
65+
urlStr: "https://blobstorage.blob.core.windows.net/container/index.yaml",
66+
wantUrl: "https://blobstorage.blob.core.windows.net/container/index.yaml",
67+
},
68+
{
69+
name: "URL with query string and port",
70+
urlStr: "https://blobstorage.blob.core.windows.net:443/container/index.yaml?se=2024-05-01T16%3A28%3A26Z&sig=Signature&sp=rl&sr=c&st=2024-02-01T16%3A28%3A26Z&sv=2022-11-02",
71+
wantUrl: "https://blobstorage.blob.core.windows.net:443/container/index.yaml",
72+
},
73+
{
74+
name: "Invalid URL",
75+
urlStr: "NoUrl",
76+
wantUrl: "NoUrl",
77+
},
78+
}
79+
80+
for _, tt := range tests {
81+
t.Run(tt.name, func(t *testing.T) {
82+
g := NewWithT(t)
83+
84+
urlWithoutQueryString, err := removeQueryString(tt.urlStr)
85+
86+
g.Expect(err).To(BeNil())
87+
g.Expect(urlWithoutQueryString).To(Equal(tt.wantUrl))
88+
})
89+
}
90+
}
91+
92+
func Test_SanitizeError(t *testing.T) {
93+
94+
tests := []struct {
95+
name string
96+
errMessage string
97+
wantErrMessage string
98+
}{
99+
{
100+
name: "Log message with URL with query string",
101+
errMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml?se=2024-05-01T16%3A28%3A26Z&sig=Signature&sp=rl&sr=c&st=2024-02-01T16%3A28%3A26Z&sv=2022-11-02\": dial tcp 20.60.53.129:443: connect: connection refused",
102+
wantErrMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml dial tcp 20.60.53.129:443: connect: connection refused",
103+
},
104+
{
105+
name: "Log message without URL",
106+
errMessage: "Log message contains no URL",
107+
wantErrMessage: "Log message contains no URL",
108+
},
109+
110+
{
111+
name: "Log message with multiple Urls",
112+
errMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml?abc=es https://blobstorage1.blob.core.windows.net/container/index.yaml?abc=no dial tcp 20.60.53.129:443: connect: connection refused",
113+
wantErrMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml https://blobstorage1.blob.core.windows.net/container/index.yaml dial tcp 20.60.53.129:443: connect: connection refused",
114+
},
115+
}
116+
117+
for _, tt := range tests {
118+
t.Run(tt.name, func(t *testing.T) {
119+
g := NewWithT(t)
120+
121+
err := SanitizeError(errors.New(tt.errMessage))
122+
g.Expect(err.Error()).To(Equal(tt.wantErrMessage))
123+
})
124+
}
125+
}

0 commit comments

Comments
 (0)