Skip to content

Commit f1dc3a5

Browse files
[scrubber] Scrub Git URLs in log messages (#20843)
1 parent cf24e1c commit f1dc3a5

File tree

8 files changed

+157
-19
lines changed

8 files changed

+157
-19
lines changed

components/gitpod-protocol/src/util/scrubbing-config.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ export const redactedFields = ["auth_", "password", "token", "key", "jwt", "secr
1010
export const hashedFields = ["contextURL", "workspaceID", "username"];
1111

1212
// hashedValues are regular expressions which when matched cause the entire value to be hashed
13-
export const hashedValues = new Map<string, RegExp>([]);
13+
export const hashedValues = new Map<string, RegExp>([["url", /https?:\/\/[^\s]+\.git\b/g]]);
1414
// redactedValues are regular expressions which when matched cause the entire value to be redacted
1515
export const redactedValues = new Map<string, RegExp>([
1616
// https://html.spec.whatwg.org/multipage/input.html#email-state-(type=email)

components/gitpod-protocol/src/util/scrubbing.spec.ts

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,5 +59,116 @@ export class ScrubbingTest {
5959
const scrubbedValue = new TrustedValue(scrubber.scrubValue("[email protected]"));
6060
expect(scrubber.scrub({ key: scrubbedValue })).to.deep.equal({ key: "[redacted:email]" });
6161
}
62+
63+
@test public testAnalyticsProperties_URLScrubbing() {
64+
// Test case that mirrors the analytics.track() usage pattern
65+
const mockInstance = {
66+
id: "test-instance-123",
67+
workspaceId: "test-workspace-456",
68+
stoppingTime: "2023-01-01T00:00:00.000Z",
69+
status: {
70+
conditions: [
71+
{
72+
message:
73+
"Content initialization failed: cannot initialize workspace: git initializer gitClone: git clone --depth=1 --shallow-submodules https://gitlab.com/acme-corp/web/frontend/services/deployment-manager.git --config http.version=HTTP/1.1 . failed (exit status 128):",
74+
},
75+
{
76+
message: "Another error with URL: https://github.com/user/repo.git",
77+
},
78+
{
79+
message: "Error without URL",
80+
},
81+
{
82+
message: "API call to https://api.example.com/endpoint failed",
83+
},
84+
],
85+
timeout: false,
86+
},
87+
};
88+
89+
// This mirrors the exact usage in workspace-instance-controller.ts
90+
const scrubbedProperties = scrubber.scrub({
91+
instanceId: mockInstance.id,
92+
workspaceId: mockInstance.workspaceId,
93+
stoppingTime: new Date(mockInstance.stoppingTime),
94+
conditions: mockInstance.status.conditions,
95+
timeout: mockInstance.status.timeout,
96+
});
97+
98+
// Verify workspaceId is hashed (field-based scrubbing)
99+
expect(scrubbedProperties.workspaceId).to.match(/^\[redacted:md5:[a-f0-9]{32}\]$/);
100+
101+
// Verify instanceId is not scrubbed (not in sensitive fields)
102+
expect(scrubbedProperties.instanceId).to.equal("test-instance-123");
103+
104+
// Verify URLs in nested conditions are hashed (pattern-based scrubbing)
105+
expect(scrubbedProperties.conditions[0].message).to.include("[redacted:md5:");
106+
expect(scrubbedProperties.conditions[0].message).to.include(":url]");
107+
expect(scrubbedProperties.conditions[0].message).to.not.include("gitlab.com");
108+
109+
expect(scrubbedProperties.conditions[1].message).to.include("[redacted:md5:");
110+
expect(scrubbedProperties.conditions[1].message).to.include(":url]");
111+
expect(scrubbedProperties.conditions[1].message).to.not.include("github.com");
112+
113+
// Verify non-URL message is unchanged
114+
expect(scrubbedProperties.conditions[2].message).to.equal("Error without URL");
115+
116+
// Verify non-.git URL is NOT scrubbed
117+
expect(scrubbedProperties.conditions[3].message).to.equal(
118+
"API call to https://api.example.com/endpoint failed",
119+
);
120+
expect(scrubbedProperties.conditions[3].message).to.not.include("[redacted:md5:");
121+
122+
// Verify other properties are preserved
123+
expect(scrubbedProperties.timeout).to.equal(false);
124+
// Date objects get converted to empty objects by the scrubber since they don't have enumerable properties
125+
expect(scrubbedProperties.stoppingTime).to.be.an("object");
126+
}
127+
128+
@test public testURL_PatternScrubbing() {
129+
// Test individual URL scrubbing for .git URLs
130+
const urlMessage = "git clone https://gitlab.com/acme-corp/web/frontend/services/deployment-manager.git failed";
131+
const scrubbedMessage = scrubber.scrubValue(urlMessage);
132+
133+
expect(scrubbedMessage).to.include("[redacted:md5:");
134+
expect(scrubbedMessage).to.include(":url]");
135+
expect(scrubbedMessage).to.not.include("gitlab.com");
136+
expect(scrubbedMessage).to.include("git clone");
137+
expect(scrubbedMessage).to.include("failed");
138+
}
139+
140+
@test public testURL_NonGitURLsNotScrubbed() {
141+
// Test that non-.git URLs are NOT scrubbed
142+
const apiMessage = "API call to https://api.example.com/endpoint failed";
143+
const scrubbedMessage = scrubber.scrubValue(apiMessage);
144+
145+
// Non-.git URLs should remain unchanged
146+
expect(scrubbedMessage).to.equal("API call to https://api.example.com/endpoint failed");
147+
expect(scrubbedMessage).to.not.include("[redacted:md5:");
148+
}
149+
150+
@test public testURL_MixedURLTypes() {
151+
// Test message with both .git and non-.git URLs
152+
const mixedMessage = "Clone from https://github.com/user/repo.git then visit https://docs.gitpod.io/configure";
153+
const scrubbedMessage = scrubber.scrubValue(mixedMessage);
154+
155+
// .git URL should be scrubbed
156+
expect(scrubbedMessage).to.include("[redacted:md5:");
157+
expect(scrubbedMessage).to.include(":url]");
158+
expect(scrubbedMessage).to.not.include("github.com/user/repo.git");
159+
160+
// Non-.git URL should remain unchanged
161+
expect(scrubbedMessage).to.include("https://docs.gitpod.io/configure");
162+
}
163+
164+
@test public testURL_HttpGitURLs() {
165+
// Test that http:// .git URLs are also scrubbed
166+
const httpMessage = "git clone http://internal-git.company.com/project.git";
167+
const scrubbedMessage = scrubber.scrubValue(httpMessage);
168+
169+
expect(scrubbedMessage).to.include("[redacted:md5:");
170+
expect(scrubbedMessage).to.include(":url]");
171+
expect(scrubbedMessage).to.not.include("internal-git.company.com");
172+
}
62173
}
63174
module.exports = new ScrubbingTest();

components/gitpod-protocol/src/util/scrubbing.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,13 @@ function doScrub(obj: any, depth: number, nested: boolean): any {
133133
const result: any = {};
134134
for (const [key, value] of Object.entries(obj as object)) {
135135
if (typeof value === "string") {
136-
result[key] = scrubber.scrubKeyValue(key, value);
136+
// First apply field-based scrubbing, then pattern-based scrubbing
137+
let scrubbedValue = scrubber.scrubKeyValue(key, value);
138+
// If no field-based scrubbing was applied, apply pattern-based scrubbing
139+
if (scrubbedValue === value) {
140+
scrubbedValue = scrubber.scrubValue(value);
141+
}
142+
result[key] = scrubbedValue;
137143
} else {
138144
result[key] = doScrub(value, depth + 1, nested);
139145
}

components/scrubber/config.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@ var (
3232
}
3333

3434
// HashedValues are regular expressions which - when matched - cause the entire value to be hashed
35-
HashedValues = map[string]*regexp.Regexp{}
35+
HashedValues = map[string]*regexp.Regexp{
36+
"url": regexp.MustCompile(`https?://[^\s]+\.git\b`),
37+
}
3638

3739
// RedactedValues are regular expressions which - when matched - cause the entire value to be redacted
3840
RedactedValues = map[string]*regexp.Regexp{

components/scrubber/sanitisation_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ func TestSanitiser(t *testing.T) {
2929
{Func: SanitiseHashURLPathSegments, Name: "hash contextURL with BBS user repo", Input: "https://bitbucket.gitpod-dev.com/users/gitpod/repos/repotest/browse", Expectation: "[redacted:md5:454c2006e527428ce0fbb2222edfb5c5]/users/[redacted:md5:5bc8d0354fba47db774b70d2a9161bbb]/repos/[redacted:md5:3c3f61c49fd93e84a73e33f6194586cd]/browse"},
3030
{Func: SanitiseHashURLPathSegments, Name: "hash contextURL with BBS project PR", Input: "https://bitbucket.gitpod-dev.com/projects/TES/repos/2k-repos-0/pull-requests/1/overview", Expectation: "[redacted:md5:454c2006e527428ce0fbb2222edfb5c5]/projects/[redacted:md5:08e789053de980e0f1ac70a61125a17d]/repos/[redacted:md5:14571b57e21a5c26b9e81fe6216e27d1]/pull-requests/1/[redacted:md5:bce059749d61c1c247c303d0118d0d53]"},
3131
{Func: SanitiseHashURLPathSegments, Name: "hash contextURL with BBS branch", Input: "https://bitbucket.gitpod-dev.com/projects/TES/repos/2k-repos-0/branches?base=test", Expectation: "[redacted:md5:454c2006e527428ce0fbb2222edfb5c5]/projects/[redacted:md5:08e789053de980e0f1ac70a61125a17d]/repos/[redacted:md5:14571b57e21a5c26b9e81fe6216e27d1]/branches?[redacted:md5:0135e6beb2a6deb4f0668facc47bce76]"},
32+
{Func: SanitiseHashURLPathSegments, Name: "GitLab Git URL", Input: "https://gitlab.com/acme-corp/web/frontend/services/deployment-manager.git", Expectation: "[redacted:md5:8c3e227c86409b1e3e734e711a77fd6c]/[redacted:md5:7c879ad6a7611d94b34c1911910257c9]/[redacted:md5:2567a5ec9705eb7ac2c984033e06189d]/[redacted:md5:aca33b9c046b2a50b8c3c54cc0380de8]/[redacted:md5:10cd395cf71c18328c863c08e78f3fd0]/[redacted:md5:d890bc8f5f32a034527f9be94624af58]"},
3233
}
3334

3435
for _, test := range tests {

components/scrubber/scrubber_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ func TestValue(t *testing.T) {
2222
{Name: "empty string"},
2323
{Name: "email", Value: "[email protected]", Expectation: "[redacted:email]"},
2424
{Name: "email in text", Value: "The email is [email protected] or [email protected]", Expectation: "The email is [redacted:email] or [redacted:email]"},
25+
{Name: "GitLab Git URL in text", Value: "Content initialization failed: cannot initialize workspace: git initializer gitClone: git clone --depth=1 --shallow-submodules https://gitlab.com/acme-corp/web/frontend/services/deployment-manager.git --config http.version=HTTP/1.1 . failed (exit status 128)", Expectation: "Content initialization failed: cannot initialize workspace: git initializer gitClone: git clone --depth=1 --shallow-submodules [redacted:md5:aa0dfa0c402612a8314b8e7c4326a395:url] --config http.version=HTTP/1.1 . failed (exit status 128)"},
26+
{Name: "Non-git URL not scrubbed", Value: "API call to https://api.example.com/endpoint failed", Expectation: "API call to https://api.example.com/endpoint failed"},
27+
{Name: "Mixed URLs", Value: "Clone from https://github.com/user/repo.git then visit https://docs.gitpod.io/configure", Expectation: "Clone from [redacted:md5:3c5467d320a0b72072bc609f12e7d879:url] then visit https://docs.gitpod.io/configure"},
28+
{Name: "HTTP Git URL", Value: "git clone http://internal-git.company.com/project.git", Expectation: "git clone [redacted:md5:11774800a9c933d1181c479ea207cdff:url]"},
2529
}
2630

2731
for _, test := range tests {

components/server/src/workspace/workspace-starter.ts

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ import {
6363
WorkspaceTimeoutDuration,
6464
} from "@gitpod/gitpod-protocol";
6565
import { IAnalyticsWriter, TrackMessage } from "@gitpod/gitpod-protocol/lib/analytics";
66+
import { scrubber } from "@gitpod/gitpod-protocol/lib/util/scrubbing";
6667
import { AttributionId } from "@gitpod/gitpod-protocol/lib/attribution";
6768
import { Deferred } from "@gitpod/gitpod-protocol/lib/util/deferred";
6869
import { LogContext, log } from "@gitpod/gitpod-protocol/lib/util/logging";
@@ -729,11 +730,12 @@ export class WorkspaceStarter {
729730
project?.settings?.prebuilds?.triggerStrategy ?? "webhook-based";
730731
}
731732

732-
// update analytics
733+
// update analytics - scrub properties that might contain sensitive data like URLs
734+
const scrubbedTrackProperties = scrubber.scrub(trackProperties);
733735
this.analytics.track({
734736
userId: user.id,
735737
event: "workspace_started",
736-
properties: trackProperties,
738+
properties: scrubbedTrackProperties,
737739
timestamp: new Date(instance.creationTime),
738740
});
739741
} catch (err) {
@@ -1083,15 +1085,17 @@ export class WorkspaceStarter {
10831085
};
10841086

10851087
if (WithReferrerContext.is(workspace.context)) {
1088+
// Scrub properties that might contain sensitive data like URLs
1089+
const scrubbedReferrerProperties = scrubber.scrub({
1090+
workspaceId: workspace.id,
1091+
instanceId: instance.id,
1092+
referrer: workspace.context.referrer,
1093+
referrerIde: workspace.context.referrerIde,
1094+
});
10861095
this.analytics.track({
10871096
userId: user.id,
10881097
event: "ide_referrer",
1089-
properties: {
1090-
workspaceId: workspace.id,
1091-
instanceId: instance.id,
1092-
referrer: workspace.context.referrer,
1093-
referrerIde: workspace.context.referrerIde,
1094-
},
1098+
properties: scrubbedReferrerProperties,
10951099
});
10961100
}
10971101
return instance;
@@ -1395,10 +1399,16 @@ export class WorkspaceStarter {
13951399
err = new StartInstanceError("imageBuildFailed", err);
13961400
increaseImageBuildsCompletedTotal("failed");
13971401
}
1402+
// Scrub properties that might contain sensitive data like URLs
1403+
const scrubbedImageBuildProperties = scrubber.scrub({
1404+
workspaceId: workspace.id,
1405+
instanceId: instance.id,
1406+
contextURL: workspace.contextURL,
1407+
});
13981408
this.analytics.track({
13991409
userId: user.id,
14001410
event: "imagebuild-failed",
1401-
properties: { workspaceId: workspace.id, instanceId: instance.id, contextURL: workspace.contextURL },
1411+
properties: scrubbedImageBuildProperties,
14021412
});
14031413

14041414
throw err;

components/ws-manager-bridge/src/workspace-instance-controller.ts

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import { repeat } from "@gitpod/gitpod-protocol/lib/util/repeat";
2020
import { PrebuildUpdater } from "./prebuild-updater";
2121
import { RedisPublisher } from "@gitpod/gitpod-db/lib";
2222
import { durationLongerThanSeconds } from "@gitpod/gitpod-protocol/lib/util/timeutil";
23+
import { scrubber } from "@gitpod/gitpod-protocol/lib/util/scrubbing";
2324

2425
export const WorkspaceInstanceController = Symbol("WorkspaceInstanceController");
2526

@@ -286,17 +287,20 @@ export class WorkspaceInstanceControllerImpl implements WorkspaceInstanceControl
286287

287288
try {
288289
await this.userDB.trace({ span }).deleteGitpodTokensNamedLike(ownerUserID, `${instance.id}-%`);
290+
// Scrub properties that might contain sensitive data like URLs
291+
const scrubbedProperties = scrubber.scrub({
292+
instanceId: instance.id,
293+
workspaceId: instance.workspaceId,
294+
stoppingTime: new Date(instance.stoppingTime!),
295+
conditions: instance.status.conditions,
296+
timeout: instance.status.timeout,
297+
});
298+
289299
this.analytics.track({
290300
userId: ownerUserID,
291301
event: "workspace_stopped",
292302
messageId: `bridge-wsstopped-${instance.id}`,
293-
properties: {
294-
instanceId: instance.id,
295-
workspaceId: instance.workspaceId,
296-
stoppingTime: new Date(instance.stoppingTime!),
297-
conditions: instance.status.conditions,
298-
timeout: instance.status.timeout,
299-
},
303+
properties: scrubbedProperties,
300304
timestamp: new Date(instance.stoppedTime!),
301305
});
302306
} catch (err) {

0 commit comments

Comments
 (0)