diff --git a/components/gitpod-protocol/src/util/scrubbing-config.ts b/components/gitpod-protocol/src/util/scrubbing-config.ts index 6ce92108a45b7d..ab24a7981f1cbe 100644 --- a/components/gitpod-protocol/src/util/scrubbing-config.ts +++ b/components/gitpod-protocol/src/util/scrubbing-config.ts @@ -10,7 +10,7 @@ export const redactedFields = ["auth_", "password", "token", "key", "jwt", "secr export const hashedFields = ["contextURL", "workspaceID", "username"]; // hashedValues are regular expressions which when matched cause the entire value to be hashed -export const hashedValues = new Map([]); +export const hashedValues = new Map([["url", /https?:\/\/[^\s]+\.git\b/g]]); // redactedValues are regular expressions which when matched cause the entire value to be redacted export const redactedValues = new Map([ // https://html.spec.whatwg.org/multipage/input.html#email-state-(type=email) diff --git a/components/gitpod-protocol/src/util/scrubbing.spec.ts b/components/gitpod-protocol/src/util/scrubbing.spec.ts index 5fd923dd9fcaac..f4368c43bba369 100644 --- a/components/gitpod-protocol/src/util/scrubbing.spec.ts +++ b/components/gitpod-protocol/src/util/scrubbing.spec.ts @@ -59,5 +59,116 @@ export class ScrubbingTest { const scrubbedValue = new TrustedValue(scrubber.scrubValue("foo@bar.com")); expect(scrubber.scrub({ key: scrubbedValue })).to.deep.equal({ key: "[redacted:email]" }); } + + @test public testAnalyticsProperties_URLScrubbing() { + // Test case that mirrors the analytics.track() usage pattern + const mockInstance = { + id: "test-instance-123", + workspaceId: "test-workspace-456", + stoppingTime: "2023-01-01T00:00:00.000Z", + status: { + conditions: [ + { + message: + "Content initialization failed: cannot initialize workspace: git initializer gitClone: git clone --depth=1 --shallow-submodules https://gitlab.com/acme-corp/web/frontend/services/deployment-manager.git --config http.version=HTTP/1.1 . 
failed (exit status 128):", + }, + { + message: "Another error with URL: https://github.com/user/repo.git", + }, + { + message: "Error without URL", + }, + { + message: "API call to https://api.example.com/endpoint failed", + }, + ], + timeout: false, + }, + }; + + // This mirrors the exact usage in workspace-instance-controller.ts + const scrubbedProperties = scrubber.scrub({ + instanceId: mockInstance.id, + workspaceId: mockInstance.workspaceId, + stoppingTime: new Date(mockInstance.stoppingTime), + conditions: mockInstance.status.conditions, + timeout: mockInstance.status.timeout, + }); + + // Verify workspaceId is hashed (field-based scrubbing) + expect(scrubbedProperties.workspaceId).to.match(/^\[redacted:md5:[a-f0-9]{32}\]$/); + + // Verify instanceId is not scrubbed (not in sensitive fields) + expect(scrubbedProperties.instanceId).to.equal("test-instance-123"); + + // Verify URLs in nested conditions are hashed (pattern-based scrubbing) + expect(scrubbedProperties.conditions[0].message).to.include("[redacted:md5:"); + expect(scrubbedProperties.conditions[0].message).to.include(":url]"); + expect(scrubbedProperties.conditions[0].message).to.not.include("gitlab.com"); + + expect(scrubbedProperties.conditions[1].message).to.include("[redacted:md5:"); + expect(scrubbedProperties.conditions[1].message).to.include(":url]"); + expect(scrubbedProperties.conditions[1].message).to.not.include("github.com"); + + // Verify non-URL message is unchanged + expect(scrubbedProperties.conditions[2].message).to.equal("Error without URL"); + + // Verify non-.git URL is NOT scrubbed + expect(scrubbedProperties.conditions[3].message).to.equal( + "API call to https://api.example.com/endpoint failed", + ); + expect(scrubbedProperties.conditions[3].message).to.not.include("[redacted:md5:"); + + // Verify other properties are preserved + expect(scrubbedProperties.timeout).to.equal(false); + // Date objects get converted to empty objects by the scrubber since they don't have 
enumerable properties + expect(scrubbedProperties.stoppingTime).to.be.an("object"); + } + + @test public testURL_PatternScrubbing() { + // Test individual URL scrubbing for .git URLs + const urlMessage = "git clone https://gitlab.com/acme-corp/web/frontend/services/deployment-manager.git failed"; + const scrubbedMessage = scrubber.scrubValue(urlMessage); + + expect(scrubbedMessage).to.include("[redacted:md5:"); + expect(scrubbedMessage).to.include(":url]"); + expect(scrubbedMessage).to.not.include("gitlab.com"); + expect(scrubbedMessage).to.include("git clone"); + expect(scrubbedMessage).to.include("failed"); + } + + @test public testURL_NonGitURLsNotScrubbed() { + // Test that non-.git URLs are NOT scrubbed + const apiMessage = "API call to https://api.example.com/endpoint failed"; + const scrubbedMessage = scrubber.scrubValue(apiMessage); + + // Non-.git URLs should remain unchanged + expect(scrubbedMessage).to.equal("API call to https://api.example.com/endpoint failed"); + expect(scrubbedMessage).to.not.include("[redacted:md5:"); + } + + @test public testURL_MixedURLTypes() { + // Test message with both .git and non-.git URLs + const mixedMessage = "Clone from https://github.com/user/repo.git then visit https://docs.gitpod.io/configure"; + const scrubbedMessage = scrubber.scrubValue(mixedMessage); + + // .git URL should be scrubbed + expect(scrubbedMessage).to.include("[redacted:md5:"); + expect(scrubbedMessage).to.include(":url]"); + expect(scrubbedMessage).to.not.include("github.com/user/repo.git"); + + // Non-.git URL should remain unchanged + expect(scrubbedMessage).to.include("https://docs.gitpod.io/configure"); + } + + @test public testURL_HttpGitURLs() { + // Test that http:// .git URLs are also scrubbed + const httpMessage = "git clone http://internal-git.company.com/project.git"; + const scrubbedMessage = scrubber.scrubValue(httpMessage); + + expect(scrubbedMessage).to.include("[redacted:md5:"); + expect(scrubbedMessage).to.include(":url]"); + 
expect(scrubbedMessage).to.not.include("internal-git.company.com"); + } } module.exports = new ScrubbingTest(); diff --git a/components/gitpod-protocol/src/util/scrubbing.ts b/components/gitpod-protocol/src/util/scrubbing.ts index 985182d027f058..f0844f35a8ee4e 100644 --- a/components/gitpod-protocol/src/util/scrubbing.ts +++ b/components/gitpod-protocol/src/util/scrubbing.ts @@ -133,7 +133,13 @@ function doScrub(obj: any, depth: number, nested: boolean): any { const result: any = {}; for (const [key, value] of Object.entries(obj as object)) { if (typeof value === "string") { - result[key] = scrubber.scrubKeyValue(key, value); + // First apply field-based scrubbing, then pattern-based scrubbing + let scrubbedValue = scrubber.scrubKeyValue(key, value); + // If no field-based scrubbing was applied, apply pattern-based scrubbing + if (scrubbedValue === value) { + scrubbedValue = scrubber.scrubValue(value); + } + result[key] = scrubbedValue; } else { result[key] = doScrub(value, depth + 1, nested); } diff --git a/components/scrubber/config.go b/components/scrubber/config.go index 26f25f45719e6d..c29eefff122593 100644 --- a/components/scrubber/config.go +++ b/components/scrubber/config.go @@ -32,7 +32,9 @@ var ( } // HashedValues are regular expressions which - when matched - cause the entire value to be hashed - HashedValues = map[string]*regexp.Regexp{} + HashedValues = map[string]*regexp.Regexp{ + "url": regexp.MustCompile(`https?://[^\s]+\.git\b`), + } // RedactedValues are regular expressions which - when matched - cause the entire value to be redacted RedactedValues = map[string]*regexp.Regexp{ diff --git a/components/scrubber/sanitisation_test.go b/components/scrubber/sanitisation_test.go index da50b2aaab4819..824f92c45af2b1 100644 --- a/components/scrubber/sanitisation_test.go +++ b/components/scrubber/sanitisation_test.go @@ -29,6 +29,7 @@ func TestSanitiser(t *testing.T) { {Func: SanitiseHashURLPathSegments, Name: "hash contextURL with BBS user repo", Input: 
"https://bitbucket.gitpod-dev.com/users/gitpod/repos/repotest/browse", Expectation: "[redacted:md5:454c2006e527428ce0fbb2222edfb5c5]/users/[redacted:md5:5bc8d0354fba47db774b70d2a9161bbb]/repos/[redacted:md5:3c3f61c49fd93e84a73e33f6194586cd]/browse"}, {Func: SanitiseHashURLPathSegments, Name: "hash contextURL with BBS project PR", Input: "https://bitbucket.gitpod-dev.com/projects/TES/repos/2k-repos-0/pull-requests/1/overview", Expectation: "[redacted:md5:454c2006e527428ce0fbb2222edfb5c5]/projects/[redacted:md5:08e789053de980e0f1ac70a61125a17d]/repos/[redacted:md5:14571b57e21a5c26b9e81fe6216e27d1]/pull-requests/1/[redacted:md5:bce059749d61c1c247c303d0118d0d53]"}, {Func: SanitiseHashURLPathSegments, Name: "hash contextURL with BBS branch", Input: "https://bitbucket.gitpod-dev.com/projects/TES/repos/2k-repos-0/branches?base=test", Expectation: "[redacted:md5:454c2006e527428ce0fbb2222edfb5c5]/projects/[redacted:md5:08e789053de980e0f1ac70a61125a17d]/repos/[redacted:md5:14571b57e21a5c26b9e81fe6216e27d1]/branches?[redacted:md5:0135e6beb2a6deb4f0668facc47bce76]"}, + {Func: SanitiseHashURLPathSegments, Name: "GitLab Git URL", Input: "https://gitlab.com/acme-corp/web/frontend/services/deployment-manager.git", Expectation: "[redacted:md5:8c3e227c86409b1e3e734e711a77fd6c]/[redacted:md5:7c879ad6a7611d94b34c1911910257c9]/[redacted:md5:2567a5ec9705eb7ac2c984033e06189d]/[redacted:md5:aca33b9c046b2a50b8c3c54cc0380de8]/[redacted:md5:10cd395cf71c18328c863c08e78f3fd0]/[redacted:md5:d890bc8f5f32a034527f9be94624af58]"}, } for _, test := range tests { diff --git a/components/scrubber/scrubber_test.go b/components/scrubber/scrubber_test.go index 7984c4bf363d7b..e739ca1de20568 100644 --- a/components/scrubber/scrubber_test.go +++ b/components/scrubber/scrubber_test.go @@ -22,6 +22,10 @@ func TestValue(t *testing.T) { {Name: "empty string"}, {Name: "email", Value: "foo@bar.com", Expectation: "[redacted:email]"}, {Name: "email in text", Value: "The email is foo@bar.com or bar@foo.com", 
Expectation: "The email is [redacted:email] or [redacted:email]"}, + {Name: "GitLab Git URL in text", Value: "Content initialization failed: cannot initialize workspace: git initializer gitClone: git clone --depth=1 --shallow-submodules https://gitlab.com/acme-corp/web/frontend/services/deployment-manager.git --config http.version=HTTP/1.1 . failed (exit status 128)", Expectation: "Content initialization failed: cannot initialize workspace: git initializer gitClone: git clone --depth=1 --shallow-submodules [redacted:md5:aa0dfa0c402612a8314b8e7c4326a395:url] --config http.version=HTTP/1.1 . failed (exit status 128)"}, + {Name: "Non-git URL not scrubbed", Value: "API call to https://api.example.com/endpoint failed", Expectation: "API call to https://api.example.com/endpoint failed"}, + {Name: "Mixed URLs", Value: "Clone from https://github.com/user/repo.git then visit https://docs.gitpod.io/configure", Expectation: "Clone from [redacted:md5:3c5467d320a0b72072bc609f12e7d879:url] then visit https://docs.gitpod.io/configure"}, + {Name: "HTTP Git URL", Value: "git clone http://internal-git.company.com/project.git", Expectation: "git clone [redacted:md5:11774800a9c933d1181c479ea207cdff:url]"}, } for _, test := range tests { diff --git a/components/server/src/workspace/workspace-starter.ts b/components/server/src/workspace/workspace-starter.ts index fb0d08eabb1383..61c69dfcfa4e01 100644 --- a/components/server/src/workspace/workspace-starter.ts +++ b/components/server/src/workspace/workspace-starter.ts @@ -63,6 +63,7 @@ import { WorkspaceTimeoutDuration, } from "@gitpod/gitpod-protocol"; import { IAnalyticsWriter, TrackMessage } from "@gitpod/gitpod-protocol/lib/analytics"; +import { scrubber } from "@gitpod/gitpod-protocol/lib/util/scrubbing"; import { AttributionId } from "@gitpod/gitpod-protocol/lib/attribution"; import { Deferred } from "@gitpod/gitpod-protocol/lib/util/deferred"; import { LogContext, log } from "@gitpod/gitpod-protocol/lib/util/logging"; @@ -729,11 
+730,12 @@ export class WorkspaceStarter { project?.settings?.prebuilds?.triggerStrategy ?? "webhook-based"; } - // update analytics + // update analytics - scrub properties that might contain sensitive data like URLs + const scrubbedTrackProperties = scrubber.scrub(trackProperties); this.analytics.track({ userId: user.id, event: "workspace_started", - properties: trackProperties, + properties: scrubbedTrackProperties, timestamp: new Date(instance.creationTime), }); } catch (err) { @@ -1083,15 +1085,17 @@ export class WorkspaceStarter { }; if (WithReferrerContext.is(workspace.context)) { + // Scrub properties that might contain sensitive data like URLs + const scrubbedReferrerProperties = scrubber.scrub({ + workspaceId: workspace.id, + instanceId: instance.id, + referrer: workspace.context.referrer, + referrerIde: workspace.context.referrerIde, + }); this.analytics.track({ userId: user.id, event: "ide_referrer", - properties: { - workspaceId: workspace.id, - instanceId: instance.id, - referrer: workspace.context.referrer, - referrerIde: workspace.context.referrerIde, - }, + properties: scrubbedReferrerProperties, }); } return instance; @@ -1395,10 +1399,16 @@ export class WorkspaceStarter { err = new StartInstanceError("imageBuildFailed", err); increaseImageBuildsCompletedTotal("failed"); } + // Scrub properties that might contain sensitive data like URLs + const scrubbedImageBuildProperties = scrubber.scrub({ + workspaceId: workspace.id, + instanceId: instance.id, + contextURL: workspace.contextURL, + }); this.analytics.track({ userId: user.id, event: "imagebuild-failed", - properties: { workspaceId: workspace.id, instanceId: instance.id, contextURL: workspace.contextURL }, + properties: scrubbedImageBuildProperties, }); throw err; diff --git a/components/ws-manager-bridge/src/workspace-instance-controller.ts b/components/ws-manager-bridge/src/workspace-instance-controller.ts index 921dd5f2de5c1b..929668efda11cd 100644 --- 
a/components/ws-manager-bridge/src/workspace-instance-controller.ts +++ b/components/ws-manager-bridge/src/workspace-instance-controller.ts @@ -20,6 +20,7 @@ import { repeat } from "@gitpod/gitpod-protocol/lib/util/repeat"; import { PrebuildUpdater } from "./prebuild-updater"; import { RedisPublisher } from "@gitpod/gitpod-db/lib"; import { durationLongerThanSeconds } from "@gitpod/gitpod-protocol/lib/util/timeutil"; +import { scrubber } from "@gitpod/gitpod-protocol/lib/util/scrubbing"; export const WorkspaceInstanceController = Symbol("WorkspaceInstanceController"); @@ -286,17 +287,23 @@ export class WorkspaceInstanceControllerImpl implements WorkspaceInstanceControl try { await this.userDB.trace({ span }).deleteGitpodTokensNamedLike(ownerUserID, `${instance.id}-%`); + // Scrub properties that might contain sensitive data like URLs. + // stoppingTime is merged in after scrubbing: scrubber.scrub() would turn the Date into an empty object. + const scrubbedProperties = { + ...scrubber.scrub({ + instanceId: instance.id, + workspaceId: instance.workspaceId, + conditions: instance.status.conditions, + timeout: instance.status.timeout, + }), + stoppingTime: new Date(instance.stoppingTime!), + }; + this.analytics.track({ userId: ownerUserID, event: "workspace_stopped", messageId: `bridge-wsstopped-${instance.id}`, - properties: { - instanceId: instance.id, - workspaceId: instance.workspaceId, - stoppingTime: new Date(instance.stoppingTime!), - conditions: instance.status.conditions, - timeout: instance.status.timeout, - }, + properties: scrubbedProperties, timestamp: new Date(instance.stoppedTime!), }); } catch (err) {