Skip to content

[scrubber] Scrub URLs in log messages #20843

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 2, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion components/gitpod-protocol/src/util/scrubbing-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ export const redactedFields = ["auth_", "password", "token", "key", "jwt", "secr
export const hashedFields = ["contextURL", "workspaceID", "username"];

// hashedValues are regular expressions which when matched cause the entire value to be hashed
export const hashedValues = new Map<string, RegExp>([]);
export const hashedValues = new Map<string, RegExp>([["url", /https?:\/\/[^\s]+\.git\b/g]]);
// redactedValues are regular expressions which when matched cause the entire value to be redacted
export const redactedValues = new Map<string, RegExp>([
// https://html.spec.whatwg.org/multipage/input.html#email-state-(type=email)
Expand Down
111 changes: 111 additions & 0 deletions components/gitpod-protocol/src/util/scrubbing.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,116 @@ export class ScrubbingTest {
const scrubbedValue = new TrustedValue(scrubber.scrubValue("[email protected]"));
expect(scrubber.scrub({ key: scrubbedValue })).to.deep.equal({ key: "[redacted:email]" });
}

@test public testAnalyticsProperties_URLScrubbing() {
    // Condition messages covering: a long git-clone failure with a .git URL, a short
    // message with a .git URL, a message with no URL, and a message with a non-.git URL.
    const conditionMessages = [
        "Content initialization failed: cannot initialize workspace: git initializer gitClone: git clone --depth=1 --shallow-submodules https://gitlab.com/acme-corp/web/frontend/services/deployment-manager.git --config http.version=HTTP/1.1 . failed (exit status 128):",
        "Another error with URL: https://github.com/user/repo.git",
        "Error without URL",
        "API call to https://api.example.com/endpoint failed",
    ];

    // Stand-in for a workspace instance, reduced to the fields fed into the scrubber below.
    const mockInstance = {
        id: "test-instance-123",
        workspaceId: "test-workspace-456",
        stoppingTime: "2023-01-01T00:00:00.000Z",
        status: {
            conditions: conditionMessages.map((message) => ({ message })),
            timeout: false,
        },
    };

    // Property bag shaped like an analytics.track() payload for a stopped workspace.
    const { id, workspaceId, stoppingTime, status } = mockInstance;
    const scrubbedProperties = scrubber.scrub({
        instanceId: id,
        workspaceId: workspaceId,
        stoppingTime: new Date(stoppingTime),
        conditions: status.conditions,
        timeout: status.timeout,
    });

    // Field-name based scrubbing: workspaceId must come back as an md5 redaction marker.
    expect(scrubbedProperties.workspaceId).to.match(/^\[redacted:md5:[a-f0-9]{32}\]$/);

    // instanceId is expected to pass through untouched.
    expect(scrubbedProperties.instanceId).to.equal("test-instance-123");

    // Pattern based scrubbing inside nested objects: the first two messages carry .git URLs,
    // so each must contain a hashed ":url" marker and no longer mention the original host.
    const hashedCases: Array<[number, string]> = [
        [0, "gitlab.com"],
        [1, "github.com"],
    ];
    for (const [index, host] of hashedCases) {
        const message = scrubbedProperties.conditions[index].message;
        expect(message).to.include("[redacted:md5:");
        expect(message).to.include(":url]");
        expect(message).to.not.include(host);
    }

    // A message without any URL is returned verbatim.
    expect(scrubbedProperties.conditions[2].message).to.equal("Error without URL");

    // A URL that does not end in .git is left alone.
    const nonGitMessage = scrubbedProperties.conditions[3].message;
    expect(nonGitMessage).to.equal("API call to https://api.example.com/endpoint failed");
    expect(nonGitMessage).to.not.include("[redacted:md5:");

    // Remaining properties: the boolean survives as-is.
    expect(scrubbedProperties.timeout).to.equal(false);
    // The Date passed as stoppingTime is only checked to come back as some object; its
    // contents are deliberately not asserted (the scrubber reportedly yields an empty
    // object for Dates because they expose no enumerable properties).
    expect(scrubbedProperties.stoppingTime).to.be.an("object");
}

@test public testURL_PatternScrubbing() {
    // A single string containing a .git URL: the URL must be replaced by a hashed
    // ":url" marker while the surrounding words stay readable.
    const input = "git clone https://gitlab.com/acme-corp/web/frontend/services/deployment-manager.git failed";
    const output = scrubber.scrubValue(input);

    for (const marker of ["[redacted:md5:", ":url]"]) {
        expect(output).to.include(marker);
    }
    expect(output).to.not.include("gitlab.com");
    for (const keptText of ["git clone", "failed"]) {
        expect(output).to.include(keptText);
    }
}

@test public testURL_NonGitURLsNotScrubbed() {
    // A URL that does not end in .git must pass through scrubValue untouched.
    const input = "API call to https://api.example.com/endpoint failed";
    const output = scrubber.scrubValue(input);

    // The output is identical to the input and carries no redaction marker.
    expect(output).to.equal(input);
    expect(output).to.not.include("[redacted:md5:");
}

@test public testURL_MixedURLTypes() {
    // One message holding both kinds of URL: only the .git one may be replaced.
    const docsUrl = "https://docs.gitpod.io/configure";
    const input = "Clone from https://github.com/user/repo.git then visit https://docs.gitpod.io/configure";
    const output = scrubber.scrubValue(input);

    // The .git URL is swapped for a hashed ":url" marker...
    for (const marker of ["[redacted:md5:", ":url]"]) {
        expect(output).to.include(marker);
    }
    expect(output).to.not.include("github.com/user/repo.git");

    // ...while the documentation URL is still present verbatim.
    expect(output).to.include(docsUrl);
}

@test public testURL_HttpGitURLs() {
    // Plain http:// (not only https://) .git URLs must be hashed as well.
    const input = "git clone http://internal-git.company.com/project.git";
    const output = scrubber.scrubValue(input);

    for (const marker of ["[redacted:md5:", ":url]"]) {
        expect(output).to.include(marker);
    }
    expect(output).to.not.include("internal-git.company.com");
}
}
module.exports = new ScrubbingTest();
8 changes: 7 additions & 1 deletion components/gitpod-protocol/src/util/scrubbing.ts
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,13 @@ function doScrub(obj: any, depth: number, nested: boolean): any {
const result: any = {};
for (const [key, value] of Object.entries(obj as object)) {
if (typeof value === "string") {
result[key] = scrubber.scrubKeyValue(key, value);
// First apply field-based scrubbing, then pattern-based scrubbing
let scrubbedValue = scrubber.scrubKeyValue(key, value);
// If no field-based scrubbing was applied, apply pattern-based scrubbing
if (scrubbedValue === value) {
scrubbedValue = scrubber.scrubValue(value);
}
result[key] = scrubbedValue;
} else {
result[key] = doScrub(value, depth + 1, nested);
}
Expand Down
4 changes: 3 additions & 1 deletion components/scrubber/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ var (
}

// HashedValues are regular expressions which - when matched - cause the entire value to be hashed
HashedValues = map[string]*regexp.Regexp{}
HashedValues = map[string]*regexp.Regexp{
"url": regexp.MustCompile(`https?://[^\s]+\.git\b`),
}

// RedactedValues are regular expressions which - when matched - cause the entire value to be redacted
RedactedValues = map[string]*regexp.Regexp{
Expand Down
1 change: 1 addition & 0 deletions components/scrubber/sanitisation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ func TestSanitiser(t *testing.T) {
{Func: SanitiseHashURLPathSegments, Name: "hash contextURL with BBS user repo", Input: "https://bitbucket.gitpod-dev.com/users/gitpod/repos/repotest/browse", Expectation: "[redacted:md5:454c2006e527428ce0fbb2222edfb5c5]/users/[redacted:md5:5bc8d0354fba47db774b70d2a9161bbb]/repos/[redacted:md5:3c3f61c49fd93e84a73e33f6194586cd]/browse"},
{Func: SanitiseHashURLPathSegments, Name: "hash contextURL with BBS project PR", Input: "https://bitbucket.gitpod-dev.com/projects/TES/repos/2k-repos-0/pull-requests/1/overview", Expectation: "[redacted:md5:454c2006e527428ce0fbb2222edfb5c5]/projects/[redacted:md5:08e789053de980e0f1ac70a61125a17d]/repos/[redacted:md5:14571b57e21a5c26b9e81fe6216e27d1]/pull-requests/1/[redacted:md5:bce059749d61c1c247c303d0118d0d53]"},
{Func: SanitiseHashURLPathSegments, Name: "hash contextURL with BBS branch", Input: "https://bitbucket.gitpod-dev.com/projects/TES/repos/2k-repos-0/branches?base=test", Expectation: "[redacted:md5:454c2006e527428ce0fbb2222edfb5c5]/projects/[redacted:md5:08e789053de980e0f1ac70a61125a17d]/repos/[redacted:md5:14571b57e21a5c26b9e81fe6216e27d1]/branches?[redacted:md5:0135e6beb2a6deb4f0668facc47bce76]"},
{Func: SanitiseHashURLPathSegments, Name: "GitLab Git URL", Input: "https://gitlab.com/acme-corp/web/frontend/services/deployment-manager.git", Expectation: "[redacted:md5:8c3e227c86409b1e3e734e711a77fd6c]/[redacted:md5:7c879ad6a7611d94b34c1911910257c9]/[redacted:md5:2567a5ec9705eb7ac2c984033e06189d]/[redacted:md5:aca33b9c046b2a50b8c3c54cc0380de8]/[redacted:md5:10cd395cf71c18328c863c08e78f3fd0]/[redacted:md5:d890bc8f5f32a034527f9be94624af58]"},
}

for _, test := range tests {
Expand Down
4 changes: 4 additions & 0 deletions components/scrubber/scrubber_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ func TestValue(t *testing.T) {
{Name: "empty string"},
{Name: "email", Value: "[email protected]", Expectation: "[redacted:email]"},
{Name: "email in text", Value: "The email is [email protected] or [email protected]", Expectation: "The email is [redacted:email] or [redacted:email]"},
{Name: "GitLab Git URL in text", Value: "Content initialization failed: cannot initialize workspace: git initializer gitClone: git clone --depth=1 --shallow-submodules https://gitlab.com/acme-corp/web/frontend/services/deployment-manager.git --config http.version=HTTP/1.1 . failed (exit status 128)", Expectation: "Content initialization failed: cannot initialize workspace: git initializer gitClone: git clone --depth=1 --shallow-submodules [redacted:md5:aa0dfa0c402612a8314b8e7c4326a395:url] --config http.version=HTTP/1.1 . failed (exit status 128)"},
{Name: "Non-git URL not scrubbed", Value: "API call to https://api.example.com/endpoint failed", Expectation: "API call to https://api.example.com/endpoint failed"},
{Name: "Mixed URLs", Value: "Clone from https://github.com/user/repo.git then visit https://docs.gitpod.io/configure", Expectation: "Clone from [redacted:md5:3c5467d320a0b72072bc609f12e7d879:url] then visit https://docs.gitpod.io/configure"},
{Name: "HTTP Git URL", Value: "git clone http://internal-git.company.com/project.git", Expectation: "git clone [redacted:md5:11774800a9c933d1181c479ea207cdff:url]"},
}

for _, test := range tests {
Expand Down
28 changes: 19 additions & 9 deletions components/server/src/workspace/workspace-starter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ import {
WorkspaceTimeoutDuration,
} from "@gitpod/gitpod-protocol";
import { IAnalyticsWriter, TrackMessage } from "@gitpod/gitpod-protocol/lib/analytics";
import { scrubber } from "@gitpod/gitpod-protocol/lib/util/scrubbing";
import { AttributionId } from "@gitpod/gitpod-protocol/lib/attribution";
import { Deferred } from "@gitpod/gitpod-protocol/lib/util/deferred";
import { LogContext, log } from "@gitpod/gitpod-protocol/lib/util/logging";
Expand Down Expand Up @@ -729,11 +730,12 @@ export class WorkspaceStarter {
project?.settings?.prebuilds?.triggerStrategy ?? "webhook-based";
}

// update analytics
// update analytics - scrub properties that might contain sensitive data like URLs
const scrubbedTrackProperties = scrubber.scrub(trackProperties);
this.analytics.track({
userId: user.id,
event: "workspace_started",
properties: trackProperties,
properties: scrubbedTrackProperties,
timestamp: new Date(instance.creationTime),
});
} catch (err) {
Expand Down Expand Up @@ -1083,15 +1085,17 @@ export class WorkspaceStarter {
};

if (WithReferrerContext.is(workspace.context)) {
// Scrub properties that might contain sensitive data like URLs
const scrubbedReferrerProperties = scrubber.scrub({
workspaceId: workspace.id,
instanceId: instance.id,
referrer: workspace.context.referrer,
referrerIde: workspace.context.referrerIde,
});
this.analytics.track({
userId: user.id,
event: "ide_referrer",
properties: {
workspaceId: workspace.id,
instanceId: instance.id,
referrer: workspace.context.referrer,
referrerIde: workspace.context.referrerIde,
},
properties: scrubbedReferrerProperties,
});
}
return instance;
Expand Down Expand Up @@ -1395,10 +1399,16 @@ export class WorkspaceStarter {
err = new StartInstanceError("imageBuildFailed", err);
increaseImageBuildsCompletedTotal("failed");
}
// Scrub properties that might contain sensitive data like URLs
const scrubbedImageBuildProperties = scrubber.scrub({
workspaceId: workspace.id,
instanceId: instance.id,
contextURL: workspace.contextURL,
});
this.analytics.track({
userId: user.id,
event: "imagebuild-failed",
properties: { workspaceId: workspace.id, instanceId: instance.id, contextURL: workspace.contextURL },
properties: scrubbedImageBuildProperties,
});

throw err;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import { repeat } from "@gitpod/gitpod-protocol/lib/util/repeat";
import { PrebuildUpdater } from "./prebuild-updater";
import { RedisPublisher } from "@gitpod/gitpod-db/lib";
import { durationLongerThanSeconds } from "@gitpod/gitpod-protocol/lib/util/timeutil";
import { scrubber } from "@gitpod/gitpod-protocol/lib/util/scrubbing";

export const WorkspaceInstanceController = Symbol("WorkspaceInstanceController");

Expand Down Expand Up @@ -286,17 +287,20 @@ export class WorkspaceInstanceControllerImpl implements WorkspaceInstanceControl

try {
await this.userDB.trace({ span }).deleteGitpodTokensNamedLike(ownerUserID, `${instance.id}-%`);
// Scrub properties that might contain sensitive data like URLs
const scrubbedProperties = scrubber.scrub({
instanceId: instance.id,
workspaceId: instance.workspaceId,
stoppingTime: new Date(instance.stoppingTime!),
conditions: instance.status.conditions,
timeout: instance.status.timeout,
});

this.analytics.track({
userId: ownerUserID,
event: "workspace_stopped",
messageId: `bridge-wsstopped-${instance.id}`,
properties: {
instanceId: instance.id,
workspaceId: instance.workspaceId,
stoppingTime: new Date(instance.stoppingTime!),
conditions: instance.status.conditions,
timeout: instance.status.timeout,
},
properties: scrubbedProperties,
timestamp: new Date(instance.stoppedTime!),
});
} catch (err) {
Expand Down
Loading