Skip to content

Commit 28e67f9

Browse files
authored
chore: Add changes in Spanner executor for testing end to end tracing (#3264)
* Add opt-in flag and ClientInterceptor to propagate trace context for Spanner server side tracing * Add changes in Spanner executor for testing end to end tracing * fix github check failure for dependencies * minor fix * minor fix * fix issue related to traces being not returned from getTrace api call need to wait some time for traces to be visible. * fix formatting * update dependencies
1 parent 7e28326 commit 28e67f9

File tree

6 files changed

+306
-31
lines changed

6 files changed

+306
-31
lines changed

google-cloud-spanner-executor/pom.xml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,40 @@
2222
</properties>
2323

2424
<dependencies>
25+
<dependency>
26+
<groupId>io.opentelemetry</groupId>
27+
<artifactId>opentelemetry-api</artifactId>
28+
</dependency>
29+
<dependency>
30+
<groupId>io.opentelemetry</groupId>
31+
<artifactId>opentelemetry-context</artifactId>
32+
</dependency>
33+
<dependency>
34+
<groupId>io.opentelemetry</groupId>
35+
<artifactId>opentelemetry-sdk</artifactId>
36+
</dependency>
37+
<dependency>
38+
<groupId>io.opentelemetry</groupId>
39+
<artifactId>opentelemetry-sdk-common</artifactId>
40+
</dependency>
41+
<dependency>
42+
<groupId>io.opentelemetry</groupId>
43+
<artifactId>opentelemetry-sdk-trace</artifactId>
44+
</dependency>
45+
<dependency>
46+
<groupId>com.google.cloud.opentelemetry</groupId>
47+
<artifactId>exporter-trace</artifactId>
48+
<version>0.32.0</version>
49+
</dependency>
2550
<dependency>
2651
<groupId>com.google.cloud</groupId>
2752
<artifactId>google-cloud-spanner</artifactId>
2853
</dependency>
54+
<dependency>
55+
<groupId>com.google.cloud</groupId>
56+
<artifactId>google-cloud-trace</artifactId>
57+
<version>2.52.0</version>
58+
</dependency>
2959
<dependency>
3060
<groupId>io.grpc</groupId>
3161
<artifactId>grpc-api</artifactId>
@@ -94,6 +124,11 @@
94124
<groupId>com.google.api.grpc</groupId>
95125
<artifactId>proto-google-cloud-spanner-executor-v1</artifactId>
96126
</dependency>
127+
<dependency>
128+
<groupId>com.google.api.grpc</groupId>
129+
<artifactId>proto-google-cloud-trace-v1</artifactId>
130+
<version>2.52.0</version>
131+
</dependency>
97132
<dependency>
98133
<groupId>com.google.api.grpc</groupId>
99134
<artifactId>grpc-google-cloud-spanner-executor-v1</artifactId>

google-cloud-spanner-executor/src/main/java/com/google/cloud/executor/spanner/CloudClientExecutor.java

Lines changed: 143 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
import static com.google.cloud.spanner.TransactionRunner.TransactionCallable;
2020

21+
import com.google.api.gax.core.FixedCredentialsProvider;
2122
import com.google.api.gax.longrunning.OperationFuture;
2223
import com.google.api.gax.paging.Page;
2324
import com.google.api.gax.retrying.RetrySettings;
@@ -70,15 +71,21 @@
7071
import com.google.cloud.spanner.TimestampBound;
7172
import com.google.cloud.spanner.TransactionContext;
7273
import com.google.cloud.spanner.TransactionRunner;
74+
import com.google.cloud.spanner.TransactionRunner.TransactionCallable;
7375
import com.google.cloud.spanner.Type;
7476
import com.google.cloud.spanner.Value;
7577
import com.google.cloud.spanner.encryption.CustomerManagedEncryption;
7678
import com.google.cloud.spanner.v1.stub.SpannerStubSettings;
79+
import com.google.cloud.trace.v1.TraceServiceClient;
80+
import com.google.cloud.trace.v1.TraceServiceSettings;
7781
import com.google.common.base.Function;
7882
import com.google.common.base.Joiner;
7983
import com.google.common.base.Preconditions;
8084
import com.google.common.collect.Lists;
8185
import com.google.common.util.concurrent.ThreadFactoryBuilder;
86+
import com.google.devtools.cloudtrace.v1.GetTraceRequest;
87+
import com.google.devtools.cloudtrace.v1.Trace;
88+
import com.google.devtools.cloudtrace.v1.TraceSpan;
8289
import com.google.longrunning.Operation;
8390
import com.google.protobuf.ByteString;
8491
import com.google.protobuf.util.Timestamps;
@@ -152,6 +159,9 @@
152159
import com.google.spanner.v1.TypeCode;
153160
import io.grpc.Status;
154161
import io.grpc.stub.StreamObserver;
162+
import io.opentelemetry.api.trace.Span;
163+
import io.opentelemetry.api.trace.Tracer;
164+
import io.opentelemetry.context.Scope;
155165
import java.io.ByteArrayInputStream;
156166
import java.io.File;
157167
import java.io.IOException;
@@ -166,7 +176,9 @@
166176
import java.util.Objects;
167177
import java.util.concurrent.ExecutionException;
168178
import java.util.concurrent.Executor;
179+
import java.util.concurrent.ExecutorService;
169180
import java.util.concurrent.Executors;
181+
import java.util.concurrent.Future;
170182
import java.util.concurrent.TimeUnit;
171183
import java.util.logging.Level;
172184
import java.util.logging.Logger;
@@ -332,24 +344,28 @@ public void startRWTransaction() throws Exception {
332344
// Try to commit
333345
return null;
334346
};
347+
io.opentelemetry.context.Context context = io.opentelemetry.context.Context.current();
335348
Runnable runnable =
336-
() -> {
337-
try {
338-
runner =
339-
optimistic
340-
? dbClient.readWriteTransaction(Options.optimisticLock())
341-
: dbClient.readWriteTransaction();
342-
LOGGER.log(Level.INFO, String.format("Ready to run callable %s\n", transactionSeed));
343-
runner.run(callable);
344-
transactionSucceeded(runner.getCommitTimestamp().toProto());
345-
} catch (SpannerException e) {
346-
LOGGER.log(
347-
Level.WARNING,
348-
String.format("Transaction runnable failed with exception %s\n", e.getMessage()),
349-
e);
350-
transactionFailed(e);
351-
}
352-
};
349+
context.wrap(
350+
() -> {
351+
try {
352+
runner =
353+
optimistic
354+
? dbClient.readWriteTransaction(Options.optimisticLock())
355+
: dbClient.readWriteTransaction();
356+
LOGGER.log(
357+
Level.INFO, String.format("Ready to run callable %s\n", transactionSeed));
358+
runner.run(callable);
359+
transactionSucceeded(runner.getCommitTimestamp().toProto());
360+
} catch (SpannerException e) {
361+
LOGGER.log(
362+
Level.WARNING,
363+
String.format(
364+
"Transaction runnable failed with exception %s\n", e.getMessage()),
365+
e);
366+
transactionFailed(e);
367+
}
368+
});
353369
LOGGER.log(
354370
Level.INFO,
355371
String.format("Callable and Runnable created, ready to execute %s\n", transactionSeed));
@@ -753,6 +769,11 @@ public synchronized void closeBatchTxn() throws SpannerException {
753769
Executors.newCachedThreadPool(
754770
new ThreadFactoryBuilder().setNameFormat("action-pool-%d").build());
755771

772+
// Thread pool to verify end to end traces.
773+
private static final ExecutorService endToEndTracesThreadPool =
774+
Executors.newCachedThreadPool(
775+
new ThreadFactoryBuilder().setNameFormat("end-to-end-traces-pool-%d").build());
776+
756777
private synchronized Spanner getClientWithTimeout(
757778
long timeoutSeconds, boolean useMultiplexedSession) throws IOException {
758779
if (clientWithTimeout != null) {
@@ -818,6 +839,8 @@ private synchronized Spanner getClient(long timeoutSeconds, boolean useMultiplex
818839
.setHost(HOST_PREFIX + WorkerProxy.spannerPort)
819840
.setCredentials(credentials)
820841
.setChannelProvider(channelProvider)
842+
.setEnableEndToEndTracing(true)
843+
.setOpenTelemetry(WorkerProxy.openTelemetrySdk)
821844
.setSessionPoolOption(sessionPoolOptions);
822845

823846
SpannerStubSettings.Builder stubSettingsBuilder =
@@ -841,6 +864,88 @@ private synchronized Spanner getClient(long timeoutSeconds, boolean useMultiplex
841864
return optionsBuilder.build().getService();
842865
}
843866

867+
private TraceServiceClient traceServiceClient;
868+
869+
// Return the trace service client, create one if not exists.
870+
private synchronized TraceServiceClient getTraceServiceClient() throws IOException {
871+
if (traceServiceClient != null) {
872+
return traceServiceClient;
873+
}
874+
// Create a trace service client
875+
Credentials credentials;
876+
if (WorkerProxy.serviceKeyFile.isEmpty()) {
877+
credentials = NoCredentials.getInstance();
878+
} else {
879+
credentials =
880+
GoogleCredentials.fromStream(
881+
new ByteArrayInputStream(
882+
FileUtils.readFileToByteArray(new File(WorkerProxy.serviceKeyFile))),
883+
HTTP_TRANSPORT_FACTORY);
884+
}
885+
886+
TraceServiceSettings traceServiceSettings =
887+
TraceServiceSettings.newBuilder()
888+
.setEndpoint(WorkerProxy.CLOUD_TRACE_ENDPOINT)
889+
.setCredentialsProvider(FixedCredentialsProvider.create(credentials))
890+
.build();
891+
892+
traceServiceClient = TraceServiceClient.create(traceServiceSettings);
893+
return traceServiceClient;
894+
}
895+
896+
public Future<Boolean> getEndToEndTraceVerificationTask(String traceId) {
897+
return endToEndTracesThreadPool.submit(
898+
() -> {
899+
try {
900+
// Wait for 10 seconds before verifying to ensure traces are exported.
901+
long sleepDuration = TimeUnit.SECONDS.toMillis(10);
902+
LOGGER.log(
903+
Level.INFO,
904+
String.format(
905+
"Sleeping for %d milliseconds before verifying end to end trace",
906+
sleepDuration));
907+
Thread.sleep(sleepDuration);
908+
} catch (InterruptedException e) {
909+
Thread.currentThread().interrupt(); // Handle interruption
910+
LOGGER.log(Level.INFO, String.format("Thread interrupted."));
911+
return false; // Return false if interrupted
912+
}
913+
return isExportedEndToEndTraceValid(traceId);
914+
});
915+
}
916+
917+
private static final String READ_WRITE_TRANSACTION = "CloudSpanner.ReadWriteTransaction";
918+
private static final String READ_ONLY_TRANSACTION = "CloudSpanner.ReadOnlyTransaction";
919+
920+
/* Returns whether a exported trace is valid. */
921+
public boolean isExportedEndToEndTraceValid(String traceId) {
922+
try {
923+
GetTraceRequest getTraceRequest =
924+
GetTraceRequest.newBuilder()
925+
.setProjectId(WorkerProxy.PROJECT_ID)
926+
.setTraceId(traceId)
927+
.build();
928+
Trace trace = getTraceServiceClient().getTrace(getTraceRequest);
929+
boolean readWriteOrReadOnlyTxnPresent = false, spannerServerSideSpanPresent = false;
930+
for (TraceSpan span : trace.getSpansList()) {
931+
if (span.getName().contains(READ_ONLY_TRANSACTION)
932+
|| span.getName().contains(READ_WRITE_TRANSACTION)) {
933+
readWriteOrReadOnlyTxnPresent = true;
934+
}
935+
if (span.getName().startsWith("Spanner.")) {
936+
spannerServerSideSpanPresent = true;
937+
}
938+
}
939+
if (readWriteOrReadOnlyTxnPresent && !spannerServerSideSpanPresent) {
940+
return false;
941+
}
942+
} catch (Exception e) {
943+
LOGGER.log(Level.WARNING, "Failed to verify end to end trace.", e);
944+
return false;
945+
}
946+
return true;
947+
}
948+
844949
/** Handle actions. */
845950
public Status startHandlingRequest(
846951
SpannerAsyncActionRequest req, ExecutionFlowContext executionContext) {
@@ -865,17 +970,20 @@ public Status startHandlingRequest(
865970
useMultiplexedSession = false;
866971
}
867972

973+
io.opentelemetry.context.Context context = io.opentelemetry.context.Context.current();
868974
actionThreadPool.execute(
869-
() -> {
870-
Status status =
871-
executeAction(outcomeSender, action, dbPath, useMultiplexedSession, executionContext);
872-
if (!status.isOk()) {
873-
LOGGER.log(
874-
Level.WARNING,
875-
String.format("Failed to execute action with error: %s\n%s", status, action));
876-
executionContext.onError(status.getCause());
877-
}
878-
});
975+
context.wrap(
976+
() -> {
977+
Status status =
978+
executeAction(
979+
outcomeSender, action, dbPath, useMultiplexedSession, executionContext);
980+
if (!status.isOk()) {
981+
LOGGER.log(
982+
Level.WARNING,
983+
String.format("Failed to execute action with error: %s\n%s", status, action));
984+
executionContext.onError(status.getCause());
985+
}
986+
}));
879987
return Status.OK;
880988
}
881989

@@ -886,7 +994,10 @@ private Status executeAction(
886994
String dbPath,
887995
boolean useMultiplexedSession,
888996
ExecutionFlowContext executionContext) {
889-
997+
Tracer tracer = WorkerProxy.openTelemetrySdk.getTracer(CloudClientExecutor.class.getName());
998+
String actionType = action.getActionCase().toString();
999+
Span span = tracer.spanBuilder(String.format("performaction_%s", actionType)).startSpan();
1000+
Scope scope = span.makeCurrent();
8901001
try {
8911002
if (action.hasAdmin()) {
8921003
return executeAdminAction(useMultiplexedSession, action.getAdmin(), outcomeSender);
@@ -959,11 +1070,15 @@ private Status executeAction(
9591070
ErrorCode.UNIMPLEMENTED, "Not implemented yet: \n" + action)));
9601071
}
9611072
} catch (Exception e) {
1073+
span.recordException(e);
9621074
LOGGER.log(Level.WARNING, "Unexpected error: " + e.getMessage());
9631075
return outcomeSender.finishWithError(
9641076
toStatus(
9651077
SpannerExceptionFactory.newSpannerException(
9661078
ErrorCode.INVALID_ARGUMENT, "Unexpected error: " + e.getMessage())));
1079+
} finally {
1080+
scope.close();
1081+
span.end();
9671082
}
9681083
}
9691084

0 commit comments

Comments
 (0)