Skip to content

Commit dcc3669

Browse files
lberkicopybara-github
authored andcommitted
Report CPU and wall times in profiler output for build commands again.
This was broken in bazelbuild@cb42a1d. The reason is that MetricsCollector assumed that the profiler was active while calling finishTimingMetrics(), which used to be the case before that change. However, that change made finishTimingMetrics() be called in CommandPrecompleteEvent and (somewhat confusingly) BuildSummaryStatsModule shuts down the profiler in BuildCompleteEvent, which happens before. Ideally, we'd shut down the profiler at the last minute, but we can't do that because we publish the profiler data to the BEP, which in turn shuts down in BuildCompleteEvent. So we must shut down the profiler in BuildCompleteEvent, but that doesn't happen on non-build commands so we must also shut it down some place that is called on every command. This of course means that we lose profiler events between BuildCompleteEvent and the actual end of the command. The profiler is changed so that timing data is available after .stop(), which makes MetricsCollector work again. RELNOTES: None. PiperOrigin-RevId: 671727112 Change-Id: Ibf336a7a387449a3edbaffb3d399e5b5e9ed0ff4
1 parent 09fe68a commit dcc3669

File tree

5 files changed

+59
-49
lines changed

5 files changed

+59
-49
lines changed

src/main/java/com/google/devtools/build/lib/metrics/MetricsCollector.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ public synchronized void handleExecutionPhaseStart(
217217
public void onSomeExecutionStarted(SomeExecutionStartedEvent event) {
218218
if (event.countedInExecutionTime()) {
219219
if (executionStarted.compareAndSet(false, true)) {
220-
Duration elapsedWallTime = Profiler.elapsedTimeMaybe();
220+
Duration elapsedWallTime = Profiler.getProfileElapsedTime();
221221
if (elapsedWallTime != null) {
222222
timingMetrics.setActionsExecutionStartInMs(elapsedWallTime.toMillis());
223223
}
@@ -490,11 +490,11 @@ private CumulativeMetrics createCumulativeMetrics() {
490490
}
491491

492492
private TimingMetrics finishTimingMetrics() {
493-
Duration elapsedWallTime = Profiler.elapsedTimeMaybe();
493+
Duration elapsedWallTime = Profiler.getProfileElapsedTime();
494494
if (elapsedWallTime != null) {
495495
timingMetrics.setWallTimeInMs(elapsedWallTime.toMillis());
496496
}
497-
Duration cpuTime = Profiler.getProcessCpuTimeMaybe();
497+
Duration cpuTime = Profiler.getServerProcessCpuTime();
498498
if (cpuTime != null) {
499499
timingMetrics.setCpuTimeInMs(cpuTime.toMillis());
500500
}

src/main/java/com/google/devtools/build/lib/profiler/Profiler.java

Lines changed: 48 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import static com.google.common.base.Preconditions.checkState;
1818

1919
import com.google.common.annotations.VisibleForTesting;
20+
import com.google.common.base.Preconditions;
2021
import com.google.common.base.Predicate;
2122
import com.google.common.collect.ImmutableList;
2223
import com.google.common.collect.ImmutableSet;
@@ -306,9 +307,12 @@ ImmutableList<SlowTask> getSlowestTasks() {
306307

307308
private Clock clock;
308309
private Set<ProfilerTask> profiledTasks;
309-
private volatile long profileStartTime;
310+
private volatile boolean active = false;
310311
private volatile boolean recordAllDurations = false;
311-
private Duration profileCpuStartTime;
312+
private Duration profileCpuStartTime = Duration.ZERO;
313+
private Duration profileCpuEndTime = Duration.ZERO;
314+
private Duration profileStartTime = Duration.ZERO;
315+
private Duration profileEndTime = Duration.ZERO;
312316

313317
/**
314318
* The reference to the current writer, if any. If the referenced writer is null, then disk writes
@@ -376,7 +380,8 @@ private void initHistograms() {
376380
// stop instead? However, this is currently only called from one location in a module, and that
377381
// can't call stop itself. What to do?
378382
public synchronized ImmutableList<StatRecorder> getTasksHistograms() {
379-
return isActive() ? ImmutableList.copyOf(tasksHistograms) : ImmutableList.of();
383+
Preconditions.checkState(isActive());
384+
return ImmutableList.copyOf(tasksHistograms);
380385
}
381386

382387
public static Profiler instance() {
@@ -393,14 +398,12 @@ public static long nanoTimeMaybe() {
393398
return -1;
394399
}
395400

396-
// Returns the elapsed wall clock time since the profile has been started or null if inactive.
397-
@Nullable
398-
public static Duration elapsedTimeMaybe() {
399-
if (instance.isActive()) {
400-
return Duration.ofNanos(instance.clock.nanoTime())
401-
.minus(Duration.ofNanos(instance.profileStartTime));
402-
}
403-
return null;
401+
// Returns the elapsed wall clock time since the profile has been started.
402+
public static Duration getProfileElapsedTime() {
403+
Duration endTime =
404+
instance.isActive() ? Duration.ofNanos(instance.clock.nanoTime()) : instance.profileEndTime;
405+
406+
return endTime.minus(instance.profileStartTime);
404407
}
405408

406409
private static Duration getProcessCpuTime() {
@@ -409,13 +412,10 @@ private static Duration getProcessCpuTime() {
409412
return Duration.ofNanos(bean.getProcessCpuTime());
410413
}
411414

412-
// Returns the CPU time since the profile has been started or null if inactive.
413-
@Nullable
414-
public static Duration getProcessCpuTimeMaybe() {
415-
if (instance().isActive()) {
416-
return getProcessCpuTime().minus(instance().profileCpuStartTime);
417-
}
418-
return null;
415+
// Returns the CPU time since the profile has been started.
416+
public static Duration getServerProcessCpuTime() {
417+
Duration cpuEndTime = instance.isActive() ? getProcessCpuTime() : instance.profileCpuEndTime;
418+
return cpuEndTime.minus(instance.profileCpuStartTime);
419419
}
420420

421421
/**
@@ -447,7 +447,8 @@ public synchronized void start(
447447
boolean collectTaskHistograms,
448448
LocalResourceCollector localResourceCollector)
449449
throws IOException {
450-
checkState(!isActive(), "Profiler already active");
450+
checkState(!active, "Profiler already active");
451+
451452
initHistograms();
452453

453454
this.profiledTasks = profiledTasks.isEmpty() ? profiledTasks : EnumSet.copyOf(profiledTasks);
@@ -484,8 +485,9 @@ public synchronized void start(
484485
this.writerRef.set(writer);
485486

486487
// Activate profiler.
487-
profileStartTime = execStartTimeNanos;
488+
profileStartTime = Duration.ofNanos(execStartTimeNanos);
488489
profileCpuStartTime = getProcessCpuTime();
490+
active = true;
489491

490492
this.localResourceCollector = localResourceCollector;
491493
// Start collecting Bazel and system-wide CPU metric collection.
@@ -553,24 +555,41 @@ private boolean hasNonZeroValues(Map<ProfilerTask, double[]> countersSeriesMap)
553555
* will no longer be recorded in the profile.
554556
*/
555557
public synchronized void stop() throws IOException {
556-
if (!isActive()) {
558+
if (!active) {
557559
return;
558560
}
559561

560562
collectActionCounts();
561-
562563
localResourceCollector.stop();
563564

564565
// Log a final event to update the duration of ProfilePhase.FINISH.
565566
logEvent(ProfilerTask.INFO, "Finishing");
566-
JsonTraceFileWriter writer = writerRef.getAndSet(null);
567-
if (writer != null) {
568-
writer.shutdown();
569-
writer = null;
567+
try {
568+
JsonTraceFileWriter writer = writerRef.getAndSet(null);
569+
if (writer != null) {
570+
writer.shutdown();
571+
writer = null;
572+
}
573+
} finally {
574+
profileCpuEndTime = getProcessCpuTime();
575+
profileEndTime = Duration.ofNanos(clock.nanoTime());
576+
active = false;
570577
}
578+
}
579+
580+
/**
581+
* Clears the records the profiler instance keeps.
582+
*
583+
* <p>Should always be called between a {@link #stop()} and a subsequent {@link #start}.
584+
*/
585+
public synchronized void clear() {
586+
Preconditions.checkState(!active);
587+
571588
Arrays.fill(tasksHistograms, null);
572-
profileStartTime = 0L;
573-
profileCpuStartTime = null;
589+
profileStartTime = Duration.ZERO;
590+
profileEndTime = Duration.ZERO;
591+
profileCpuStartTime = Duration.ZERO;
592+
profileCpuEndTime = Duration.ZERO;
574593

575594
for (SlowestTaskAggregator aggregator : slowestTasks) {
576595
if (aggregator != null) {
@@ -583,7 +602,7 @@ public synchronized void stop() throws IOException {
583602

584603
/** Returns true iff profiling is currently enabled. */
585604
public boolean isActive() {
586-
return profileStartTime != 0L;
605+
return active;
587606
}
588607

589608
public boolean isProfiling(ProfilerTask type) {

src/main/java/com/google/devtools/build/lib/runtime/BlazeCommandDispatcher.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -729,12 +729,14 @@ private BlazeCommandResult execExclusively(
729729

730730
try {
731731
Profiler.instance().stop();
732-
MemoryProfiler.instance().stop();
733732
} catch (IOException e) {
734733
env.getReporter()
735734
.handle(Event.error("Error while writing profile file: " + e.getMessage()));
736735
}
737736

737+
Profiler.instance().clear();
738+
MemoryProfiler.instance().stop();
739+
738740
// Swallow IOException, as we are already in a finally clause
739741
Flushables.flushQuietly(outErr.getOutputStream());
740742
Flushables.flushQuietly(outErr.getErrorStream());

src/test/java/com/google/devtools/build/lib/profiler/ProfilerTest.java

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ public final class ProfilerTest {
6464
@Before
6565
public void setManualClock() {
6666
BlazeClock.setClock(clock);
67+
profiler.clear();
6768
}
6869

6970
@AfterClass
@@ -732,21 +733,6 @@ public long nanoTime() {
732733
profiler.stop();
733734
}
734735

735-
/** Checks that the histograms are cleared in the stop call. */
736-
@Test
737-
public void testEmptyTaskHistograms() throws Exception {
738-
startUnbuffered(getAllProfilerTasks());
739-
profiler.logSimpleTaskDuration(
740-
Profiler.nanoTimeMaybe(), Duration.ofSeconds(10), ProfilerTask.INFO, "foo");
741-
for (StatRecorder recorder : profiler.tasksHistograms) {
742-
assertThat(recorder).isNotNull();
743-
}
744-
profiler.stop();
745-
for (StatRecorder recorder : profiler.tasksHistograms) {
746-
assertThat(recorder).isNull();
747-
}
748-
}
749-
750736
@Test
751737
public void testTaskHistograms() throws Exception {
752738
startUnbuffered(getAllProfilerTasks());

src/test/shell/integration/build_event_stream_test.sh

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1478,9 +1478,12 @@ EOF
14781478
chmod +x a/a.sh
14791479

14801480
bazel build --build_event_text_file=bep.txt //a:a || fail "build failed"
1481+
14811482
assert_contains "^build_metrics {" bep.txt
1483+
assert_contains "cpu_time_in_ms: " bep.txt
14821484

1483-
bazel test --build_event_text_file=bep.txt //a:a || fail "build failed"
1485+
# Check if null builds still have build metrics
1486+
bazel build --build_event_text_file=bep.txt //a:a || fail "build failed"
14841487
assert_contains "^build_metrics {" bep.txt
14851488
}
14861489

0 commit comments

Comments
 (0)