@@ -59,6 +59,7 @@ void VirtualUnwinder::unwindCall(UnwindState &State) {
59
59
// pro/epi tracker(Dwarf CFI) for the precise check.
60
60
uint64_t Source = State.getCurrentLBRSource ();
61
61
auto *ParentFrame = State.getParentFrame ();
62
+
62
63
if (ParentFrame == State.getDummyRootPtr () ||
63
64
ParentFrame->Address != Source) {
64
65
State.switchToFrame (Source);
@@ -121,7 +122,7 @@ void VirtualUnwinder::unwindReturn(UnwindState &State) {
121
122
State.InstPtr .update (LBR.Source );
122
123
}
123
124
124
- void VirtualUnwinder::unwindBranchWithinFrame (UnwindState &State) {
125
+ void VirtualUnwinder::unwindBranch (UnwindState &State) {
125
126
// TODO: Tolerate tail call for now, as we may see tail call from libraries.
126
127
// This is only for intra function branches, excluding tail calls.
127
128
uint64_t Source = State.getCurrentLBRSource ();
@@ -219,7 +220,7 @@ void VirtualUnwinder::collectSamplesFromFrameTrie(
219
220
220
221
void VirtualUnwinder::recordBranchCount (const LBREntry &Branch,
221
222
UnwindState &State, uint64_t Repeat) {
222
- if (Branch.IsArtificial )
223
+ if (Branch.IsArtificial || Branch. Target == ExternalAddr )
223
224
return ;
224
225
225
226
if (Binary->usePseudoProbes ()) {
@@ -242,21 +243,18 @@ bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) {
242
243
if (!State.validateInitialState ())
243
244
return false ;
244
245
245
- // Also do not attempt linear unwind for the leaf range as it's incomplete.
246
- bool IsLeaf = true ;
247
-
248
246
// Now process the LBR samples in parallel with stack sample
249
247
// Note that we do not reverse the LBR entry order so we can
250
248
// unwind the sample stack as we walk through LBR entries.
251
249
while (State.hasNextLBR ()) {
252
250
State.checkStateConsistency ();
253
251
254
- // Unwind implicit calls/returns from inlining, along the linear path,
255
- // break into smaller sub section each with its own calling context.
256
- if (!IsLeaf) {
252
+ // Do not attempt linear unwind for the leaf range as it's incomplete.
253
+ if (!State.IsLastLBR ()) {
254
+ // Unwind implicit calls/returns from inlining, along the linear path,
255
+ // break into smaller sub section each with its own calling context.
257
256
unwindLinear (State, Repeat);
258
257
}
259
- IsLeaf = false ;
260
258
261
259
// Save the LBR branch before it gets unwound.
262
260
const LBREntry &Branch = State.getCurrentLBR ();
@@ -271,9 +269,15 @@ bool VirtualUnwinder::unwind(const PerfSample *Sample, uint64_t Repeat) {
271
269
// Unwind returns - check whether the IP is indeed at a return instruction
272
270
unwindReturn (State);
273
271
} else {
274
- // Unwind branches - for regular intra function branches, we only
275
- // need to record branch with context.
276
- unwindBranchWithinFrame (State);
272
+ // Unwind branches
273
+ // For regular intra function branches, we only need to record branch with
274
+ // context. For an artificial branch crossing function boundaries, we have an
275
+ // issue with returning to external code. Take the two LBR entries for
276
+ // example: [foo:8(RETURN), ext:1] [ext:3(CALL), bar:1] After perf reader,
277
+ // we only get [foo:8(RETURN), bar:1], unwinder will be confused like foo
278
+ // return to bar. Here we detect and treat this case as BRANCH instead of
279
+ // RETURN which only updates the source address.
280
+ unwindBranch (State);
277
281
}
278
282
State.advanceLBR ();
279
283
// Record `branch` with calling context after unwinding.
@@ -432,9 +436,9 @@ void HybridPerfReader::unwindSamples() {
432
436
if (Binary->useFSDiscriminator ())
433
437
exitWithError (" FS discriminator is not supported in CS profile." );
434
438
std::set<uint64_t > AllUntrackedCallsites;
439
+ VirtualUnwinder Unwinder (&SampleCounters, Binary);
435
440
for (const auto &Item : AggregatedSamples) {
436
441
const PerfSample *Sample = Item.first .getPtr ();
437
- VirtualUnwinder Unwinder (&SampleCounters, Binary);
438
442
Unwinder.unwind (Sample, Item.second );
439
443
auto &CurrUntrackedCallsites = Unwinder.getUntrackedCallsites ();
440
444
AllUntrackedCallsites.insert (CurrUntrackedCallsites.begin (),
@@ -508,26 +512,32 @@ bool PerfScriptReader::extractLBRStack(TraceStream &TraceIt,
508
512
bool IsOutgoing = SrcIsInternal && !DstIsInternal;
509
513
bool IsArtificial = false ;
510
514
511
- // Ignore branches outside the current binary. Ignore all remaining branches
512
- // if there's no incoming branch before the external branch in reverse
513
- // order.
515
+ // Ignore branches outside the current binary.
514
516
if (IsExternal) {
515
- if (PrevTrDst)
516
- continue ;
517
- if (!LBRStack.empty ()) {
517
+ if (!PrevTrDst && !LBRStack.empty ()) {
518
518
WithColor::warning ()
519
519
<< " Invalid transfer to external code in LBR record at line "
520
520
<< TraceIt.getLineNumber () << " : " << TraceIt.getCurrentLine ()
521
521
<< " \n " ;
522
522
}
523
- break ;
523
+ // Do not ignore the entire samples, the remaining LBR can still be
524
+ // unwound using a context-less stack.
525
+ continue ;
524
526
}
525
527
526
528
if (IsOutgoing) {
527
529
if (!PrevTrDst) {
528
- // This is unpaired outgoing jump which is likely due to interrupt or
529
- // incomplete LBR trace. Ignore current and subsequent entries since
530
- // they are likely in different contexts.
530
+ // This is a leading outgoing LBR, we should keep processing the LBRs.
531
+ if (LBRStack.empty ()) {
532
+ NumLeadingOutgoingLBR++;
533
+ // Record this LBR since current source and next LBR's target is still
534
+ // a valid range.
535
+ LBRStack.emplace_back (LBREntry (Src, ExternalAddr, false ));
536
+ continue ;
537
+ }
538
+ // This is a middle unpaired outgoing jump which is likely due to
539
+ // interrupt or incomplete LBR trace. Ignore current and subsequent
540
+ // entries since they are likely in different contexts.
531
541
break ;
532
542
}
533
543
@@ -593,9 +603,17 @@ bool PerfScriptReader::extractCallstack(TraceStream &TraceIt,
593
603
}
594
604
TraceIt.advance ();
595
605
// Currently intermixed frame from different binaries is not supported.
596
- // Ignore caller frames not from binary of interest.
597
- if (!Binary->addressIsCode (FrameAddr))
598
- break ;
606
+ if (!Binary->addressIsCode (FrameAddr)) {
607
+ if (CallStack.empty ())
608
+ NumLeafExternalFrame++;
609
+ // Push a special value(ExternalAddr) for the external frames so that
610
+ // unwinder can still work on this with artificial Call/Return branch.
611
+ // After unwinding, the context will be truncated for external frame.
612
+ // Also deduplicate the consecutive external addresses.
613
+ if (CallStack.empty () || CallStack.back () != ExternalAddr)
614
+ CallStack.emplace_back (ExternalAddr);
615
+ continue ;
616
+ }
599
617
600
618
// We need to translate return address to call address for non-leaf frames.
601
619
if (!CallStack.empty ()) {
@@ -613,6 +631,10 @@ bool PerfScriptReader::extractCallstack(TraceStream &TraceIt,
613
631
CallStack.emplace_back (FrameAddr);
614
632
}
615
633
634
+ // Strip out the bottom external addr.
635
+ if (CallStack.size () > 1 && CallStack.back () == ExternalAddr)
636
+ CallStack.pop_back ();
637
+
616
638
// Skip other unrelated line, find the next valid LBR line
617
639
// Note that even for empty call stack, we should skip the address at the
618
640
// bottom, otherwise the following pass may generate a truncated callstack
@@ -885,6 +907,7 @@ uint64_t PerfScriptReader::parseAggregatedCount(TraceStream &TraceIt) {
885
907
}
886
908
887
909
void PerfScriptReader::parseSample (TraceStream &TraceIt) {
910
+ NumTotalSample++;
888
911
uint64_t Count = parseAggregatedCount (TraceIt);
889
912
assert (Count >= 1 && " Aggregated count should be >= 1!" );
890
913
parseSample (TraceIt, Count);
@@ -1131,6 +1154,11 @@ void PerfScriptReader::parsePerfTraces() {
1131
1154
// Parse perf traces and do aggregation.
1132
1155
parseAndAggregateTrace ();
1133
1156
1157
+ emitWarningSummary (NumLeafExternalFrame, NumTotalSample,
1158
+ " of samples have leaf external frame in call stack." );
1159
+ emitWarningSummary (NumLeadingOutgoingLBR, NumTotalSample,
1160
+ " of samples have leading external LBR." );
1161
+
1134
1162
// Generate unsymbolized profile.
1135
1163
warnTruncatedStack ();
1136
1164
warnInvalidRange ();
0 commit comments