Skip to content

Commit cb1a7d2

Browse files
authored
[symbolizer] Support symbol+offset lookup (#75067)
GNU addr2line supports lookup by symbol name in addition to the existing address lookup, and llvm-symbolizer has supported lookup by symbol name since e144ae5. This change extends that lookup with the ability to specify an optional offset, so the address for which source information is searched can now be given as a symbol plus an offset: llvm-symbolizer --obj=abc.so "SYMBOL func_22+0x12" This narrows the feature gap between llvm-symbolizer and GNU addr2line. This lookup is currently supported for code only. Migrated from: https://reviews.llvm.org/D139859 Pull request: #75067
1 parent 229273f commit cb1a7d2

File tree

8 files changed

+101
-34
lines changed

8 files changed

+101
-34
lines changed

llvm/include/llvm/DebugInfo/Symbolize/SymbolizableModule.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class SymbolizableModule {
3737
symbolizeFrame(object::SectionedAddress ModuleOffset) const = 0;
3838

3939
virtual std::vector<object::SectionedAddress>
40-
findSymbol(StringRef Symbol) const = 0;
40+
findSymbol(StringRef Symbol, uint64_t Offset) const = 0;
4141

4242
// Return true if this is a 32-bit x86 PE COFF module.
4343
virtual bool isWin32Module() const = 0;

llvm/include/llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class SymbolizableObjectFile : public SymbolizableModule {
4444
std::vector<DILocal>
4545
symbolizeFrame(object::SectionedAddress ModuleOffset) const override;
4646
std::vector<object::SectionedAddress>
47-
findSymbol(StringRef Symbol) const override;
47+
findSymbol(StringRef Symbol, uint64_t Offset) const override;
4848

4949
// Return true if this is a 32-bit x86 PE COFF module.
5050
bool isWin32Module() const override;

llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -105,12 +105,12 @@ class LLVMSymbolizer {
105105
symbolizeFrame(ArrayRef<uint8_t> BuildID,
106106
object::SectionedAddress ModuleOffset);
107107

108-
Expected<std::vector<DILineInfo>> findSymbol(const ObjectFile &Obj,
109-
StringRef Symbol);
110-
Expected<std::vector<DILineInfo>> findSymbol(StringRef ModuleName,
111-
StringRef Symbol);
112-
Expected<std::vector<DILineInfo>> findSymbol(ArrayRef<uint8_t> BuildID,
113-
StringRef Symbol);
108+
Expected<std::vector<DILineInfo>>
109+
findSymbol(const ObjectFile &Obj, StringRef Symbol, uint64_t Offset);
110+
Expected<std::vector<DILineInfo>>
111+
findSymbol(const std::string &ModuleName, StringRef Symbol, uint64_t Offset);
112+
Expected<std::vector<DILineInfo>>
113+
findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol, uint64_t Offset);
114114

115115
void flush();
116116

@@ -155,8 +155,8 @@ class LLVMSymbolizer {
155155
symbolizeFrameCommon(const T &ModuleSpecifier,
156156
object::SectionedAddress ModuleOffset);
157157
template <typename T>
158-
Expected<std::vector<DILineInfo>> findSymbolCommon(const T &ModuleSpecifier,
159-
StringRef Symbol);
158+
Expected<std::vector<DILineInfo>>
159+
findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol, uint64_t Offset);
160160

161161
Expected<SymbolizableModule *> getOrCreateModuleInfo(const ObjectFile &Obj);
162162

llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -352,12 +352,14 @@ std::vector<DILocal> SymbolizableObjectFile::symbolizeFrame(
352352
}
353353

354354
std::vector<object::SectionedAddress>
355-
SymbolizableObjectFile::findSymbol(StringRef Symbol) const {
355+
SymbolizableObjectFile::findSymbol(StringRef Symbol, uint64_t Offset) const {
356356
std::vector<object::SectionedAddress> Result;
357357
for (const SymbolDesc &Sym : Symbols) {
358358
if (Sym.Name.equals(Symbol)) {
359-
object::SectionedAddress A{Sym.Addr,
360-
getModuleSectionIndexForAddress(Sym.Addr)};
359+
uint64_t Addr = Sym.Addr;
360+
if (Offset < Sym.Size)
361+
Addr += Offset;
362+
object::SectionedAddress A{Addr, getModuleSectionIndexForAddress(Addr)};
361363
Result.push_back(A);
362364
}
363365
}

llvm/lib/DebugInfo/Symbolize/Symbolize.cpp

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,8 @@ LLVMSymbolizer::symbolizeFrame(ArrayRef<uint8_t> BuildID,
233233

234234
template <typename T>
235235
Expected<std::vector<DILineInfo>>
236-
LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol) {
236+
LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol,
237+
uint64_t Offset) {
237238
auto InfoOrErr = getOrCreateModuleInfo(ModuleSpecifier);
238239
if (!InfoOrErr)
239240
return InfoOrErr.takeError();
@@ -246,7 +247,7 @@ LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol) {
246247
if (!Info)
247248
return Result;
248249

249-
for (object::SectionedAddress A : Info->findSymbol(Symbol)) {
250+
for (object::SectionedAddress A : Info->findSymbol(Symbol, Offset)) {
250251
DILineInfo LineInfo = Info->symbolizeCode(
251252
A, DILineInfoSpecifier(Opts.PathStyle, Opts.PrintFunctions),
252253
Opts.UseSymbolTable);
@@ -261,18 +262,21 @@ LLVMSymbolizer::findSymbolCommon(const T &ModuleSpecifier, StringRef Symbol) {
261262
}
262263

263264
Expected<std::vector<DILineInfo>>
264-
LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol) {
265-
return findSymbolCommon(Obj, Symbol);
265+
LLVMSymbolizer::findSymbol(const ObjectFile &Obj, StringRef Symbol,
266+
uint64_t Offset) {
267+
return findSymbolCommon(Obj, Symbol, Offset);
266268
}
267269

268270
Expected<std::vector<DILineInfo>>
269-
LLVMSymbolizer::findSymbol(StringRef ModuleName, StringRef Symbol) {
270-
return findSymbolCommon(ModuleName.str(), Symbol);
271+
LLVMSymbolizer::findSymbol(const std::string &ModuleName, StringRef Symbol,
272+
uint64_t Offset) {
273+
return findSymbolCommon(ModuleName, Symbol, Offset);
271274
}
272275

273276
Expected<std::vector<DILineInfo>>
274-
LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol) {
275-
return findSymbolCommon(BuildID, Symbol);
277+
LLVMSymbolizer::findSymbol(ArrayRef<uint8_t> BuildID, StringRef Symbol,
278+
uint64_t Offset) {
279+
return findSymbolCommon(BuildID, Symbol, Offset);
276280
}
277281

278282
void LLVMSymbolizer::flush() {

llvm/test/tools/llvm-symbolizer/symbol-search.test

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,40 @@ RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_01 func_02 | FileCheck --ch
3434
FUNCS: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12
3535
FUNCS: /tmp/dbginfo{{[/\]+}}symbols.part2.cpp:10
3636

37+
# Symbol may be combined with offset.
38+
RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_01+0 | FileCheck --check-prefix=SYMLINE0 %s
39+
RUN: llvm-addr2line -e %p/Inputs/symbols.so func_01+0 | FileCheck --check-prefix=SYMLINE0 %s
40+
SYMLINE0: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:12
41+
42+
RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_01+12 | FileCheck --check-prefix=SYMLINE1 %s
43+
RUN: llvm-addr2line -e %p/Inputs/symbols.so func_01+12 | FileCheck --check-prefix=SYMLINE1 %s
44+
SYMLINE1: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:13
45+
46+
RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_01+19 | FileCheck --check-prefix=SYMLINE2 %s
47+
RUN: llvm-addr2line -e %p/Inputs/symbols.so func_01+19 | FileCheck --check-prefix=SYMLINE2 %s
48+
SYMLINE2: /tmp/dbginfo{{[/\]+}}symbols.part1.cpp:14
49+
50+
# Offset can be specified with various bases.
51+
RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_01+0x0C | FileCheck --check-prefix=SYMLINE1 %s
52+
RUN: llvm-addr2line -e %p/Inputs/symbols.so func_01+0x0C | FileCheck --check-prefix=SYMLINE1 %s
53+
54+
RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_01+014 | FileCheck --check-prefix=SYMLINE1 %s
55+
RUN: llvm-addr2line -e %p/Inputs/symbols.so func_01+014 | FileCheck --check-prefix=SYMLINE1 %s
56+
57+
# If '+' is not followed by a number, it is part of the symbol name, not an offset separator.
58+
RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_01+ | FileCheck --check-prefix=NONEXISTENT %s
59+
RUN: llvm-addr2line --obj=%p/Inputs/symbols.so func_01+ | FileCheck --check-prefix=NONEXISTENT %s
60+
61+
RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_01+abc | FileCheck --check-prefix=NONEXISTENT %s
62+
RUN: llvm-addr2line --obj=%p/Inputs/symbols.so func_01+abc | FileCheck --check-prefix=NONEXISTENT %s
63+
64+
RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so func_01+0A | FileCheck --check-prefix=NONEXISTENT %s
65+
RUN: llvm-addr2line --obj=%p/Inputs/symbols.so func_01+0A | FileCheck --check-prefix=NONEXISTENT %s
66+
67+
# If '+' is not preceded by a symbol, it is part of the symbol name, not an offset separator.
68+
RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so +0x1138 | FileCheck --check-prefix=NONEXISTENT %s
69+
RUN: llvm-addr2line --obj=%p/Inputs/symbols.so +0x1138 | FileCheck --check-prefix=NONEXISTENT %s
70+
3771
# Show that C++ mangled names may be specified.
3872
RUN: llvm-addr2line --obj=%p/Inputs/symbols.so _ZL14static_func_01i | FileCheck --check-prefix=MULTI-CXX %s
3973
RUN: llvm-symbolizer --obj=%p/Inputs/symbols.so _ZL14static_func_01i | FileCheck --check-prefix=MULTI-CXX %s

llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ static Error makeStringError(StringRef Msg) {
159159
static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
160160
StringRef InputString, Command &Cmd,
161161
std::string &ModuleName, object::BuildID &BuildID,
162-
StringRef &Symbol, uint64_t &ModuleOffset) {
162+
StringRef &Symbol, uint64_t &Offset) {
163163
ModuleName = BinaryName;
164164
if (InputString.consume_front("CODE ")) {
165165
Cmd = Command::Code;
@@ -224,25 +224,51 @@ static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,
224224
return makeStringError("no input filename has been specified");
225225
}
226226

227-
// Parse module offset, which can be specified as a number or as a symbol.
228-
InputString = InputString.ltrim();
227+
// Parse address specification, which can be an offset in module or a
228+
// symbol with optional offset.
229+
InputString = InputString.trim();
229230
if (InputString.empty())
230231
return makeStringError("no module offset has been specified");
231232

232233
// If input string contains a space, ignore everything after it. This behavior
233234
// is consistent with GNU addr2line.
234-
int OffsetLength = InputString.find_first_of(" \n\r");
235-
StringRef Offset = InputString.substr(0, OffsetLength);
235+
int AddrSpecLength = InputString.find_first_of(" \n\r");
236+
StringRef AddrSpec = InputString.substr(0, AddrSpecLength);
237+
bool StartsWithDigit = std::isdigit(AddrSpec.front());
236238

237-
// GNU addr2line assumes the offset is hexadecimal and allows a redundant
239+
// GNU addr2line assumes the address is hexadecimal and allows a redundant
238240
// "0x" or "0X" prefix; do the same for compatibility.
239241
if (IsAddr2Line)
240-
Offset.consume_front("0x") || Offset.consume_front("0X");
242+
AddrSpec.consume_front("0x") || AddrSpec.consume_front("0X");
241243

242-
// If the input is not a number, treat it is a symbol.
243-
if (Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset)) {
244-
Symbol = Offset;
245-
ModuleOffset = 0;
244+
// If address specification is a number, treat it as a module offset.
245+
if (!AddrSpec.getAsInteger(IsAddr2Line ? 16 : 0, Offset)) {
246+
// Module offset is an address.
247+
Symbol = StringRef();
248+
return Error::success();
249+
}
250+
251+
// If address specification starts with a digit, but is not a number, consider
252+
// it as invalid.
253+
if (StartsWithDigit || AddrSpec.empty())
254+
return makeStringError("expected a number as module offset");
255+
256+
// Otherwise it is a symbol name, potentially with an offset.
257+
Symbol = AddrSpec;
258+
Offset = 0;
259+
260+
// If the address specification contains '+', try treating it as
261+
// "symbol + offset".
262+
size_t Plus = AddrSpec.rfind('+');
263+
if (Plus != StringRef::npos) {
264+
StringRef SymbolStr = AddrSpec.take_front(Plus);
265+
StringRef OffsetStr = AddrSpec.substr(Plus + 1);
266+
if (!SymbolStr.empty() && !OffsetStr.empty() &&
267+
!OffsetStr.getAsInteger(0, Offset)) {
268+
Symbol = SymbolStr;
269+
return Error::success();
270+
}
271+
// The found '+' is not an offset delimiter.
246272
}
247273

248274
return Error::success();
@@ -268,7 +294,7 @@ void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,
268294
print(SymRequest, ResOrErr, Printer);
269295
} else if (!Symbol.empty()) {
270296
Expected<std::vector<DILineInfo>> ResOrErr =
271-
Symbolizer.findSymbol(ModuleSpec, Symbol);
297+
Symbolizer.findSymbol(ModuleSpec, Symbol, Offset);
272298
print(SymRequest, ResOrErr, Printer);
273299
} else if (ShouldInline) {
274300
Expected<DIInliningInfo> ResOrErr =

llvm/unittests/ProfileData/MemProfTest.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@ class MockSymbolizer : public SymbolizableModule {
5454
virtual std::vector<DILocal> symbolizeFrame(SectionedAddress) const {
5555
llvm_unreachable("unused");
5656
}
57-
virtual std::vector<SectionedAddress> findSymbol(StringRef Symbol) const {
57+
virtual std::vector<SectionedAddress> findSymbol(StringRef Symbol,
58+
uint64_t Offset) const {
5859
llvm_unreachable("unused");
5960
}
6061
virtual bool isWin32Module() const { llvm_unreachable("unused"); }

0 commit comments

Comments
 (0)