Skip to content

Commit fcf2516

Browse files
authored
Parse module information from Go binaries .buildinfo (#2193)
Summary: Parse module information from the `.buildinfo` section of Go binaries. Our current Go uprobe implementation relies on parsing DWARF information. This is memory intensive and is not ideal for end users who want to keep the PEM's memory usage low (it results in 100-150MB memory spikes). Go embeds dependency versions within the `.buildinfo` ELF section, which provides an opportunity to identify memory offsets for eBPF programs without DWARF. This is how solutions like [OpenTelemetry's automatic Go instrumentation](https://github.com/open-telemetry/opentelemetry-go-instrumentation/blob/cb01aca4e72b8542401faf2fcf1ceba4248b4ba5/docs/how-it-works.md#instrumentation-stability) work. By providing access to this dependency information, we can provide a lighter-weight uprobe implementation for Go applications. Relevant Issues: N/A. Type of change: /kind feature. Test Plan: New tests verify the added functionality. --------- Signed-off-by: Dom Del Nano <ddelnano@gmail.com>
1 parent 244ab1b commit fcf2516

File tree

8 files changed

+316
-65
lines changed

8 files changed

+316
-65
lines changed

src/stirling/obj_tools/BUILD.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ pl_cc_test(
135135
srcs = ["go_syms_test.cc"],
136136
data = [
137137
"//src/stirling/obj_tools/testdata/go:test_binaries",
138+
"//src/stirling/obj_tools/testdata/go:test_buildinfo_with_mods",
138139
"//src/stirling/obj_tools/testdata/go:test_go_1_17_binary",
139140
"//src/stirling/obj_tools/testdata/go:test_go_1_19_binary",
140141
"//src/stirling/obj_tools/testdata/go:test_go_1_21_binary",

src/stirling/obj_tools/elf_reader.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ class ElfReader {
175175
* ElfAddressConverter::VirtualAddrToBinaryAddr is a more appropriate utility to use.
176176
*
177177
* Certain use cases may require this function, such as cases where the Go toolchain
178-
* embeds virtual addresses within a binary and must be parsed (See ReadGoBuildVersion and
178+
* embeds virtual addresses within a binary and must be parsed (See ReadGoBuildInfo and
179179
* ReadGoString in go_syms.cc).
180180
*/
181181
StatusOr<uint64_t> VirtualAddrToBinaryAddr(uint64_t virtual_addr);

src/stirling/obj_tools/go_syms.cc

Lines changed: 162 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,6 @@
1919
#include "src/stirling/obj_tools/go_syms.h"
2020
#include "src/stirling/utils/binary_decoder.h"
2121

22-
#include <utility>
23-
2422
namespace px {
2523
namespace stirling {
2624
namespace obj_tools {
@@ -70,10 +68,99 @@ StatusOr<std::string> ReadGoString(ElfReader* elf_reader, uint64_t ptr_size, uin
7068
return std::string(go_version_bytecode);
7169
}
7270

71+
// Extracts the semantic version from a Go version string (e.g., "go1.20.3").
72+
// This is how the version is formatted in the buildinfo header.
73+
StatusOr<std::string> ExtractSemVer(const std::string& input) {
74+
size_t go_pos = input.find("go"); // Find "go"
75+
if (go_pos == std::string::npos) {
76+
LOG(ERROR) << "Prefix 'go' not found in input.";
77+
return error::NotFound("Prefix 'go' not found in input.");
78+
}
79+
80+
size_t start = go_pos + 2; // Move past "go"
81+
size_t end = input.find(" ", start); // Find space delimiter after version
82+
if (end == std::string::npos) {
83+
end = input.size(); // If no space, take the rest of the string
84+
}
85+
86+
return input.substr(start, end - start);
87+
}
88+
89+
// This is modeled after go's runtime/debug package
90+
// https://github.com/golang/go/blob/93fb2c90740aef00553c9ce6a7cd4578c2469675/src/runtime/debug/mod.go#L158
91+
StatusOr<BuildInfo> ReadModInfo(const std::string& mod) {
92+
BuildInfo build_info;
93+
Module* last_module = nullptr;
94+
95+
for (std::string_view line : absl::StrSplit(mod, '\n')) {
96+
if (absl::StartsWith(line, "path\t")) {
97+
build_info.path = line.substr(5);
98+
} else if (absl::StartsWith(line, "mod\t")) {
99+
std::vector<std::string_view> mod_parts = absl::StrSplit(line.substr(4), '\t');
100+
101+
// The sum is optional, so each line must have either 2 or 3 parts.
102+
auto size = mod_parts.size();
103+
if (size != 2 && size != 3) {
104+
return error::InvalidArgument(absl::Substitute("Invalid mod line format: $0", line));
105+
}
106+
build_info.main.path = mod_parts[0];
107+
build_info.main.version = mod_parts[1];
108+
if (size == 3) {
109+
build_info.main.sum = mod_parts[2];
110+
}
111+
VLOG(2) << absl::Substitute("mod.path=$0, mod.version=$1, mod.sum=$2", build_info.main.path,
112+
build_info.main.version, build_info.main.sum);
113+
last_module = &build_info.main;
114+
} else if (absl::StartsWith(line, "dep\t")) {
115+
std::vector<std::string_view> dep_parts = absl::StrSplit(line.substr(4), '\t');
116+
117+
// The sum is optional, so each line must have either 2 or 3 parts.
118+
auto size = dep_parts.size();
119+
if (size != 2 && size != 3) {
120+
return error::InvalidArgument(absl::Substitute("Invalid dep line format: $0", line));
121+
}
122+
Module dep;
123+
dep.path = dep_parts[0];
124+
dep.version = dep_parts[1];
125+
if (size == 3) {
126+
dep.sum = dep_parts[2];
127+
}
128+
129+
build_info.deps.push_back(std::move(dep));
130+
last_module = &build_info.deps.back();
131+
132+
VLOG(2) << absl::Substitute("dep.path=$0, dep.version=$1, dep.sum=$2", dep.path, dep.version,
133+
dep.sum);
134+
135+
} else if (absl::StartsWith(line, "=>\t")) {
136+
if (last_module == nullptr) {
137+
return error::InvalidArgument(
138+
"Unexpected module replacement line with no preceding module.");
139+
}
140+
std::vector<std::string_view> replace_parts = absl::StrSplit(line.substr(3), '\t');
141+
142+
if (replace_parts.size() != 3) {
143+
return error::InvalidArgument(
144+
absl::Substitute("Invalid module replacement line format: $0", line));
145+
}
146+
147+
std::unique_ptr<Module> replacement = std::make_unique<Module>();
148+
replacement->path = replace_parts[0];
149+
replacement->version = replace_parts[1];
150+
replacement->sum = replace_parts[2];
151+
last_module->replace = std::move(replacement);
152+
}
153+
// TODO(ddelnano): Handle the build flags line in the future
154+
// (https://github.com/golang/go/blob/93fb2c90740aef00553c9ce6a7cd4578c2469675/src/runtime/debug/mod.go#L171).
155+
// This is omitted for now since it doesn't help with Go uprobes.
156+
}
157+
return build_info;
158+
}
159+
73160
// Reads the buildinfo header embedded in the .go.buildinfo ELF section in order to determine the go
74161
// toolchain version. This function emulates what the go version cli performs as seen
75162
// https://github.com/golang/go/blob/cb7a091d729eab75ccfdaeba5a0605f05addf422/src/debug/buildinfo/buildinfo.go#L151-L221
76-
StatusOr<std::string> ReadGoBuildVersion(ElfReader* elf_reader) {
163+
StatusOr<std::pair<std::string, BuildInfo>> ReadGoBuildInfo(ElfReader* elf_reader) {
77164
PX_ASSIGN_OR_RETURN(ELFIO::section * section, elf_reader->SectionWithName(kGoBuildInfoSection));
78165
int offset = section->get_offset();
79166
PX_ASSIGN_OR_RETURN(std::string_view buildInfoByteCode,
@@ -85,63 +172,90 @@ StatusOr<std::string> ReadGoBuildVersion(ElfReader* elf_reader) {
85172
PX_ASSIGN_OR_RETURN(uint8_t ptr_size, binary_decoder.ExtractBEInt<uint8_t>());
86173
PX_ASSIGN_OR_RETURN(uint8_t endianness, binary_decoder.ExtractBEInt<uint8_t>());
87174

175+
BuildInfo build_info;
176+
std::string go_version;
177+
std::string mod_info;
88178
// If the endianness has its second bit set, then the go version immediately follows the 32 bit
89179
// header specified by the varint encoded string data
90180
if ((endianness & 0x2) != 0) {
91181
// Skip the remaining 16 bytes of buildinfo header
92182
PX_CHECK_OK(binary_decoder.ExtractBufIgnore(16));
93183

94184
PX_ASSIGN_OR_RETURN(uint64_t size, binary_decoder.ExtractUVarInt());
95-
PX_ASSIGN_OR_RETURN(std::string_view go_version, binary_decoder.ExtractString(size));
96-
return std::string(go_version);
97-
}
98-
99-
read_ptr_func_t read_ptr;
100-
switch (endianness) {
101-
case 0x0: {
102-
if (ptr_size == 4) {
103-
read_ptr = [&](u8string_view str_view) {
104-
return utils::LEndianBytesToInt<uint32_t, 4>(str_view);
105-
};
106-
} else if (ptr_size == 8) {
107-
read_ptr = [&](u8string_view str_view) {
108-
return utils::LEndianBytesToInt<uint64_t, 8>(str_view);
109-
};
110-
} else {
111-
return error::NotFound(absl::Substitute(
112-
"Binary reported pointer size=$0, refusing to parse non go binary", ptr_size));
185+
PX_ASSIGN_OR_RETURN(go_version, binary_decoder.ExtractString(size));
186+
187+
PX_ASSIGN_OR_RETURN(uint64_t mod_size, binary_decoder.ExtractUVarInt());
188+
PX_ASSIGN_OR_RETURN(mod_info, binary_decoder.ExtractString(mod_size));
189+
} else {
190+
read_ptr_func_t read_ptr;
191+
switch (endianness) {
192+
case 0x0: {
193+
if (ptr_size == 4) {
194+
read_ptr = [&](u8string_view str_view) {
195+
return utils::LEndianBytesToInt<uint32_t, 4>(str_view);
196+
};
197+
} else if (ptr_size == 8) {
198+
read_ptr = [&](u8string_view str_view) {
199+
return utils::LEndianBytesToInt<uint64_t, 8>(str_view);
200+
};
201+
} else {
202+
return error::NotFound(absl::Substitute(
203+
"Binary reported pointer size=$0, refusing to parse non go binary", ptr_size));
204+
}
205+
break;
113206
}
114-
break;
115-
}
116-
case 0x1:
117-
if (ptr_size == 4) {
118-
read_ptr = [&](u8string_view str_view) {
119-
return utils::BEndianBytesToInt<uint64_t, 4>(str_view);
120-
};
121-
} else if (ptr_size == 8) {
122-
read_ptr = [&](u8string_view str_view) {
123-
return utils::BEndianBytesToInt<uint64_t, 8>(str_view);
124-
};
125-
} else {
126-
return error::NotFound(absl::Substitute(
127-
"Binary reported pointer size=$0, refusing to parse non go binary", ptr_size));
207+
case 0x1:
208+
if (ptr_size == 4) {
209+
read_ptr = [&](u8string_view str_view) {
210+
return utils::BEndianBytesToInt<uint64_t, 4>(str_view);
211+
};
212+
} else if (ptr_size == 8) {
213+
read_ptr = [&](u8string_view str_view) {
214+
return utils::BEndianBytesToInt<uint64_t, 8>(str_view);
215+
};
216+
} else {
217+
return error::NotFound(absl::Substitute(
218+
"Binary reported pointer size=$0, refusing to parse non go binary", ptr_size));
219+
}
220+
break;
221+
default: {
222+
auto msg =
223+
absl::Substitute("Invalid endianness=$0, refusing to parse non go binary", endianness);
224+
DCHECK(false) << msg;
225+
return error::NotFound(msg);
128226
}
129-
break;
130-
default: {
131-
auto msg =
132-
absl::Substitute("Invalid endianness=$0, refusing to parse non go binary", endianness);
133-
DCHECK(false) << msg;
134-
return error::NotFound(msg);
135227
}
136-
}
137228

138-
// Reads the virtual address location of the runtime.buildVersion symbol.
139-
PX_ASSIGN_OR_RETURN(auto runtime_version_vaddr,
140-
binary_decoder.ExtractString<u8string_view::value_type>(ptr_size));
141-
PX_ASSIGN_OR_RETURN(uint64_t ptr_addr,
142-
elf_reader->VirtualAddrToBinaryAddr(read_ptr(runtime_version_vaddr)));
229+
// Reads the virtual address location of the runtime.buildVersion symbol.
230+
PX_ASSIGN_OR_RETURN(auto runtime_version_vaddr,
231+
binary_decoder.ExtractString<u8string_view::value_type>(ptr_size));
232+
PX_ASSIGN_OR_RETURN(auto mod_info_vaddr,
233+
binary_decoder.ExtractString<u8string_view::value_type>(ptr_size));
234+
PX_ASSIGN_OR_RETURN(uint64_t ptr_addr,
235+
elf_reader->VirtualAddrToBinaryAddr(read_ptr(runtime_version_vaddr)));
236+
237+
PX_ASSIGN_OR_RETURN(go_version, ReadGoString(elf_reader, ptr_size, ptr_addr, read_ptr));
238+
239+
auto mod_ptr_addr_s = elf_reader->VirtualAddrToBinaryAddr(read_ptr(mod_info_vaddr));
240+
if (mod_ptr_addr_s.ok()) {
241+
PX_ASSIGN_OR_RETURN(mod_info, ReadGoString(elf_reader, ptr_size,
242+
mod_ptr_addr_s.ConsumeValueOrDie(), read_ptr));
243+
}
244+
}
143245

144-
return ReadGoString(elf_reader, ptr_size, ptr_addr, read_ptr);
246+
auto mod_size = mod_info.size();
247+
if (mod_size > 0) {
248+
// The module info string is delimited by the sentinel strings cmd/go/internal/modload.infoStart
249+
// and infoEnd. These strings are 16 characters long, so first check that the module info
250+
// contains more than the sentinel strings. This check reflects upstream's implementation
251+
// https://github.com/golang/go/blob/cb7a091d729eab75ccfdaeba5a0605f05addf422/src/debug/buildinfo/buildinfo.go#L214-L215
252+
if (mod_size >= 33 && mod_info.at(mod_size - 17) == '\n') {
253+
mod_info.erase(0, 16);
254+
PX_ASSIGN_OR_RETURN(build_info, ReadModInfo(mod_info));
255+
}
256+
}
257+
PX_ASSIGN_OR_RETURN(auto s, ExtractSemVer(go_version));
258+
return std::make_pair(s, std::move(build_info));
145259
}
146260

147261
// Prefixes used to search for itable symbols in the binary. Follows the format:

src/stirling/obj_tools/go_syms.h

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,10 @@
1818

1919
#pragma once
2020

21+
#include <memory>
2122
#include <string>
2223
#include <string_view>
24+
#include <utility>
2325
#include <vector>
2426

2527
#include <absl/container/flat_hash_map.h>
@@ -33,11 +35,24 @@ namespace obj_tools {
3335
// Returns true if the executable is built by Golang.
3436
bool IsGoExecutable(ElfReader* elf_reader);
3537

36-
// Returns the build version of a Golang executable. The executable is read through the input
37-
// elf_reader.
38-
// TODO(yzhao): We'll use this to determine the corresponding Golang executable's TLS data
39-
// structures and their offsets.
40-
StatusOr<std::string> ReadGoBuildVersion(ElfReader* elf_reader);
38+
// A single Go module entry taken from a binary's embedded module information.
struct Module {
  // First tab-separated field of a mod/dep/replacement line.
  std::string path;
  // Second tab-separated field of the line.
  std::string version;
  // Third field of the line; optional on mod/dep lines, so it may be empty.
  std::string sum;
  // Module named by a "=>" replacement line following this entry; null when there is none.
  std::unique_ptr<Module> replace{};
};
44+
45+
struct BuildInfo {
46+
std::string path;
47+
Module main;
48+
std::vector<Module> deps;
49+
std::vector<std::pair<std::string, std::string>> settings;
50+
};
51+
52+
StatusOr<BuildInfo> ReadModInfo(const std::string& mod);
53+
// Returns the build version and buildinfo of a Golang executable. The executable is read through
54+
// the input elf_reader.
55+
StatusOr<std::pair<std::string, BuildInfo>> ReadGoBuildInfo(ElfReader* elf_reader);
4156

4257
// Describes a Golang type that implement an interface.
4358
struct IntfImplTypeInfo {

0 commit comments

Comments
 (0)