I want to trace the execution paths of a program across multiple inputs. To generate inputs I currently use coverage-guided fuzzing: I first tried AFL and now use libFuzzer. To record execution paths I looked at XRay and SanitizerCoverage, both of which leverage LLVM instrumentation as far as I understand; the latter is used internally by libFuzzer for its coverage metrics.
If I combine XRay with libFuzzer, I get error messages about multiple function definitions:
#include <xray/xray_log_interface.h>
// Forward declaration of the fuzzer driver entry point that main() calls.
// No definition is provided in this file; it has to be supplied at link time.
extern "C" int LLVMFuzzerRunDriver(int *argc, char ***argv,
                                   int (*UserCb)(const uint8_t *Data, size_t Size));
// Fuzz target stub: the input buffer is ignored and 0 is returned for every
// input, so the target itself performs no work.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
    (void)data;  // intentionally unused
    (void)size;  // intentionally unused
    return 0;
}
// Program entry point: forwards the command-line arguments and the fuzz
// target callback to LLVMFuzzerRunDriver and returns whatever it returns.
int main(int argc, char **argv)
{
    const int driver_status = LLVMFuzzerRunDriver(&argc, &argv, LLVMFuzzerTestOneInput);
    return driver_status;
}
clang++-13 -o test test.cpp -fxray-instrument -fxray-instruction-threshold=1 -fsanitize=fuzzer-no-link -L /usr/lib/clang/13.0.0/lib/linux/ -l clang_rt.fuzzer_no_main-x86_64
If I instead use libFuzzer with my own SanitizerCoverage callback, linking fails with a multiple-definition error (shown after the code):
#include <cstdlib>
#include <inttypes.h>
#include <sanitizer/coverage_interface.h>
// Placeholder coverage callback: it reads the guard value but records
// nothing and returns in either case. The guard itself is never modified.
extern "C" void __sanitizer_cov_trace_pc_guard(uint32_t *guard)
{
    const bool guard_is_zero = (*guard == 0);
    if (guard_is_zero) {
        return;
    }
    // Non-zero guard: path recording would go here; currently a no-op.
}
// Forward declaration of the fuzzer driver entry point that main() calls.
// No definition is provided in this file; it has to be supplied at link time.
extern "C" int LLVMFuzzerRunDriver(int *argc, char ***argv,
                                   int (*UserCb)(const uint8_t *Data, size_t Size));
// Fuzz target stub: the input buffer is ignored and 0 is returned for every
// input, so the target itself performs no work.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
    (void)data;  // intentionally unused
    (void)size;  // intentionally unused
    return 0;
}
// Program entry point: forwards the command-line arguments and the fuzz
// target callback to LLVMFuzzerRunDriver and returns whatever it returns.
int main(int argc, char **argv)
{
    const int driver_status = LLVMFuzzerRunDriver(&argc, &argv, LLVMFuzzerTestOneInput);
    return driver_status;
}
$ clang++-13 -o test2 test2.cpp -fsanitize=fuzzer-no-link -fsanitize-coverage=trace-pc-guard -L /usr/lib/clang/13.0.0/lib/linux/ -l clang_rt.fuzzer_no_main-x86_64
/usr/bin/ld: /usr/lib/clang/13.0.0/lib/linux//libclang_rt.fuzzer_no_main-x86_64.a(fuzzer_no_main.o): in function `__sanitizer_cov_trace_pc_guard':
(.text.__sanitizer_cov_trace_pc_guard+0x0): multiple definition of `__sanitizer_cov_trace_pc_guard'; /tmp/test2-047590.o:test2.cpp:(.text+0x0): first defined here
Is it possible to combine out-of-the-box fuzzing (for generating 'interesting' inputs) with execution path tracing, or do I have to resort to something more demanding, like writing my own LLVM pass?