During development of a pipeline in Halide, I want to avoid unnecessary checks on buffer layouts. I know I can turn off the majority of assertions using the 'no_asserts' target feature.
However, I generated the following simple code:
// Size in each dimension of the 4D LUT.
// Declared as a typed compile-time constant rather than a preprocessor macro
// so the value is scoped and type-checked.
constexpr int LUT_SIZE = 17;
// Generator that maps one line of 8-bit samples through an index table and
// then through a lookup table (the pipeline itself is defined in generate()).
class ApplyLut : public Halide::Generator<ApplyLut> {
public:
    // Pipeline inputs and outputs are public members. Their declaration order
    // is the argument order of the generated function, so do not reorder them.
    Input<Buffer<uint8_t>> Lut{"Lut", 1};                          // lookup table that is applied
    Input<Buffer<int>> indexToLut{"indexToLut", 1};                // precalculated uint8_t -> LUT index mapping
    Input<Buffer<uint8_t>> inputImageLine{"inputImageLine", 1};    // line of input samples
    Output<Buffer<uint8_t>> outputImageLine{"outputImageLine", 1}; // line of output samples

    // Builds the pipeline definition; implemented outside the class body.
    void generate();
};
// Registers the ApplyLut generator class with Halide's generator registry.
// NOTE(review): the second argument is presumably the registry name used to
// select this generator; it is spelled like the output buffer
// ("outputImageLine"), which may be unintentional - confirm.
HALIDE_REGISTER_GENERATOR(ApplyLut, outputImageLine)
// Defines the pipeline (outputImageLine = Lut[indexToLut[inputImageLine]]) and
// pins the layout of every buffer so the generated code can rely on fixed
// min/extent/stride values.
void ApplyLut::generate()
{
    Var x("x");

    // Halide's clamp() is inclusive on both ends, and Lut is constrained below
    // to exactly LUT_SIZE entries (valid indices 0 .. LUT_SIZE - 1). The upper
    // bound therefore has to be LUT_SIZE - 1; clamping to LUT_SIZE would allow
    // a read one element past the end of the table.
    outputImageLine(x) = Lut(clamp(indexToLut(inputImageLine(x)), 0, LUT_SIZE - 1));

    // Input line: starts at sample 0 and is densely packed.
    inputImageLine.dim(0).set_min(0);
    inputImageLine.dim(0).set_stride(1);

    // Output line: same range and stride as the input line.
    outputImageLine.dim(0).set_bounds(0, inputImageLine.dim(0).extent());
    outputImageLine.dim(0).set_stride(inputImageLine.dim(0).stride());

    // LUT: LUT_SIZE densely packed values.
    Lut.dim(0).set_bounds(0, LUT_SIZE);
    Lut.dim(0).set_stride(1);

    // Index table: one densely packed entry for each of the 256 possible
    // uint8_t sample values.
    indexToLut.dim(0).set_bounds(0, 256);
    indexToLut.dim(0).set_stride(1);
}
Among others, I used the target feature 'no_asserts' during generation (as can be seen in the target string of the output). I then get the following output code:
module name=applyIccProfile, target=x86-64-windows-disable_llvm_loop_opt-mingw-no_asserts-no_bounds_query-no_runtime-sse41 {
func applyIccProfile(Lut, indexToLut, inputImageLine, outputImageLine) {
assert((reinterpret(outputImageLine.buffer) != (uint64)0), halide_error_buffer_argument_is_null("outputImageLine"))
assert((reinterpret(inputImageLine.buffer) != (uint64)0), halide_error_buffer_argument_is_null("inputImageLine"))
assert((reinterpret(indexToLut.buffer) != (uint64)0), halide_error_buffer_argument_is_null("indexToLut"))
assert((reinterpret(Lut.buffer) != (uint64)0), halide_error_buffer_argument_is_null("Lut"))
let Lut = _halide_buffer_get_host(Lut.buffer)
let Lut.min.0 = _halide_buffer_get_min(Lut.buffer, 0)
let Lut.extent.0 = _halide_buffer_get_extent(Lut.buffer, 0)
let Lut.stride.0 = _halide_buffer_get_stride(Lut.buffer, 0)
let indexToLut = _halide_buffer_get_host(indexToLut.buffer)
let indexToLut.min.0 = _halide_buffer_get_min(indexToLut.buffer, 0)
let indexToLut.extent.0 = _halide_buffer_get_extent(indexToLut.buffer, 0)
let indexToLut.stride.0 = _halide_buffer_get_stride(indexToLut.buffer, 0)
let inputImageLine = _halide_buffer_get_host(inputImageLine.buffer)
let inputImageLine.min.0 = _halide_buffer_get_min(inputImageLine.buffer, 0)
let inputImageLine.extent.0 = _halide_buffer_get_extent(inputImageLine.buffer, 0)
let inputImageLine.stride.0 = _halide_buffer_get_stride(inputImageLine.buffer, 0)
let outputImageLine = _halide_buffer_get_host(outputImageLine.buffer)
let outputImageLine.min.0 = _halide_buffer_get_min(outputImageLine.buffer, 0)
let outputImageLine.extent.0 = _halide_buffer_get_extent(outputImageLine.buffer, 0)
let outputImageLine.stride.0 = _halide_buffer_get_stride(outputImageLine.buffer, 0)
assert((Lut.stride.0 == 1), 0)
assert((Lut.min.0 == 0), 0)
assert((Lut.extent.0 == 17), 0)
assert((indexToLut.stride.0 == 1), 0)
assert((indexToLut.min.0 == 0), 0)
assert((indexToLut.extent.0 == 256), 0)
assert((inputImageLine.stride.0 == 1), 0)
assert((inputImageLine.min.0 == 0), 0)
assert((outputImageLine.stride.0 == 1), 0)
assert((outputImageLine.min.0 == 0), 0)
assert((outputImageLine.extent.0 == inputImageLine.extent.0), 0)
produce outputImageLine {
for (outputImageLine.s0.x, 0, inputImageLine.extent.0) {
outputImageLine[outputImageLine.s0.x] = Lut[max(min(indexToLut[int32(inputImageLine[outputImageLine.s0.x])], 17), 0)]
}
}
}
}
In the generated output a number of assertions are present that check the dimensions of the buffers that are provided.
I know that these assertions are executed 'only' once for each call.
However, given the number of calls I would like to turn off these assertions,
because of the execution overhead.
So the questions are:
- How can I suppress the reads of min/extent/stride from the buffers for those values that are already known (because they were set in the generator code)?
- How can I turn off the generation of these assertions?