0

During development of a pipeline in Halide, I want to avoid unnecessary checks on buffer layouts. I know I can turn off the majority of assertions using the 'no_asserts' target feature.

However, I generated the following simple code:

#define LUT_SIZE 17     /* Size in each dimension of the 4D LUT; generate() also uses it as the extent of the Lut input buffer */

// Halide generator whose pipeline is defined out of line in generate().
class ApplyLut : public Halide::Generator<ApplyLut> {
public:
    // Pipeline inputs and output, exposed as public members. The generated
    // function takes them as arguments in exactly this declaration order,
    // so do not reorder them.
    Input<Buffer<uint8_t>> Lut{"Lut", 1};                           // LUT to apply
    Input<Buffer<int>> indexToLut{"indexToLut", 1};                 // Precalculated mapping of uint8_t to LUT index
    Input<Buffer<uint8_t>> inputImageLine{"inputImageLine", 1};     // Input line
    Output<Buffer<uint8_t>> outputImageLine{"outputImageLine", 1};  // Output line

    // Builds the pipeline definition and the buffer layout constraints.
    void generate();
};

// Registers the ApplyLut generator class with Halide; by the macro's convention
// the second argument (outputImageLine) is the name it is registered under.
HALIDE_REGISTER_GENERATOR(ApplyLut, outputImageLine)

// Defines the pipeline: every input sample is first translated to a LUT index
// via indexToLut, and that index (clamped to the valid range) selects the
// output value from Lut. Also pins the layout of all four buffers.
void ApplyLut::generate()
{
  Var x("x");

  // clamp() bounds are inclusive, and Lut is constrained below to LUT_SIZE
  // entries starting at 0, so the last valid index is LUT_SIZE - 1.
  // Clamping to LUT_SIZE would allow a read one element past the end of Lut.
  outputImageLine(x) = Lut(clamp(indexToLut(inputImageLine(x)), 0, LUT_SIZE - 1));

  inputImageLine .dim(0).set_min(0);                                    // Input line starts at sample index 0
  inputImageLine .dim(0).set_stride(1);                                 // Input samples are densely packed
  outputImageLine.dim(0).set_bounds(0, inputImageLine.dim(0).extent()); // Output line matches input line
  outputImageLine.dim(0).set_stride(   inputImageLine.dim(0).stride()); // Output line matches input line
  Lut            .dim(0).set_bounds(0, LUT_SIZE);                       // Lut holds exactly LUT_SIZE values: indices 0 .. LUT_SIZE-1
  Lut            .dim(0).set_stride(1);                                 // Lut entries are densely packed
  indexToLut     .dim(0).set_bounds(0, 256);                            // One entry per possible uint8_t input value
  indexToLut     .dim(0).set_stride(1);                                 // indexToLut entries are densely packed
}

Among others, I used the target feature 'no_asserts' during generation (as can be seen in the target string of the output). I then get the following output code:

module name=applyIccProfile, target=x86-64-windows-disable_llvm_loop_opt-mingw-no_asserts-no_bounds_query-no_runtime-sse41 {
  func applyIccProfile(Lut, indexToLut, inputImageLine, outputImageLine) {
    assert((reinterpret(outputImageLine.buffer) != (uint64)0), halide_error_buffer_argument_is_null("outputImageLine"))
    assert((reinterpret(inputImageLine.buffer) != (uint64)0), halide_error_buffer_argument_is_null("inputImageLine"))
    assert((reinterpret(indexToLut.buffer) != (uint64)0), halide_error_buffer_argument_is_null("indexToLut"))
    assert((reinterpret(Lut.buffer) != (uint64)0), halide_error_buffer_argument_is_null("Lut"))
    let Lut = _halide_buffer_get_host(Lut.buffer)
    let Lut.min.0 = _halide_buffer_get_min(Lut.buffer, 0)
    let Lut.extent.0 = _halide_buffer_get_extent(Lut.buffer, 0)
    let Lut.stride.0 = _halide_buffer_get_stride(Lut.buffer, 0)
    let indexToLut = _halide_buffer_get_host(indexToLut.buffer)
    let indexToLut.min.0 = _halide_buffer_get_min(indexToLut.buffer, 0)
    let indexToLut.extent.0 = _halide_buffer_get_extent(indexToLut.buffer, 0)
    let indexToLut.stride.0 = _halide_buffer_get_stride(indexToLut.buffer, 0)
    let inputImageLine = _halide_buffer_get_host(inputImageLine.buffer)
    let inputImageLine.min.0 = _halide_buffer_get_min(inputImageLine.buffer, 0)
    let inputImageLine.extent.0 = _halide_buffer_get_extent(inputImageLine.buffer, 0)
    let inputImageLine.stride.0 = _halide_buffer_get_stride(inputImageLine.buffer, 0)
    let outputImageLine = _halide_buffer_get_host(outputImageLine.buffer)
    let outputImageLine.min.0 = _halide_buffer_get_min(outputImageLine.buffer, 0)
    let outputImageLine.extent.0 = _halide_buffer_get_extent(outputImageLine.buffer, 0)
    let outputImageLine.stride.0 = _halide_buffer_get_stride(outputImageLine.buffer, 0)

    assert((Lut.stride.0 == 1), 0)
    assert((Lut.min.0 == 0), 0)
    assert((Lut.extent.0 == 17), 0)
    assert((indexToLut.stride.0 == 1), 0)
    assert((indexToLut.min.0 == 0), 0)
    assert((indexToLut.extent.0 == 256), 0)
    assert((inputImageLine.stride.0 == 1), 0)
    assert((inputImageLine.min.0 == 0), 0)
    assert((outputImageLine.stride.0 == 1), 0)
    assert((outputImageLine.min.0 == 0), 0)
    assert((outputImageLine.extent.0 == inputImageLine.extent.0), 0)
    produce outputImageLine {
      for (outputImageLine.s0.x, 0, inputImageLine.extent.0) {
        outputImageLine[outputImageLine.s0.x] = Lut[max(min(indexToLut[int32(inputImageLine[outputImageLine.s0.x])], 17), 0)]
      }
    }
  }
}

In the generated output a number of assertions are present that check the dimensions of the buffers that are provided.

I know that these assertions are executed 'only' once for each call.
However, given the number of calls I would like to turn off these assertions, because of the execution overhead.

So the questions are:

  • How can I turn off the assignments w.r.t. min/extent/stride for those that are already known (because these were set in the generator code)?
  • How can I turn off the generation of these assertions?
PJM Aarts
  • 5
  • 3

1 Answer

0

While asserts still show up in the Halide IR with no_asserts on, any remaining ones get stripped in the final lowering to LLVM IR. They just exist in the Halide IR because they let the Halide simplifier know that something can be assumed to be true after that point in the code, but they compile to a no-op.

With the asserts gone, LLVM will dead-code-eliminate the unnecessary assignments. I'd check the generated assembly rather than the Halide IR to be sure all of those checks are gone.

Andrew Adams
  • 1,396
  • 7
  • 3