2

Updated question - first version below

I have set up a custom signal handler to buffer frames being removed from a CAN interface. In the handler, I block the signal first, and unblock it last, using pthread_sigmask().

In bufferMessageDirect(), the interface ID iface_id is checked, and in case of error, because this can only normally be my fault, assert(0) is called.

After my programme has been running for some time, but the exact time varies, with a stream of CAN frames arriving, this assert() is tripped and execution ends. When I inspect the signal handler stack frame, iface_id is 0 which is valid, but the argument can_interface_id as passed into bufferMessageDirect() is -1.

I have a dump of the incoming CAN frames, and there is nothing unusual about them. The stack frame for the signal handler indicates that the converted CAN message is as expected and the the interface ID is 0. In the stack frame of bufferMessageDirect(), the interface ID is -1.

Please would you offer advice on what might be causing this issue?

I added the calls to pthread_sigmask() because I thought that, maybe, the signal was firing while its handler was already in progress.

/************************************************************** EXTERNAL DATA */
extern can_metadata_t  can_metadata;
extern internal_time_t otherthing_heartbeat[THING_QUANTITY];
extern internal_time_t something_heartbeat[THING_QUANTITY];


/************************************************************** INTERNAL DATA */
STATIC volatile ssize_t          bytes_read  = 0;
STATIC volatile int              socket_desc = 0;
STATIC volatile struct can_frame frame       = {0};
STATIC volatile uint32_t         receiver_id = 0u;
STATIC volatile uint32_t         command_id  = 0u;
STATIC volatile uint32_t         function_id = 0u;
STATIC volatile int32_t          iface_id    = 0;
STATIC volatile cand_result_t    d_result    = CAND_RESULT_OK;
STATIC volatile canh_result_t    h_result    = CANH_RESULT_OK;

STATIC volatile internal_time_t *const heartbeats[2] =
{
    otherthing_heartbeat,
    something_heartbeat,
};

// ============================================================================
void
myHandler(int       const        signal_number,
          siginfo_t       *const p_signal_info,
          void            *const p_ucontext)
{
    uint8_t       ii           = 0u;
    uint8_t       thing_id     = 0u;
    bool          is_something = 0u;
    can_message_t message      = {0};
    int           std_result = 0;

    /* Mark as unwanted */
    (void) p_ucontext;

    if ((HANDLERS_SIGNUM_CAN_RX != signal_number) ||
        (NULL                   == p_signal_info))
    {
        /* No penalty for these conditions */
        return;
    }

    else
    {
        /* Block this signal */
        std_result = pthread_sigmask(SIG_BLOCK, &signal_set_can, NULL);

        /* This result is asserted because the only failure is if the SIG_BLOCK
         * action is invalid
         */
        assert(0 == std_result);

        socket_desc = p_signal_info->si_fd;
        bytes_read = read(socket_desc, &frame, sizeof(frame));

        if (bytes_read != sizeof(frame))
        {
            // ...
            goto unblock_signal;
        }
    }

    /* Is this an error frame? */
    if ((frame.can_id & CAN_ERR_FLAG) != 0u)
    {
        // ...
        goto unblock_signal;
    }

    /* Is this a frame with an 11-bit ID? */
    else if ((frame.can_id & CAN_EFF_FLAG) == 0u)
    {
        // ...
        goto unblock_signal;
    }

    /* Is this a frame with a 29-bit ID? */
    else
    {
        function_id = frame.can_id & CAN_EFF_MASK;
        command_id  = function_id;
        receiver_id = function_id;

        function_id &= MASK_FUNCTION_ID;
        function_id >>= POSITION_FUNCTION_ID;

        command_id &= MASK_COMMAND_ID;
        command_id >>= POSITION_COMMAND_ID;

        receiver_id &= MASK_RECEIVER_ID;

        thing_id = (frame.can_id & MASK_THING_ID) - 1u;
        is_something  = frame.can_id & MASK_RECEIVER_IS_SOMETHING;

        /* Update the housekeeping stats */
        if (function_id < FUNCTIONCODE_QUANTITY)
        {
            delivered_for_function_id[function_id]++;
        }
        else
        {
            delivered_for_function_id[FUNCTIONCODE_QUANTITY]++;
        }
    }

    /* Handle emergency messages */
    if (FUNCTIONCODE_EMGC == function_id)
    {
        // ...
        goto unblock_signal;
    }

    /* Handle heartbeats */
    if (FUNCTIONCODE_HB == function_id)
    {
        // Gets time from CLOCK_MONOTONIC_RAW and converts to microseconds
        (void) getRawTimeFormatInternal(&(heartbeats[is_something][thing_id]));

        goto unblock_signal;
    }

    /* Discard anything but Responses */
    if (FUNCTIONCODE_RESPONSE != function_id)
    {
        // ...
        goto unblock_signal;
    }

    /* Make the busy something available again */
    if (true == is_something)
    {
        something_busy_bits &= ~(1 << thing_id);
    }

    /* Otherwise, find the interface ID and push to the buffer */
    iface_id = -1;

    /* Find buffer first */
    for (ii = 0u; ii < CAN_INTERFACE_QUANTITY; ii++)
    {
        if (can_metadata.socket[ii] == socket_desc)
        {
            iface_id = (int32_t) ii;
            break;
        }
    }

    if (-1 == iface_id)
    {
        goto unblock_signal;
    }

    /* Otherwise convert and buffer */
    h_result = canFrameToMessage(&message, &frame);

    if (CAN_RESULT_OK != h_result)
    {
        // ...
        goto unblock_signal;
    }

    d_result = bufferMessageDirect((can_interface_id_t) iface_id, &message);

    if (CAN_RESULT_OK != d_result)
    {
        // ...
        assert(0);
    }

    // ........................................................................
    unblock_signal:

    /* Unblock this signal */
    std_result = pthread_sigmask(SIG_UNBLOCK, &signal_set_can, NULL);

    /* This result is asserted because the only failure is if the SIG_BLOCK
     * action is invalid
     */
    assert(0 == std_result);
}

// ============================================================================
cand_result_t
bufferMessageDirect(
    can_interface_id_t const        can_interface_id,
    can_message_t      const *const p_message)
{
    canh_result_t h_result = CANH_RESULT_OK;
    cand_result_t result   = CAND_RESULT_OK;

    h_result = validateInterfaceId(can_interface_id);

    if (CANH_RESULT_OK != h_result)
    {
        result = CAND_RESULT_INTERNAL_ERROR;
        assert(0); // This is tripped, because can_interface_id is -1
    }

    // ...
    // Push into buffer (call to buffer utility)

    return result;
}

Old question

I have set up a custom signal handler to buffer frames being removed from a CAN interface. In the handler, I block the signal first, and unblock it last, using pthread_sigmask().

The basic recipe is:

void
myHandler(
    int       const        signal_number,
    siginfo_t       *const p_signal_info,
    void            *const p_ucontext)
{
  check_signal_info();
  pthread_sigmask(SIG_BLOCK, set_of_this_signal_only);
  read(socket, &can_frame, sizeof(can_frame));
  derive_can_iface_id(&iface_id);

  buffer_push((iface_enum_type) iface_id, can_frame);

  pthread_sigmask(SIG_UNBLOCK, set_of_this_signal_only);
}

In buffer_push(), the interface ID iface_id is checked, and in case of error, because this can only normally be my fault, assert(0) is called.

After my programme has been running for some time, but the exact time varies, with a stream of CAN frames arriving, this assert() is tripped and execution ends. When I inspect the signal handler stack frame, iface_id is 0 which is valid, but the argument as passed into buffer_push() is -1.

I added the calls to pthread_sigmask() because I thought that, maybe, the signal was firing while its handler was already in progress.

The prototype for buffer_push() is:

result_enum_type
buffer_push(
    iface_enum_type const        can_interface_id,
    can_frame_type  const *const p_message)

iface_id is declared outside the function like so:

static volatile uint32_t iface_id = 0;

For the avoidance of doubt, below the call to buffer_push() it is all my own code --- no external calls. Also, I have a dump of the incoming CAN frames, and this signal handler correctly parses each one.

Please would you offer advice on what might be causing this issue?

Walkingbeard
  • 590
  • 5
  • 15
  • 1
    This is why friends don't let friends use signals - except with signalfd. – user253751 Jan 23 '23 at 20:26
  • 1
    Your call to `pthread_sigmask(SIG_UNBLOCK, set_of_this_signal_only);` will permit recursion risking stack overflow, Recursion is blocked by default. – Timothy Baldwin Jan 23 '23 at 21:09
  • @chux-ReinstateMonica, is this better? I have left out the actual push into the circular buffer, because a) much more code, b) in this problem, execution never reaches that point. – Walkingbeard Jan 24 '23 at 10:11
  • @user253751 So, if I understand this correctly, `signalfd` essential creates a file in which signals are buffered, and then you handle by polling until empty? – Walkingbeard Jan 24 '23 at 10:34
  • And **all** of that code you left out is completely async-signal-safe? FWIW, [`assert()` is **not** async-signal-safe](https://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03). – Andrew Henle Jan 24 '23 at 10:36
  • @TimothyBaldwin, I don't follow. Recursion implies that this handler calls itself. Do you mean that multiple instances of the signal could be handled at once? That is what the calls to `pthread_sigmask` are there to prevent. – Walkingbeard Jan 24 '23 at 10:38
  • 1
    `STATIC` instead of `static` is for [The International Obfuscated C Code Contest](https://www.ioccc.org/), I assume? That's the only rationale I can come up with. – Lundin Jan 24 '23 at 10:44
  • @Walkingbeard Basically yes, but the signals are not buffered *in* the signalfd, rather the signalfd allows you to read from the signal buffer that already exists in the signal system. When using signalfd, you should block the signals, so they stay in the buffer instead of calling the signal handler, and then you can read them through the signalfd. – user253751 Jan 25 '23 at 15:57
  • @Lundin Some codebases use a macro either for compatibility with weird compilers, or weird debuggers. – user253751 Jan 25 '23 at 16:28
  • `STATIC` is `static` in release and nothing in testing builds – Walkingbeard Jan 25 '23 at 16:44
  • @Walkingbeard When you call `pthread_sigmask(SIG_UNBLOCK, set_of_this_signal_only)` will call the signal handler immediately if the signal is pending, without it the signal handler will only be called again after it exits. – Timothy Baldwin Jan 27 '23 at 11:24
  • As far as I can see, sometimes the signal *was* pending, and the handler would be called again during that second call to `pthread_sigmask()`, presumably at a point after the unblocking and before the return. I think that if this is a problem, it surely arises from the same aspects of signal handling that other people have mentioned above, and that this will probably result in me ditching asynchronous CAN reception completely. – Walkingbeard Jan 27 '23 at 13:09

1 Answers1

1

How can the value of an argument in a function called from a signal handler be different from the value passed in?

How can that happen? Because the C standard says it can:

When the processing of the abstract machine is interrupted by receipt of a signal, the values of objects that are neither lock-free atomic objects nor of type volatile sig_atomic_t are unspecified, as is the state of the floating-point environment. The value of any object modified by the handler that is neither a lock-free atomic object nor of type volatile sig_atomic_t becomes indeterminate when the handler exits, as does the state of the floating-point environment if it is modified by the handler and not restored to its original state.

None of the following are either "lock-free atomic obects" or of "type volatile sig_atomic_t":

STATIC volatile ssize_t          bytes_read  = 0;
STATIC volatile int              socket_desc = 0;
STATIC volatile struct can_frame frame       = {0};
STATIC volatile uint32_t         receiver_id = 0u;
STATIC volatile uint32_t         command_id  = 0u;
STATIC volatile uint32_t         function_id = 0u;
STATIC volatile int32_t          iface_id    = 0;
STATIC volatile cand_result_t    d_result    = CAND_RESULT_OK;
STATIC volatile canh_result_t    h_result    = CANH_RESULT_OK;

STATIC volatile internal_time_t *const heartbeats[2] =
{
    otherthing_heartbeat,
    something_heartbeat,
};

Every single one of those variables has indeterminate value when your signal handler is invoked, and every one of those variables will become indeterminate when your signal handler returns if they're modified in the signal handler.

Note also footnote 188 of the (draft) C11 standard:

Thus, a signal handler cannot, in general, call standard library functions.

Basically, you can't safely do much of anything in a signal handler. Strictly-conforming C code can only modify objects as noted above. POSIX extends what a signal handler can safely do to calling only functions that are async-signal-safe - a very limited set. Windows has similar provisions. The Linux signal-safety man page provides the Linux-specific list of functions that are async-signal-safe on Linux.

What's potentially happening is there are race conditions between your normal processing and your signal processing. It works most of the time, but every now and then one of those race conditions appears and you get corrupted values.

Andrew Henle
  • 32,625
  • 3
  • 24
  • 56