4

I'm toying with implementing NaN tagging in a little language implementation I'm writing in C. To do this, I need to take a double and poke directly at its bits.

I have it working now using union casting:

/*
 * A NaN-tagged value: the same storage viewed either as a double (num) or
 * as its raw bit pattern (bits).
 * NOTE(review): the macros that use this type treat bits as 64 bits wide
 * (they shift by 63); unsigned long is only guaranteed to be at least
 * 32 bits, so confirm its width on every target platform.
 */
typedef union
{
  double num;
  unsigned long bits;
} Value;

/*
 * A mask that selects the sign bit (bit 63) of the raw bit pattern.
 * NOTE(review): assumes unsigned long is at least 64 bits wide.
 */
#define SIGN_BIT (1UL << 63)

/*
 * The bits that must be set to indicate a quiet NaN (bits 51 through 62).
 * The UL suffix keeps the constant unsigned, like SIGN_BIT, so every mask
 * expression built from the two is purely unsigned.
 */
#define QNAN (0x7ff8000000000000UL)

/*
 * True if the value holds a number, i.e. not every QNAN bit is set in its
 * bit pattern. If all the NaN bits are set, it's not a number.
 */
#define IS_NUM(value) (((value).bits & QNAN) != QNAN)

/*
 * Convert a raw number to a Value.
 * Relies on casting to a union type, which is not standard ANSI C89.
 */
#define NUM_VAL(n) ((Value)(double)(n))

/*
 * Convert a Value representing a number to a raw double. The argument is
 * parenthesized so that expressions such as *ptr can be passed safely.
 */
#define AS_NUM(value) ((value).num)

/*
 * Converts a pointer to an Obj to a Value by tagging the pointer bits with
 * SIGN_BIT and QNAN. Round-trips through AS_OBJ only if the pointer has
 * none of those bits set. Also relies on the non-standard cast to a union.
 */
#define OBJ_VAL(obj) ((Value)(SIGN_BIT | QNAN | (unsigned long)(obj)))

/*
 * Converts a Value representing an Obj pointer to a raw Obj* by clearing
 * the SIGN_BIT and QNAN tag bits.
 */
#define AS_OBJ(value) ((Obj*)((value).bits & ~(SIGN_BIT | QNAN)))

But casting to a union type isn't standard ANSI C89. Is there a reliable way to do this that:

  1. Is -std=c89 -pedantic clean?
  2. Doesn't run afoul of strict aliasing rules?
  3. Can be used in an expression context like this:

    Value value = ...
    printf("The number value is %f\n", AS_NUM(value));
    
danfuzz
  • 4,253
  • 24
  • 34
munificent
  • 11,946
  • 2
  • 38
  • 55
  • 2
    `unsigned long` is 32 bits on many systems. You should probably use `uint64_t` from `stdint.h`: unfortunately, that is a C99 header. – gsg Nov 16 '13 at 05:13
  • Why do you want to use 1989 C? Most compilers today support at least C 1999, and aliasing through a union is supported in C 1999 (TC3). **New software should not be written in old, replaced languages** unless compelling reasons exist. – Eric Postpischil Nov 16 '13 at 11:34

1 Answer

3

Here is a quick proof-of-concept which compiles clean and appears to run correctly for me. I use memcpy to finesse the type-pun issue. This may of course be unacceptable in a real system, but it's at least portable. Likewise, I don't know if you intend to require that AS_NUM be implemented as a macro.

#include <stdio.h>
#include <string.h>

/* Byte storage exactly large enough to hold the representation of one double. */
typedef struct {
    char raw[sizeof(double)];
} Value;

/*
 * Wraps a double in a Value by copying its bytes with memcpy, so no
 * pointer or union cast is needed to reinterpret the bits.
 * Returns a Value whose raw bytes are the representation of d.
 */
static Value valueFromDouble(double d) {
    Value result;
    /* Copy exactly the bytes of d: sizeof(Value) could include padding
     * and would then read past the end of d. */
    memcpy(result.raw, &d, sizeof(d));
    return result;
}

/*
 * Recovers the double stored in a Value by copying its raw bytes back
 * into a double object and returning that object.
 */
static double AS_NUM(Value value) {
    double number;

    memcpy(&number, value.raw, sizeof number);
    return number;
}

/*
 * Demo entry point: stores 1.0 in a Value, reads it back with AS_NUM and
 * prints it. The command-line arguments are not used. Returns 0.
 */
int main(int argc, char **argv) {
    Value value = valueFromDouble(1.0);

    /* Explicitly discard the unused parameters. */
    (void)argc;
    (void)argv;

    printf("The number value is %f\n", AS_NUM(value));

    /* C89 gives main no implicit return value, so return one explicitly. */
    return 0;
}

Here's a transcript of it compiling (with Clang on OS X) and running:

$ cc -std=c89 -pedantic blort.c
$ ./a.out
The number value is 1.000000
danfuzz
  • 4,253
  • 24
  • 34
  • 1
    Very nice. Making a function call and then calling memcpy and returning a copy of the result seems a bit gratuitous just to reinterpret the exact same bits in place, but looking at the assembly output, it seems like the compiler optimizes it all away. Thanks! – munificent Nov 16 '13 at 00:55