3

I want to check the largest integer that can be held in various floating-point types in C without loss of precision. Here is a test program:

#include <stdio.h>
#include <stdlib.h>
#include <float.h>

#define FLOATTYPE long double
#define ONE ((FLOATTYPE)1.0)
#define TWO ((FLOATTYPE)2.0)

  int
main(int argc,char*argv[]){
  int i;
  FLOATTYPE x;

  x = ONE;
  for(i=0;;++i){
    printf("1.0<<%3d: x=%.0Lf",i,(long double)x);
    if((x+ONE)!=x &&
       (x+ONE)- x == ONE){
      printf(" ... can increment without loss of precision\n");
    }else{
      printf(" ... cannot increment without loss of precision\n");
      break;
    }
    x *= TWO;
  }

  printf("FLT_RADIX = %d\n",FLT_RADIX);
  printf("FLT_MANT_DIG = %d\n",FLT_MANT_DIG);
  printf("DBL_MANT_DIG = %d\n",DBL_MANT_DIG);
  printf("LDBL_MANT_DIG = %d\n",LDBL_MANT_DIG);
  printf("\nsizeof(FLOATTYPE) = %lu\n",sizeof(x));
}

Some results (using gcc-9 (Ubuntu 9.4.0-1ubuntu1~16.04) 9.4.0):

  • When FLOATTYPE is float: sizeof is 4, and the loop exits at i==24, which equals FLT_MANT_DIG.

  • When FLOATTYPE is double: sizeof is 8, and the loop exits at i==53, which equals DBL_MANT_DIG.

  • When FLOATTYPE is __float128: sizeof is 16, and the loop exits at i==113.

They all make sense. However:

  • When FLOATTYPE is long double: sizeof is 16, and the loop exits at i==53, which does not equal LDBL_MANT_DIG (which is 64).

It seems like long double is taking more memory than double, but not giving increased precision. How come?


Edit: more detail on the compiler etc: This is on a Windows 10 Pro machine, hosting Ubuntu 16.04 in Window Subsystem for Linux 1. The compiler reports this from gcc-9 -v:

Using built-in specs.
COLLECT_GCC=gcc-9
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/9/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none:hsa
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu 9.4.0-1ubuntu1~16.04' --with-bugurl=file:///usr/share/doc/gcc-9/README.Bugs --enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,gm2 --prefix=/usr --with-gcc-major-version-only --program-suffix=-9 --program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id --libexecdir=/usr/lib --without-included-gettext --enable-threads=posix --libdir=/usr/lib --enable-nls --enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes --with-default-libstdcxx-abi=new --enable-gnu-unique-object --disable-vtable-verify --enable-plugin --with-system-zlib --with-target-system-zlib=auto --enable-objc-gc=auto --enable-multiarch --disable-werror --with-arch-32=i686 --with-abi=m64 --with-multilib-list=m32,m64,mx32 --enable-multilib --with-tune=generic --enable-offload-targets=nvptx-none=/build/gcc-9-SATzbE/gcc-9-9.4.0/debian/tmp-nvptx/usr,hsa --without-cuda-driver --enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu --target=x86_64-linux-gnu
Thread model: posix
gcc version 9.4.0 (Ubuntu 9.4.0-1ubuntu1~16.04)

The command to compile was simple: I commented out the definition of FLOATTYPE so that I could compile different versions, and ran:

gcc-9 test_precision0100.c -o test_precision0100_longdouble.exe -DFLOATTYPE="long double"

Then ran ./test_precision0100_longdouble.exe. The compiler does not give any warning messages with -Wall -Wextra -pedantic -std=c99, other than unused-parameter for argc and argv.

I get the same results with FLOATTYPE defined in the code as supplied above. I also get the same anomalous results with the built-in gcc v5.4.0, but not on another machine hosting Ubuntu 18.04 on WSL2. The output looks how you'd expect from my description, ending:

1.0<< 50: x=1125899906842624 ... can increment without loss of precision
1.0<< 51: x=2251799813685248 ... can increment without loss of precision
1.0<< 52: x=4503599627370496 ... can increment without loss of precision
1.0<< 53: x=9007199254740992 ... cannot increment without loss of precision
FLT_RADIX = 2
FLT_MANT_DIG = 24
DBL_MANT_DIG = 53
LDBL_MANT_DIG = 64

sizeof(FLOATTYPE) = 16

Here is "test_precision0100.s" from "gcc -S test_precision0100.c" as above:

    .file   "test_precision0100.c"
    .text
    .section    .rodata
.LC1:
    .string "1.0<<%3d: x=%.0Lf"
    .align 8
.LC2:
    .string " ... can increment without loss of precision"
    .align 8
.LC3:
    .string " ... cannot increment without loss of precision"
.LC4:
    .string "FLT_RADIX = %d\n"
.LC5:
    .string "FLT_MANT_DIG = %d\n"
.LC6:
    .string "DBL_MANT_DIG = %d\n"
.LC7:
    .string "LDBL_MANT_DIG = %d\n"
.LC8:
    .string "\nsizeof(FLOATTYPE) = %lu\n"
    .text
    .globl  main
    .type   main, @function
main:
.LFB2:
    .cfi_startproc
    pushq   %rbp
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp
    .cfi_def_cfa_register 6
    subq    $48, %rsp
    movl    %edi, -36(%rbp)
    movq    %rsi, -48(%rbp)
    fld1
    fstpt   -16(%rbp)
    movl    $0, -20(%rbp)
.L5:
    movl    -20(%rbp), %eax
    pushq   -8(%rbp)
    pushq   -16(%rbp)
    movl    %eax, %esi
    movl    $.LC1, %edi
    movl    $0, %eax
    call    printf
    addq    $16, %rsp
    fldt    -16(%rbp)
    fld1
    faddp   %st, %st(1)
    fldt    -16(%rbp)
    fucomip %st(1), %st
    jp  .L9
    fldt    -16(%rbp)
    fucomip %st(1), %st
    fstp    %st(0)
    je  .L2
    jmp .L7
.L9:
    fstp    %st(0)
.L7:
    fldt    -16(%rbp)
    fld1
    faddp   %st, %st(1)
    fldt    -16(%rbp)
    fsubrp  %st, %st(1)
    fld1
    fucomip %st(1), %st
    jp  .L10
    fld1
    fucomip %st(1), %st
    fstp    %st(0)
    jne .L2
    movl    $.LC2, %edi
    call    puts
    fldt    -16(%rbp)
    fadd    %st(0), %st
    fstpt   -16(%rbp)
    addl    $1, -20(%rbp)
    jmp .L5
.L10:
    fstp    %st(0)
.L2:
    movl    $.LC3, %edi
    call    puts
    nop
    movl    $2, %esi
    movl    $.LC4, %edi
    movl    $0, %eax
    call    printf
    movl    $24, %esi
    movl    $.LC5, %edi
    movl    $0, %eax
    call    printf
    movl    $53, %esi
    movl    $.LC6, %edi
    movl    $0, %eax
    call    printf
    movl    $64, %esi
    movl    $.LC7, %edi
    movl    $0, %eax
    call    printf
    movl    $16, %esi
    movl    $.LC8, %edi
    movl    $0, %eax
    call    printf
    movl    $0, %eax
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE2:
    .size   main, .-main
    .ident  "GCC: (Ubuntu 9.4.0-1ubuntu1~16.04) 9.4.0"
    .section    .note.GNU-stack,"",@progbits
Ed Wynn
  • 295
  • 1
  • 8
  • Can't reproduce: https://godbolt.org/z/3rqdvqj94 – dbush Mar 19 '23 at 18:30
  • Also, the latest gcc in the Ubuntu 16.04 repo is 5.4, although godbolt shows the same behavior for both versions. – dbush Mar 19 '23 at 18:48
  • I can only reproduce the described behavior by adding a `-mpc64` compiler option. Weird compiler settings perhaps? – teapot418 Mar 19 '23 at 18:54
  • Please update the question with the exact output, along with the command used to compile and the output of `gcc -v`. – dbush Mar 19 '23 at 18:55
  • It is strange that when `__float128` is available, the `long double` is reported to be the same size, but with only 64 bits significand. That is more typical of the 80-bit `long double`. – Weather Vane Mar 19 '23 at 18:57
  • @teapot418: A statement of the contents of something is not a statement of its relevance to the discussion or of conclusions that can be derived from it, and your (original) first sentence is rude and inappropriate. – Eric Postpischil Mar 19 '23 at 19:14
  • @TomServo: The question is not about what characteristics `long double` may have. The question is about the fact that OP’s C implementation reports `long double` has having 64 digits (bits) in the significand but behaves as if it has 53. – Eric Postpischil Mar 19 '23 at 19:21
  • To some extent, the most worrying part of the results (for me) is that the results do not match LDBL_MANT_DIG. If long double gains no precision over double, then that's OK. If it gains no precision but uses extra memory, that's weird. But if it promises to gain precision and doesn't, that's bad. Admittedly, I may have messed up the library paths or something when installing multiple compilers. How would I check or correct this? – Ed Wynn Mar 19 '23 at 19:21
  • What else could go wrong - [doing long double math in SSE?](https://marc.info/?l=gcc-bugs&m=105190040516318&w=1) What CPU do you have? And it may be time to take the disassembler to the compiler output whether it used fpu instructions. – teapot418 Mar 19 '23 at 19:22
  • CPU is Intel(R) Xeon(R) CPU E3-1240 v5. – Ed Wynn Mar 19 '23 at 19:23
  • So `-march=sandybridge -mtune=sandybridge`. Nope, does not reproduce that either. – teapot418 Mar 19 '23 at 19:27
  • I get the same executable (testing by md5sum) with "-mfpmath=387 -mno-sse", and with "-mfpmath=sse", as appended to "gcc-9 test_precision0100.c -o test_precision0100_longdouble.exe -DFLOATTYPE="long double" -Wall -Wextra -pedantic -std=c99". – Ed Wynn Mar 19 '23 at 19:35
  • 1
    [related WSL1 bug?](https://github.com/microsoft/WSL/issues/830), should not show up in WSL2. Also contains possible workaround for a single process (no fork). – teapot418 Mar 19 '23 at 19:49
  • @teapot1418 -- you could be right. I've just completely flipped a comment saying that I see the same results on a different machine with WSL2 -- I looked again, and in fact I get the expected non-anomalous results on the WSL2 machine. I've also added the compiler output, though it means nothing to me. – Ed Wynn Mar 19 '23 at 19:55
  • @teapot1418 -- YES! The _FPU_SETCW command that you pointed to in https://github.com/microsoft/WSL/issues/830 does fix the output. – Ed Wynn Mar 19 '23 at 20:03
  • @TomServo: I fail to see how the comment right above proves your point. You commented that every numeric type is “compiler/platform/target” dependent (formally, in the terminology of the C standard, “implementation-defined”). However, the issue was not what the C implementation defined. The problem was a bug in the C implementation. – Eric Postpischil Mar 19 '23 at 23:58

1 Answers1

7

That is a long-standing WSL1 bug - https://github.com/microsoft/WSL/issues/830

You can try to work around it by adding:

#include <fpu_control.h>
...
int main() {
    unsigned short cw = 0x37f;
    _FPU_SETCW(cw);

to your program.

teapot418
  • 1,239
  • 1
  • 3
  • 9
  • 1
    Yes, that works -- thanks! Do I need to report this anywhere, or shall I just upgrade to WSL2 and get on with my life? – Ed Wynn Mar 19 '23 at 20:04
  • 1
    @EdWynn it seems to have been thoroughly reported already. If you can, just move to WSL2. – teapot418 Mar 19 '23 at 20:05