Proving the average value of an array

Question

Hello, I want to prove the computation of the average of all the values contained in a 1D array.

So far I have the following program:

#include <stdbool.h>

/* Local definition of size_t as unsigned int for this example. */
typedef unsigned int size_t;
/* avg: result type returned by average() -- a success flag plus the value. */
typedef struct Average avg;
struct Average
{
    bool success;   /* set to false by average() when size == 0, true otherwise */
    float average;  /* accumulated average; left at 0 when success is false */
};
/*
 * ACSL logic definitions used by the contract of average().
 *
 * Float_Div: f_div(a, b) is defined as the real division a/b. Axiom `div`
 * relates it to multiplication (for b != 0), and axiom `split` states that
 * it distributes over the addition of numerators.
 *
 * Average: average(t, start, stop, size) is declared without a body and is
 * constrained only by axioms: it is 0 on an empty range (start >= stop), it
 * is defined recursively on stop-1 when start < stop and size > 0, it can be
 * split at a middle index, and it has a closed form for a one-element range.
 *
 * NOTE(review): average_n and average_unit add f_div(stop-1, size), i.e. the
 * index stop-1 rather than the element t[stop-1], so none of these axioms
 * reads the array contents -- confirm whether t[stop-1] was intended.
 * NOTE(review): the quantified variable q in axiom `split` is unused.
 */
/*@
axiomatic Float_Div{
    logic real f_div(real a,real b) = a/b ;

    axiom div:
        \forall real q,a,b;  0 != b ==>
        (a == b*q <==> q == f_div(a, b));

    axiom split :
        \forall real q,a,b,c;  0 != b ==>
        f_div(a + c , b) == f_div(a,b) + f_div(c,b);

}

axiomatic Average {
    logic real average(int * t, integer start, integer stop, integer size);

    axiom average_0:
        \forall int *t, integer start , integer stop, size;
        start >= stop ==> average(t,start, stop, size) == 0;

    axiom average_n:
        \forall int *t, integer start , integer stop, integer size;
        start < stop && size >0  ==>
        average(t,start, stop, size) ==
        f_div((real)stop-1 ,(real) size) +( average(t,start, stop-1, size) );

    axiom average_split :
        \forall int *t, integer start ,integer middle, integer stop, integer size;
        start < middle < stop && size >0  ==>
        average(t,start, stop, size) ==  average(t,start, middle, size) + average(t,middle, stop, size);

    axiom average_unit :
        \forall int *t, integer start , integer stop, integer size;
        start == stop-1 && size >0  ==>
        average(t,start, stop, size) == f_div((real)stop-1 ,(real) size);

}

*/


/*
 * Computes the average of the first `size` elements of `array` by adding
 * (float)array[i] / size to a float accumulator for each index i.
 *
 * Parameters:
 *   array - pointer to the elements; the contract requires
 *           array[0..size-1] to be valid.
 *   size  - number of elements to average.
 *
 * Returns an avg whose success field is false (with average == 0) when
 * size == 0; otherwise success is true and average holds the accumulated
 * value.
 *
 * NOTE(review): the accumulation is performed in float, while the
 * postcondition and the loop invariant require exact equality with the
 * real-valued logic function average(); float rounding may make that
 * equality unprovable -- confirm.
 */
/*@
requires \valid(array + (0..size-1));
ensures (!\result.success) ==> size == 0 ;
ensures (\result.success) ==> \result.average == average(array, 0, size, size);
assigns \nothing;
*/
avg average(int * array, size_t size){
    avg ret;
    // Default result: success with a zero average.
    ret.success = true ;
    ret.average = 0 ;
    // An empty array has no average: report failure instead of dividing by 0.
    if (size == 0){
        ret.success = false;
        return ret;
    }
    // Running total of array[i] / size. This local shadows the function's
    // own name inside the rest of the body.
    float average = 0;
    /*@
    loop assigns i, average;
    loop invariant 0 <= i <= size;
    loop invariant  average(array , 0, i , size) == average;
    */
    for (size_t i = 0 ; i < size ; i ++){
        // Divide each element before adding it, rather than dividing the
        // total once at the end.
        float value = ((float)array[i] / size);
        average += value;
    }
    ret.average = average ;
    return ret;
}

Frama-C does not succeed in proving this loop invariant:

loop invariant  average(array , 0, i , size) == average;

Did I do something wrong? I don't know if my problem comes from the precision of the float. I tried with a lot of asserts and it's not working either. Is it something that can be done in Frama-C?

Edit :

I finally proved my function. I was doing the division before adding to the sum because every time I tried to compute the sum first I had an overflow.

The thing is, I needed to prove that my sum does not overflow, so I included limits.h and added a new loop invariant: `INT_MIN * i <= sum <= INT_MAX * i;`. My code now looks like this:

#include <stdbool.h>
#include <limits.h>

/* Local definition of size_t as unsigned int for this example. */
typedef unsigned int size_t;
/* avg: result type returned by average() -- a success flag plus the value. */
typedef struct Average avg;
struct Average
{
    bool success;       /* set to false by average() when size == 0, true otherwise */
    long long average;  /* sum / size (integer division); left at 0 when success is false */
};
/*
 * ACSL logic definition used by the contract and loop invariants of
 * average().
 *
 * sum(t, start, end) is declared without a body and constrained by axioms:
 *   sum_false      - the sum over an empty range (start >= stop) is 0;
 *   sum_true_start - a non-empty range can be split after its first element;
 *   sum_true_end   - a non-empty range can be split before its last element;
 *   sum_split      - a range can be split at any middle index;
 *   sum_alone      - the sum over a one-element range is that element,
 *                    t[start].
 */
/*@
axiomatic Sum{
    logic integer sum(int * t , integer start, integer end);

    axiom sum_false :
        \forall int *t, integer start , integer stop;
        start >= stop ==> sum(t,start,stop) == 0;

    axiom sum_true_start :
        \forall int *t, integer start , integer stop;
        0 <= start < stop ==>
        sum(t,start,stop) == sum(t,start,start+1) + sum(t,start+1,stop);

    axiom sum_true_end :
        \forall int *t, integer start , integer stop;
        0 <= start < stop ==>
        sum(t,start,stop) == sum(t,start,stop-1) + sum(t,stop-1,stop);

    axiom sum_split :
        \forall int *t, integer start , integer stop, integer middle;
        0 <= start<=  middle < stop ==>
        sum(t,start,stop) == sum(t,start,middle) + sum(t,middle,stop);


    axiom sum_alone :
        \forall int *t, integer start;
        (0<=start)
        ==>
        sum(t,start,start+1) == t[start] ;
}

*/
/*
 * Computes the integer average of the first `size` elements of `array`:
 * the elements are summed into a long long and the total is divided by
 * `size` once, after the loop.
 *
 * Parameters:
 *   array - pointer to the elements; the contract requires
 *           array[0..size-1] to be valid.
 *   size  - number of elements to average.
 *
 * Returns an avg whose success field is false (with average == 0) when
 * size == 0; otherwise success is true and average == sum / size.
 */
/*@
requires \valid(array + (0..size-1));
ensures (!\result.success) ==> size == 0 ;
ensures (\result.success) ==> (\result.average == sum(array,0,size)/size) ;
assigns \nothing;
*/
avg average(int * array, size_t size){
    // A structure is returned so that the caller can tell whether the
    // computation succeeded.
    avg ret;
    ret.success = true ;
    ret.average = 0 ;
    if (size == 0){
        // With size == 0 there is nothing to average: report failure.
        ret.success = false;
        return ret;
    }
    else{
        /*
        The average is the sum of all the elements of the array divided by
        the size.
        An int is between -2^15-1 and 2^15-1, which implies that the sum of
        all the elements of an array is between
        -2^15 * size and 2^15 * size; as size is between 0 and 2^16,
        the sum is between -2^31 and 2^31.
        A long long is between -2^63 and 2^63,

        so the sum of all the elements fits inside a long long.

        NOTE(review): the figures above assume 16-bit int and unsigned int;
        the annotations below rely on INT_MIN, INT_MAX, LLONG_MIN and
        LLONG_MAX instead -- confirm the bound for the target's type widths.
        */
        long long sum = 0;

        // Invariants: i stays within [0, size], sum equals the logic sum of
        // the first i elements, and sum is bounded by i times the int range.
        /*@
        loop assigns i, sum ;
        loop invariant 0 <= i <= size;
        loop invariant sum == sum(array,0,i);
        loop invariant INT_MIN * i <= sum <= INT_MAX * i;
        */
        for (size_t i = 0 ; i < size ; i ++){
            //@assert INT_MIN * i <= sum <= INT_MAX * i;

            sum += array[i];
            //@assert  i+1 <= size;

            // Intermediate assertions stating that the new sum stays within
            // the long long range and matches the logic sum.
            //@assert INT_MIN * (i+1) <= sum <= INT_MAX * (i+1);
            //@assert ((LLONG_MIN < INT_MIN * size ) && (LLONG_MAX > INT_MAX* size));
            //@assert LLONG_MIN <= sum <= LLONG_MAX;

            //@assert sum == sum(array,0,i) + array[i];

        }
        // Single integer division of the total by the element count.
        ret.average = sum/size ;
        return ret;
    }
}

I left the asserts in, but I'm sure a lot of them are useless.

Try giving it a small tolerance value. `fabs(average(...) - average) < epsilon` — Eugene Sh., Dec 16 '19 at 14:53
Perhaps it doesn't handle you using the symbol `average` for both the function and variable name? What happens if you don't shadow the function name, by using a different name for the variable (which will also make your code easier to read and follow)? — Some programmer dude, Dec 16 '19 at 14:54
Maths with floating point types is generally imprecise, leading to things that you expect to be equal not being equal by tiny amounts (rounding errors). It is quite possible that it is complaining about that; you should be able to compare for "close enough" rather than exact equality. — Fire Lancer, Dec 16 '19 at 14:58
Can you please provide the full output you get when running this through frama-c? What are the exact errors you get? — Some programmer dude, Dec 16 '19 at 15:20
Is it intended to do a division at each step of the for loop ? it looks like it brings more imprecision than necessary ? Unless your array is big enough to overflow a `float` ? — Guillaume Petitjean, Dec 16 '19 at 15:28
wag: are you telling frama-c that you always assign average in that loop? Because size can be zero according to the invariant, then average does not necessarily get assigned. I know it is initialized; but this is about proof, not correctness :) — mevets, Dec 16 '19 at 20:51
regarding: `typedef unsigned int size_t;` The type `size_t` is defined in the header file `stdio.h` and it is defined as: `long unsigned int` Why are you trying to redefine that? — user3629249, Dec 17 '19 at 09:08

chux - Reinstate Monica · Accepted Answer · 2019-12-16T20:45:01.843

3

I want to prove the computation of the average of all the values contained in a 1d array

For exact math, avoid floating point.

Since array[] is int, stick with integer math.

Recommend code re-write.
Pseudo code to test "average of all the values contained in a 1d array"

// Compute sum of all elements of the array
wide_integer_type sum = 0
for (i=0; i<n; i++) 
  sum += array[i]

for (i=0; i<n; i++) 
  // below incurs no rounding like `array[i] == (double)sum/n` might
  if ((cast to wide_integer_type)array[i] * n == sum) 
    print "average found!" sum/n

edited Dec 16 '19 at 20:45

answered Dec 16 '19 at 15:57

chux - Reinstate Monica

143,097
13
135
256

Well, that is the first thing I tried to do, but I'm stuck with an int overflow. The division at every step is the only solution I found. – Shinbly Dec 17 '19 at 02:17
1

"I'm stuck with an int overflow" --> Suggested code has _wide_integer_type_, so use `long long sum = 0; ... if ((long long)array[i] * n == sum)` – chux - Reinstate Monica Dec 17 '19 at 03:03
Even with a `long long sum` I still have an overflow: `/*@ assert rte: signed_overflow: -9223372036854775808 ≤ current_average + (long long)*(array + i); */ /*@ assert rte: signed_overflow: current_average + (long long)*(array + i) ≤ 9223372036854775807;` – Shinbly Dec 17 '19 at 07:38
I tried with a double (since the max value of the sum is 2^63) and I don't have an overflow, but the proof is still incomplete and I don't remember if Frama-C handles doubles. – Shinbly Dec 17 '19 at 07:54
In [here](https://stackoverflow.com/questions/59358989/proving-the-average-value-of-an-array/59360095?noredirect=1#comment104931969_59360095), I'd expect `sum + (long long)*(array + i)`. Unclear what type and role is `current_average`. – chux - Reinstate Monica Dec 17 '19 at 11:33

Proving the average value of an array

1 Answers1