0

This question is about the Hypothesis library for property-based testing.

I want a strategy that would give me lists of positive floats with a fixed size, a specified sum, and such that no elements would be lower than some specified minimum threshold.

For example:

size = 5
sum_ = 1
threshold = 0.1
...
for _ in range(3):
    print(magic_strategy.example())

could give something like this:

[0.4, 0.2, 0.1, 0.1, 0.2]
[0.15, 0.25, 0.25, 0.2, 0.15]
[0.2, 0.2, 0.2, 0.2, 0.2]

How do I write such a strategy?


Failed attempt with filtering:

from hypothesis import strategies as st

# Target parameters: list length, required total, and per-element lower bound.
size = 5
sum_ = 1
threshold = 0.1

# Strictly positive floats (0 itself is excluded) with infinities disallowed.
domain_values = st.floats(min_value=0, allow_infinity=False, exclude_min=True)
# Lists of exactly `size` such floats.
domain_values_lists = st.lists(domain_values, min_size=size, max_size=size)
# Rescale every list so that its elements add up to `sum_`.
normalized_lists = domain_values_lists.map(lambda values: [value * sum_ / sum(values) 
                                                           for value in values])
# Keep only lists whose every element is strictly greater than `threshold`.
# NOTE(review): the stated requirement is "not lower than" the threshold,
# which would be `>=`; confirm whether the strict comparison is intended.
lists_with_threshold = normalized_lists.filter(lambda values: all(value > threshold for value in values))

The problem with this approach is that, for some reason, whenever I take an example from this strategy it always gives me a list of identical values, regardless of the given input parameters:

[0.2, 0.2, 0.2, ..., 0.2]
[0.01, 0.01, 0.01, ..., 0.01]
[4.0, 4.0, 4.0, ..., 4.0]
Georgy
  • 12,464
  • 7
  • 65
  • 73

1 Answers1

1

The main idea is to generate some values, map them into the interval [0, 1], then scale them to [0, sum_] so that they form a partition of sum_ (i.e. the elements sum to sum_), and finally raise the lower bound so that the elements lie in [min_value, sum_] while preserving the partition property:

from math import floor
from numbers import Real
from typing import List

from hypothesis import strategies
from hypothesis.strategies import SearchStrategy as Strategy

# Smallest number of elements a partition may have; also the default `size`.
MIN_PARTITION_SIZE = 1


def to_partitions(sum_: Real,
                  *,
                  min_value: Real = 0,
                  size: int = MIN_PARTITION_SIZE,
                  base: Strategy[Real] = strategies.integers()
                  ) -> Strategy[List[Real]]:
    """Build a strategy of fixed-size lists whose elements add up to ``sum_``.

    Values drawn from ``base`` are squashed into ``[0, 1]``, rescaled so
    that they sum to ``sum_``, shifted so that the smallest element is not
    below ``min_value``, and finally corrected for floating-point drift.
    Lists whose sum still differs from ``sum_`` are filtered out.

    :param sum_: required sum of every generated list.
    :param min_value: lower bound for the elements; must lie in
        ``[0, sum_]``.
    :param size: exact length of every generated list.
    :param base: strategy producing the raw numbers the lists are built from.
    :returns: strategy of lists of length ``size`` summing exactly to
        ``sum_``.
    :raises ValueError: if ``size`` is less than ``MIN_PARTITION_SIZE``,
        if ``min_value`` is outside ``[0, sum_]``, or if ``size`` elements
        of at least ``min_value`` cannot fit into ``sum_``.
    """
    # Imported locally because the visible module-level imports only bring
    # in ``floor`` (``from math import floor``), so ``math.isfinite`` would
    # raise ``NameError`` as soon as a non-zero ``min_value`` is passed.
    from math import isfinite

    if size < MIN_PARTITION_SIZE:
        raise ValueError('`size` should not be less '
                         f'than {MIN_PARTITION_SIZE}.')
    if not (0 <= min_value <= sum_):
        raise ValueError(f'`min_value` should be in [0, {sum_}] interval.')
    if min_value:
        # At most ``sum_ / min_value`` elements of at least ``min_value``
        # can fit into ``sum_``; the ratio may overflow to infinity for
        # very small ``min_value``, in which case no limit is enforced.
        max_size_approximation = sum_ / min_value
        if isfinite(max_size_approximation):
            max_size = floor(max_size_approximation)
            if max_size < size:
                raise ValueError(f'`size` should not be greater than {max_size}.')

    def to_proportions(numbers: List[Real]) -> List[Real]:
        # Map each number ``x`` to ``2 * |x| / (1 + x^2)``, which lies
        # in ``[0, 1]``.
        return [2 * abs(number) / (1 + number * number) for number in numbers]

    def to_partition(proportions: List[Real]) -> List[Real]:
        # Rescale so that the elements sum to ``sum_`` (up to rounding).
        # The preceding ``filter(any)`` rejects all-zero inputs.
        factor = sum_ / sum(proportions)
        return [proportion * factor for proportion in proportions]

    def bound_minimum(partition: List[Real]) -> List[Real]:
        minimum = min(partition)
        if minimum >= min_value:
            return partition
        # Affine map that sends the current minimum to ``min_value`` while
        # keeping the total equal to ``sum_`` in exact arithmetic.
        partition_size = len(partition)
        denominator = sum_ - partition_size * minimum
        slope = sum_ - partition_size * min_value
        intercept = sum_ * (min_value - minimum)
        # ``max`` guards against rounding leaving an element slightly
        # below ``min_value``.
        return [max((part * slope + intercept) / denominator, min_value)
                for part in partition]

    def normalize(partition: List[Real]) -> List[Real]:
        # Compensate rounding drift in place: add the deficit to the
        # smallest element or remove the excess from the largest one.
        partition_sum = sum(partition)
        if partition_sum < sum_:
            arg_min = min(range(len(partition)),
                          key=partition.__getitem__)
            partition[arg_min] += sum_ - partition_sum
        elif partition_sum > sum_:
            arg_max = max(range(len(partition)),
                          key=partition.__getitem__)
            partition[arg_max] -= partition_sum - sum_
        return partition

    def is_valid(partition: List[Real]) -> bool:
        # Exact comparison: anything still off after ``normalize`` is
        # discarded.
        return sum(partition) == sum_

    return (strategies.lists(base,
                             min_size=size,
                             max_size=size)
            .filter(any)
            .map(to_proportions)
            .map(to_partition)
            .map(bound_minimum)
            .map(normalize)
            .filter(is_valid))

For the "normalization" of numbers we use a well-known property of real numbers (the code applies it to abs(number), so every proportion lies in [0, 1]):

(x - 1) ^ 2 >= 0        | since a square is non-negative
x^2 - 2 * x + 1 >= 0    | expand the square
x^2 + 1 >= 2 * x        | move 2 * x to the right-hand side
1 >= 2 * x / (x^2 + 1)  | divide both sides by the positive x^2 + 1

We also add some tricks to handle possible floating-point issues (such as an element ending up close to but less than min_value after the lower bound is moved to min_value, or the sum of the partition being close but not equal to sum_).

Test

from math import floor
from numbers import Real
from typing import List

from hypothesis import (given, 
                        strategies)
from hypothesis.strategies import DataObject


@given(strategies.data(), strategies.floats(0, 100))
def test_to_partitions(data: DataObject, sum_: Real) -> None:
    """Check the sum, length and lower bound of a drawn partition."""
    min_value = data.draw(strategies.floats(0, sum_))
    # With a non-zero lower bound, at most ``sum_ / min_value`` elements
    # fit; the size is capped at 100 either way.
    if min_value:
        max_size = floor(min(sum_ / min_value, 100))
    else:
        max_size = 100
    size = data.draw(strategies.integers(MIN_PARTITION_SIZE, max_size))

    partition = data.draw(to_partitions(sum_,
                                        min_value=min_value,
                                        size=size))

    assert sum(partition) == sum_
    assert len(partition) == size
    assert all(part >= min_value for part in partition)

seems to pass.

Azat Ibrakov
  • 9,998
  • 9
  • 38
  • 50
  • This works perfectly for me. Any idea why all of this is necessary, though? Why does my obvious approach with `filter` fail? – Georgy Jul 27 '20 at 13:36