Here's a partial solution that, instead of doing essentially rejection sampling, uses the Gram-Schmidt process to generate your columns. It's partial, because it actually seems to be a bit slower than the rejection sampling approach (unless I did the timing wrong, somehow - I simply replaced data
with your implementation), and I had to bake in a few assumptions for demonstration's sake (a bunch of them could be removed, of course). I still think the idea might be viable if optimized by a wiser wizard than me, so I'm putting it out here.
import sys
import hypothesis.strategies as st
import numpy as np
from hypothesis import given
from hypothesis.extra.numpy import arrays
def project_onto_u(u, v):
return np.dot(u, v) / np.dot(u, u) * u
@st.composite
def data(draw, shape):
elements = st.floats(
min_value=0.01,
max_value=10,
allow_nan=False,
allow_infinity=False,
allow_subnormal=False,
)
V = draw(arrays(float, shape, elements=elements, unique=True))
vectors = []
for v in V:
u = v - sum(project_onto_u(u_i, v) for u_i in vectors)
vectors.append(u)
X = np.vstack(vectors)
assert X.shape == shape
return X
@given(X=data(shape=(10, 10)))
def test_matrix_has_independent_columns(X):
assert np.linalg.cond(np.dot(X.T, X)) < 1 / sys.float_info.epsilon