I am new to Hypothesis and I would like to know if there is a better way to use Hypothesis than what I have done here:
class TestFindEmptyColumns:
    """Tests for ``find_empty_columns``.

    The example-based tests expect the function to return a list of the
    names of columns whose values are all null. The property-based test
    checks the result against ``DataFrame.dropna(how='all', axis=1)`` on
    generated frames.
    """

    def test_one_empty_column(self):
        """A frame whose only column holds just ``None`` reports that column."""
        df = pd.DataFrame({
            'quantity': [None],
        })
        expected_output = ['quantity']
        assert find_empty_columns(df) == expected_output

    def test_no_empty_column(self):
        """A frame in which every column has a value reports nothing."""
        df = pd.DataFrame({
            'item': ["Item1"],
            'quantity': [10],
        })
        expected_output = []
        assert find_empty_columns(df) == expected_output

    # Draw the column names first (1-5 unique, non-empty strings), then build
    # a data-frame strategy with one nullable integer column per drawn name.
    # This gives a different number of columns from one example to the next
    # instead of a fixed pair of columns.
    @given(
        st.lists(
            st.text(min_size=1), min_size=1, max_size=5, unique=True
        ).flatmap(
            lambda names: data_frames([
                column(name=name, elements=st.none() | st.integers())
                for name in names
            ])
        )
    )
    def test_dataframe_with_random_number_of_columns(self, df):
        """Reported columns are real columns of ``df`` that ``dropna`` removes."""
        df_with_no_empty_columns = df.dropna(how='all', axis=1)
        result = find_empty_columns(df)
        # None of the empty columns should be in the reference dataframe
        # df_with_no_empty_columns.
        assert set(result).isdisjoint(df_with_no_empty_columns.columns)
        # The above assert does not catch the case where the result contains
        # a column name that is not in the data-frame at all, e.g. 'col3'.
        assert set(result).issubset(df.columns)
Ideally, I want a dataframe which has a variable number of columns in each test run. The columns can contain any value - some of the columns should contain all null values. Any help would be appreciated.