1

I am new to Hypothesis and I would like to know if there is a better way to use to Hypothesis than what I have done here...

class TestFindEmptyColumns:
    def test_one_empty_column(self):
        input = pd.DataFrame({
            'quantity': [None],
        })
        expected_output = ['quantity']
        assert find_empty_columns(input) == expected_output

    def test_no_empty_column(self):
        input = pd.DataFrame({
            'item': ["Item1", ],
            'quantity': [10, ],
        })
        expected_output = []
        assert find_empty_columns(input) == expected_output

    @given(data_frames([
        column(name='col1', elements=st.none() | st.integers()),
        column(name='col2', elements=st.none() | st.integers()),
    ]))
    def test_dataframe_with_random_number_of_columns(self, df):
        df_with_no_empty_columns = df.dropna(how='all', axis=1)
        result = find_empty_columns(df)

        # None of the empty columns should be in the reference dataframe df_with_no_empty_columns
        assert set(result).isdisjoint(df_with_no_empty_columns.columns)

        # The above assert does not catch the condition if the result is a column name
        # that is not there in the data-frame at all e.g. 'col3'
        assert set(result).issubset(df.columns)

Ideally, I want a dataframe which has a variable number of columns in each test run. The columns can contain any value - some of the columns should contains all null values. Any help would be appreciated?

Siraj Samsudeen
  • 1,624
  • 7
  • 26
  • 35

0 Answers0