I work with a lot of pandas DataFrames and want to test them with pytest, using Hypothesis to generate the test data.
The issue I am having is that it generates the same value in every row of each column.
I don't know how to generate realistic data to test with.
Here is what I am trying:
from hypothesis.extra.pandas import data_frames , column, range_indexes
from hypothesis import given, settings, strategies as st
import pandas as pd
from datetime import datetime
# Column specifications for the generated DataFrame, listed in output order.
# NOTE(review): Hypothesis may draw only some rows from `elements` and assign
# one shared fill value to the rest; combined with the 100-row minimum below
# this could explain repeated values per column -- confirm against the docs.
_columns = [
    column(name='key', elements=st.floats(allow_nan=True)),
    column(
        name='fbms_start_date',
        # The upper bound is evaluated once, when this statement runs.
        elements=st.datetimes(
            min_value=datetime(2020, 7, 1),
            max_value=datetime.now(),
        ),
    ),
    # Constant column: every drawn element is the string "Total".
    column(name='breakdown_type', elements=st.just("Total")),
    # Constant column: every drawn element is NaN.
    column(name='breakdown_one', elements=st.just(float('nan'))),
    column(name='adset_id', elements=st.floats(allow_nan=True)),
    column(name='adset_name', elements=st.text()),
    column(name='campaign_id', elements=st.floats(allow_nan=True)),
    column(name='campaign_name', elements=st.text()),
    column(name='reach', elements=st.text()),
    # Constant column: every drawn element is NaN.
    column(name='impressions', elements=st.just(float('nan'))),
    column(name='spend', elements=st.floats(allow_nan=False)),
    column(name='page_likes', elements=st.floats(allow_nan=False)),
    column(
        name='post_engagement',
        elements=st.sampled_from(
            ['LINK_CLICKS', 'POST_ENGAGEMENT', 'PAGE_LIKES']
        ),
    ),
    column(name='objective', elements=st.floats(allow_nan=False)),
    column(name='ads_run', elements=st.sampled_from([True, False])),
]

# Strategy yielding DataFrames with the columns above; the index strategy is
# asked for a minimum size of 100.
data = data_frames(columns=_columns, index=range_indexes(min_size=100))
@given(df=data)
@settings(max_examples=5)
def test_hyothesis(df):
    """Print each generated DataFrame so the drawn values can be inspected.

    Hypothesis supplies ``df`` from the ``data`` strategy; ``max_examples=5``
    caps how many examples are generated per run.
    """
    print(df)
    # Placeholder assertion: it always passes, so this test only surfaces the
    # generated data rather than checking any property of it.
    assert 1
This always generates the following dataset:
key fbms_start_date breakdown_type breakdown_one adset_id adset_name campaign_id campaign_name reach impressions spend page_likes post_engagement objective ads_run
0 0.0 2020-07-01 Total 0.0 0.0 0.0 0.0 LINK_CLICKS 0.0 True
1 0.0 2020-07-01 Total 0.0 0.0 0.0 0.0 LINK_CLICKS 0.0 True
2 0.0 2020-07-01 Total 0.0 0.0 0.0 0.0 LINK_CLICKS 0.0 True
3 0.0 2020-07-01 Total 0.0 0.0 0.0 0.0 LINK_CLICKS 0.0 True
4 0.0 2020-07-01 Total 0.0 0.0 0.0 0.0 LINK_CLICKS 0.0 True
5 0.0 2020-07-01 Total 0.0 0.0 0.0 0.0 LINK_CLICKS 0.0 True
6 0.0 2020-07-01 Total 0.0 0.0 0.0 0.0 LINK_CLICKS 0.0 True
7 0.0 2020-07-01 Total 0.0 0.0 0.0 0.0 LINK_CLICKS 0.0 True
8 0.0 2020-07-01 Total 0.0 0.0 0.0 0.0 LINK_CLICKS 0.0 True
9 0.0 2020-07-01 Total 0.0 0.0 0.0 0.0 LINK_CLICKS 0.0 True
As you can see, each column contains a single repeated value rather than varied values. I don't know how to generate realistic values that I can test with.
Any help would be appreciated.