1

I have a dataframe

# Create a sample dataframe
df = pd.DataFrame({'num_posts': [4, 6, 3, 9, 1, 14, 2, 5, 7, 2,12,1,2,3],
                   'date' : ['2020-03-01', '2020-01-02', '2020-01-03', 
                            '2020-01-04', '2019-01-05', '2019-01-06', 
                            '2020-01-07', '2020-01-08', '2020-01-09', 
                            '2020-01-10','2020-01-11','2019-01-10','2019-01-08','2019-01-09']})
cols=['num_posts']  

Now i want to create some features such as prev_week_of_last_year, prev_month_of_last_year I am trying this but i am not getting right output

def create_calendar_columns(df,dt):
    df[dt]=pd.to_datetime(df[dt])
    df['day_number']= pd.to_datetime(df[dt]).dt.dayofyear
    df['week_number']= pd.to_datetime(df[dt]).dt.isocalendar().week
    df['year']= pd.to_datetime(df[dt]).dt.year
    return df
    
def calculate_curr_week_year_avg(df,cols,dt):
    df=create_calendar_columns(df,dt)
    for col in cols:
        df['week_avg_'+str(col)]= df.groupby(['year','week_number'])[col].transform(np.mean)
        df['year_avg_'+str(col)]= df.groupby(['year'])[col].transform(np.mean)
    return df
def calculate_prev_1w_curr_year(df,cols,dt):
    for col in cols:
        df['prev_1w_curr_year_'+str(col)]=df[(df['week_number']==(df['week_number']-1)) & (df['year']==df['year'])]['week_avg_'+str(col)]
    return df
    
df=calculate_curr_week_year_avg(df,cols,'date')    
df=calculate_prev_1w_curr_year(df,cols,'date')        
    

After i call calculate_curr_week_year_avg(df,cols,'date'), i got a dataframe like this

    num_posts       date  day_number  week_number  year  week_avg_num_posts  \
0           4 2020-03-01          61            9  2020               4.000   
1           6 2020-01-02           2            1  2020               6.000   
2           3 2020-01-03           3            1  2020               6.000   
3           9 2020-01-04           4            1  2020               6.000   
4           1 2019-01-05           5            1  2019               7.500   
5          14 2019-01-06           6            1  2019               7.500   
6           2 2020-01-07           7            2  2020               5.600   
7           5 2020-01-08           8            2  2020               5.600   
8           7 2020-01-09           9            2  2020               5.600   
9           2 2020-01-10          10            2  2020               5.600   
10         12 2020-01-11          11            2  2020               5.600   
11          1 2019-01-10          10            2  2019               2.000   
12          2 2019-01-08           8            2  2019               2.000   
13          3 2019-01-09           9            2  2019               2.000   

    year_avg_num_posts  
0                5.556  
1                5.556  
2                5.556  
3                5.556  
4                4.200  
5                4.200  
6                5.556  
7                5.556  
8                5.556  
9                5.556  
10               5.556  
11               4.200  
12               4.200  
13               4.200 

Now when i do the following, it gives me Nan

df['prev_1w_curr_year_'+str(col)]=df[(df['week_number']==(df['week_number']-1)) & (df['year']==df['year'])]['week_avg_'+str(col)]
user3585510
  • 131
  • 2
  • 10

1 Answers1

1

I hope I've understood your question right:

df["prev_1w_curr_year_num_posts"] = df.apply(
    lambda x: df.loc[(df['week_number'] == x["week_number"] - 1) & (df['year'] == x['year']), 'num_posts'].mean(),
    axis=1,
)

df["prev_week_of_last_year_num_posts"] = df.apply(
    lambda x: df.loc[(df['week_number'] == x["week_number"] - 1) & (df['year'] == x['year'] - 1), 'num_posts'].mean(),
    axis=1,
)

Prints:

    num_posts       date  day_number  week_number  year  week_avg_num_posts  year_avg_num_posts  prev_1w_curr_year_num_posts  prev_week_of_last_year_num_posts
0           4 2020-03-01          61            9  2020                 4.0            5.555556                          NaN                               NaN
1           6 2020-01-02           2            1  2020                 6.0            5.555556                          NaN                               NaN
2           3 2020-01-03           3            1  2020                 6.0            5.555556                          NaN                               NaN
3           9 2020-01-04           4            1  2020                 6.0            5.555556                          NaN                               NaN
4           1 2019-01-05           5            1  2019                 7.5            4.200000                          NaN                               NaN
5          14 2019-01-06           6            1  2019                 7.5            4.200000                          NaN                               NaN
6           2 2020-01-07           7            2  2020                 5.6            5.555556                          6.0                               7.5
7           5 2020-01-08           8            2  2020                 5.6            5.555556                          6.0                               7.5
8           7 2020-01-09           9            2  2020                 5.6            5.555556                          6.0                               7.5
9           2 2020-01-10          10            2  2020                 5.6            5.555556                          6.0                               7.5
10         12 2020-01-11          11            2  2020                 5.6            5.555556                          6.0                               7.5
11          1 2019-01-10          10            2  2019                 2.0            4.200000                          7.5                               NaN
12          2 2019-01-08           8            2  2019                 2.0            4.200000                          7.5                               NaN
13          3 2019-01-09           9            2  2019                 2.0            4.200000                          7.5                               NaN
Andrej Kesely
  • 168,389
  • 15
  • 48
  • 91