-- Raw event log: one row per emitted event.
CREATE TABLE events (
    -- Fixed-width 36-character row key (presumably a UUID string; confirm with the producer).
    id              CHAR(36)     PRIMARY KEY,
    -- Identifier of the game the event belongs to.
    game_id         VARCHAR(24)  NOT NULL,
    -- Fixed-width 36-character identifier of the device that emitted the event.
    user_device_id  CHAR(36)     NOT NULL,
    -- Event type; the retention query treats 'new_user' as the first-seen marker.
    event_name      VARCHAR(100) NOT NULL,
    -- Instant at which the event was generated (time-zone aware).
    generated_at    TIMESTAMPTZ  NOT NULL
);
-- Day-N user retention per calendar day (N = 1 below; change the day offset
-- in the join condition to compute another N).
--
-- For every calendar day D on which any event was generated, returns:
--   time_stamp     : D (date part of generated_at, in the session time zone)
--   new_users      : distinct devices with a 'new_user' event on D
--   returned_users : distinct devices of that cohort that produced at least one
--                    non-'new_user' event for the same game on D + N
--   retention      : returned_users / new_users, NULL when D has no new users
SELECT
    events.generated_at::DATE AS time_stamp,
    COUNT(DISTINCT CASE
        WHEN events.event_name = 'new_user' THEN events.user_device_id
    END) AS new_users,
    -- The join only matches cohort rows, so every non-NULL future_events row
    -- already belongs to a device that was new on this day.
    COUNT(DISTINCT future_events.user_device_id) AS returned_users,
    -- NULLIF turns a zero cohort into NULL so the division yields NULL
    -- instead of raising a division-by-zero error.
    COUNT(DISTINCT future_events.user_device_id)
        / NULLIF(
            COUNT(DISTINCT CASE
                WHEN events.event_name = 'new_user' THEN events.user_device_id
            END),
            0
        )::float AS retention
FROM events
LEFT JOIN events AS future_events
    ON events.user_device_id = future_events.user_device_id
    AND events.game_id = future_events.game_id
    -- Only 'new_user' rows look for a follow-up event; other rows stay in the
    -- result (LEFT JOIN) so days without new users are still reported, but they
    -- neither count as returning users nor multiply the join output.
    AND events.event_name = 'new_user'
    AND future_events.event_name <> 'new_user'
    -- Compare calendar days, not exact instants: a returning event almost never
    -- occurs exactly 24 hours (to the microsecond) after the first one.
    AND events.generated_at::DATE + 1 = future_events.generated_at::DATE
GROUP BY
    time_stamp
ORDER BY
    time_stamp;
I am trying to compute Day-N user retention (where N is any number from 1 to 7) with the SQL query above. Because I am new to HPE Vertica, I have not been able to come up with the optimal statement for creating an aggregate projection, even though a suitable projection would significantly improve the performance of this query.