I am trying to recreate GA funnel in BigQuery, this open funnel would exclude sessions that have viewed certain pages, I have tried using the following: AND NOT REGEXP_MATCH, NOT IN, but it still not working as I expect, I am still getting sessions that viewed the pages I want to exclude.
I want to make this a close funnel as well if possible, this code returns an open funnel.
Also, is there a better way of writing this query in Standard SQL?
Need help with these. Thanks.
SELECT COUNT(s0.firstHit) AS _test_your_details, SUM(s0.exit) AS _test_your_details_exits, COUNT(s1.firstHit) AS _test_additional_new_details, SUM(s1.exit) AS _test_additional_new_details_exits, COUNT(s2.firstHit) AS _test_new_dress, SUM(s2.exit) AS _test_new_dress_exits, COUNT(s3.firstHit) AS _test_test_details, SUM(s3.exit) AS _test_test_details_exits, COUNT(s4.firstHit) AS _test_cover_for_the_test, SUM(s4.exit) AS _test_cover_for_the_test_exits, COUNT(s5.firstHit) AS _test_your_order, SUM(s5.exit) AS _test_your_order_exits FROM (SELECT s0.fullVisitorId, s0.visitId, s0.firstHit, s0.exit, s1.firstHit, s1.exit, s2.firstHit, s2.exit, s3.firstHit, s3.exit, s4.firstHit, s4.exit, s5.firstHit, s5.exit FROM (SELECT s0.fullVisitorId, s0.visitId, s0.firstHit, s0.exit, s1.firstHit, s1.exit, s2.firstHit, s2.exit, s3.firstHit, s3.exit, s4.firstHit, s4.exit FROM (SELECT s0.fullVisitorId, s0.visitId, s0.firstHit, s0.exit, s1.firstHit, s1.exit, s2.firstHit, s2.exit, s3.firstHit, s3.exit FROM (SELECT s0.fullVisitorId, s0.visitId, s0.firstHit, s0.exit, s1.firstHit, s1.exit, s2.firstHit, s2.exit FROM (SELECT s0.fullVisitorId, s0.visitId, s0.firstHit, s0.exit, s1.firstHit, s1.exit FROM (SELECT fullVisitorId, visitId, MIN(hits.hitNumber) AS firstHit, MAX(IF(hits.isExit, 1, 0)) AS exit FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30')) WHERE REGEXP_MATCH(hits.page.pagePath, '/test - your details') AND totals.visits = 1 AND channelGrouping NOT LIKE '%organic%' AND hits.page.pagePath NOT in ('/test - additional test details', '/test - test dress', '/test - cover dress') AND NOT REGEXP_MATCH(hits.page.pagePath, r"^/(test - additional test details|test - test dress|test - cover dress)") GROUP BY fullVisitorId, visitId) s0 FULL OUTER JOIN EACH (SELECT fullVisitorId, visitId, MIN(hits.hitNumber) AS firstHit, MAX(IF(hits.isExit, 1, 0)) AS exit FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30')) WHERE REGEXP_MATCH(hits.page.pagePath, '/test - additional new details') AND totals.visits = 1 AND channelGrouping NOT LIKE '%organic%' GROUP BY fullVisitorId, visitId) s1 ON s0.fullVisitorId = s1.fullVisitorId AND s0.visitId = s1.visitId) s01 FULL OUTER JOIN EACH (SELECT fullVisitorId, visitId, MIN(hits.hitNumber) AS firstHit, MAX(IF(hits.isExit, 1, 0)) AS exit FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30')) WHERE REGEXP_MATCH(hits.page.pagePath, '/test - new dress') AND totals.visits = 1 AND channelGrouping NOT LIKE '%organic%' GROUP BY fullVisitorId, visitId) s2 ON s0.fullVisitorId = s2.fullVisitorId AND s0.visitId = s2.visitId) s012 FULL OUTER JOIN EACH (SELECT fullVisitorId, visitId, MIN(hits.hitNumber) AS firstHit, MAX(IF(hits.isExit, 1, 0)) AS exit FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30')) WHERE REGEXP_MATCH(hits.page.pagePath, '/test - test details') AND totals.visits = 1 AND channelGrouping NOT LIKE '%organic%' GROUP BY fullVisitorId, visitId) s3 ON s0.fullVisitorId = s3.fullVisitorId AND s0.visitId = s3.visitId) s0123 FULL OUTER JOIN EACH (SELECT fullVisitorId, visitId, MIN(hits.hitNumber) AS firstHit, MAX(IF(hits.isExit, 1, 0)) AS exit FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30')) WHERE REGEXP_MATCH(hits.page.pagePath, '/test - cover for the test') AND totals.visits = 1 AND channelGrouping NOT LIKE '%organic%' AND hits.page.pagePath NOT in ('/test - additional test details', '/test - test dress') GROUP BY fullVisitorId, visitId) s4 ON s0.fullVisitorId = s4.fullVisitorId AND s0.visitId = s4.visitId) s01234 FULL OUTER JOIN EACH (SELECT fullVisitorId, visitId, MIN(hits.hitNumber) AS firstHit, MAX(IF(hits.isExit, 1, 0)) AS exit FROM TABLE_DATE_RANGE([xxxxxxxx.ga_sessions_], TIMESTAMP('2018-11-01'), TIMESTAMP('2018-11-30')) WHERE REGEXP_MATCH(hits.page.pagePath, '/test - your order') AND totals.visits = 1 AND channelGrouping NOT LIKE '%organic%' AND hits.page.pagePath NOT in ('/test - additional test details', '/test - test dress') AND NOT REGEXP_MATCH(hits.page.pagePath, r"^/(test - additional test details|test - test dress|test - cover dress)") GROUP BY fullVisitorId, visitId) s5 ON s0.fullVisitorId = s5.fullVisitorId AND s0.visitId = s5.visitId) s012345