1

This is a ClickHouse request, but I think i'm missing some core sql solution. I want to count maximum number of rows per minute during each day:

SELECT date_date,
       formatDateTime(date_time,'%F %H:%M:00', 'Asia/Dubai') AS Max_TPS_Time,
       count(1) AS Max_TPS
FROM TEST
GROUP BY Max_TPS_Time,
         date_date

Which result in

date_date   Max_TPS_Time    Max_TPS
19/11/19    2019-11-19 11:37:00 4
19/11/19    2019-11-19 11:15:00 2
19/11/19    2019-11-19 11:40:00 5
19/11/18    2019-11-18 12:37:00 4
19/11/18    2019-11-18 12:15:00 2
19/11/18    2019-11-18 12:40:00 6

And In the end I want to see max value per day with appropriate datetime.

date_date   Max_TPS_Time    Max_TPS
19/11/19    2019-11-19 11:40:00 5
19/11/18    2019-11-18 12:40:00 6
GMB
  • 216,147
  • 25
  • 84
  • 135

3 Answers3

2

I would simplify/optimize your queries like this:

SELECT
    toDate(max_tps_time) AS date_date,
    toTimeZone(toStartOfMinute(utc_date_time), 'Asia/Dubai') AS max_tps_time,
    count() AS max_tps
FROM
( /* test data */
    SELECT arrayJoin([
      toDateTime('2019-11-19 07:37:01', 'UTC'), toDateTime('2019-11-19 07:37:11', 'UTC'), toDateTime('2019-11-19 07:37:21', 'UTC'), toDateTime('2019-11-19 07:37:31', 'UTC'), 
      toDateTime('2019-11-18 08:15:21', 'UTC'), toDateTime('2019-11-18 08:15:42', 'UTC'),
      toDateTime('2019-11-19 07:40:01', 'UTC'), toDateTime('2019-11-19 07:40:11', 'UTC'), toDateTime('2019-11-19 07:40:21', 'UTC'), toDateTime('2019-11-19 07:40:31', 'UTC'), toDateTime('2019-11-19 07:40:41', 'UTC'), 
      toDateTime('2019-11-18 08:40:18', 'UTC'), toDateTime('2019-11-18 08:40:20', 'UTC'), toDateTime('2019-11-18 08:40:22', 'UTC'), toDateTime('2019-11-18 08:40:24', 'UTC'), toDateTime('2019-11-18 08:40:26', 'UTC'), toDateTime('2019-11-18 08:40:28', 'UTC')]) AS utc_date_time
)
GROUP BY max_tps_time
ORDER BY max_tps_time;

/* result
┌──date_date─┬────────max_tps_time─┬─max_tps─┐
│ 2019-11-18 │ 2019-11-18 12:15:00 │       2 │
│ 2019-11-18 │ 2019-11-18 12:40:00 │       6 │
│ 2019-11-19 │ 2019-11-19 11:37:00 │       4 │
│ 2019-11-19 │ 2019-11-19 11:40:00 │       5 │
└────────────┴─────────────────────┴─────────┘
*/
SELECT 
  toDate(max_tps_time) AS date_date, 
  argMax(max_tps_time, max_tps) max_tps_time_by_day,
  max(max_tps) max_tps_by_day
FROM (
  SELECT
      toTimeZone(toStartOfMinute(utc_date_time), 'Asia/Dubai') AS max_tps_time,
      count() AS max_tps
  FROM
  ( /* test data */
      SELECT arrayJoin([
        toDateTime('2019-11-19 07:37:01', 'UTC'), toDateTime('2019-11-19 07:37:11', 'UTC'), toDateTime('2019-11-19 07:37:21', 'UTC'), toDateTime('2019-11-19 07:37:31', 'UTC'), 
        toDateTime('2019-11-18 08:15:21', 'UTC'), toDateTime('2019-11-18 08:15:42', 'UTC'),
        toDateTime('2019-11-19 07:40:01', 'UTC'), toDateTime('2019-11-19 07:40:11', 'UTC'), toDateTime('2019-11-19 07:40:21', 'UTC'), toDateTime('2019-11-19 07:40:31', 'UTC'), toDateTime('2019-11-19 07:40:41', 'UTC'), 
        toDateTime('2019-11-18 08:40:18', 'UTC'), toDateTime('2019-11-18 08:40:20', 'UTC'), toDateTime('2019-11-18 08:40:22', 'UTC'), toDateTime('2019-11-18 08:40:24', 'UTC'), toDateTime('2019-11-18 08:40:26', 'UTC'), toDateTime('2019-11-18 08:40:28', 'UTC')]) AS utc_date_time
  )
  GROUP BY max_tps_time)
GROUP BY date_date  
ORDER BY date_date;

/* result
┌──date_date─┬─max_tps_time_by_day─┬─max_tps_by_day─┐
│ 2019-11-18 │ 2019-11-18 12:40:00 │              6 │
│ 2019-11-19 │ 2019-11-19 11:40:00 │              5 │
└────────────┴─────────────────────┴────────────────┘
*/

These queries can be merged into one query by using ROLLUP. The result will contain two sub-results for 'daily' and 'minute' aggregates:

SELECT 
  toDate(max_tps_time) AS date_date, 
  argMax(max_tps_time, max_tps) max_tps_time_by_day,
  max(max_tps) max_tps_by_day,
  toInt32(max_tps_time) = 0 ? 1 : 0 is_daily_aggregate 
FROM (
  SELECT
      toTimeZone(toStartOfMinute(utc_date_time), 'Asia/Dubai') AS max_tps_time,
      count() AS max_tps
  FROM
  ( /* test data */
      SELECT arrayJoin([
        toDateTime('2019-11-19 07:37:01', 'UTC'), toDateTime('2019-11-19 07:37:11', 'UTC'), toDateTime('2019-11-19 07:37:21', 'UTC'), toDateTime('2019-11-19 07:37:31', 'UTC'), 
        toDateTime('2019-11-18 08:15:21', 'UTC'), toDateTime('2019-11-18 08:15:42', 'UTC'),
        toDateTime('2019-11-19 07:40:01', 'UTC'), toDateTime('2019-11-19 07:40:11', 'UTC'), toDateTime('2019-11-19 07:40:21', 'UTC'), toDateTime('2019-11-19 07:40:31', 'UTC'), toDateTime('2019-11-19 07:40:41', 'UTC'), 
        toDateTime('2019-11-18 08:40:18', 'UTC'), toDateTime('2019-11-18 08:40:20', 'UTC'), toDateTime('2019-11-18 08:40:22', 'UTC'), toDateTime('2019-11-18 08:40:24', 'UTC'), toDateTime('2019-11-18 08:40:26', 'UTC'), toDateTime('2019-11-18 08:40:28', 'UTC')]) AS utc_date_time
  )
  GROUP BY max_tps_time)
GROUP BY date_date, max_tps_time WITH ROLLUP
HAVING toInt32(date_date) != 0
ORDER BY max_tps_time, max_tps_time_by_day;

/* result
┌──date_date─┬─max_tps_time_by_day─┬─max_tps_by_day─┬─is_daily_aggregate─┐
│ 2019-11-18 │ 2019-11-18 12:40:00 │              6 │                  1 │<-- daily aggregate
│ 2019-11-19 │ 2019-11-19 11:40:00 │              5 │                  1 │<-- daily aggregate
│ 2019-11-18 │ 2019-11-18 12:15:00 │              2 │                  0 │<-- minute aggregate
│ 2019-11-18 │ 2019-11-18 12:40:00 │              6 │                  0 │<-- minute aggregate
│ 2019-11-19 │ 2019-11-19 11:37:00 │              4 │                  0 │<-- minute aggregate
│ 2019-11-19 │ 2019-11-19 11:40:00 │              5 │                  0 │<-- minute aggregate
└────────────┴─────────────────────┴────────────────┴────────────────────┘
*/
vladimir
  • 13,428
  • 2
  • 44
  • 70
0

You are looking for the top 1 record per group out of the result of your aggregate query.

As far as I know, clickhouse does not support window functions (which would have make this task easier).

But it supports common table expressions: so you could turn the existing query to a CTE, and then filter with a self-correlated subquery:

WITH cte as (
    SELECT 
        date_date,
        formatDateTime(date_time,'%F %H:%M:00', 'Asia/Dubai') AS Max_TPS_Time,
        count(*) AS Max_TPS
    FROM TEST
    GROUP BY Max_TPS_Time, date_date
)
SELECT c.*
FROM cte c
WHERE c.Max_TPS = (SELECT MAX(c1.Max_TPS) FROM cte c1 WHERE c1.date_date = c.date_date)

Please note that if you just want the Max_TPS per day but you don't need the corresponding Max_TPS_Time, then it's simpler, you can just add another level of aggregation to your current query:

SELECT date_date, MAX(Max_TPS) Max_TPS
FROM (
    SELECT 
        date_date,
        formatDateTime(date_time,'%F %H:%M:00', 'Asia/Dubai') AS Max_TPS_Time,
        count(*) AS Max_TPS
    FROM TEST
    GROUP BY Max_TPS_Time, date_date
) t
GROUP BY date_date

EDIT

If the above CTE solution does not work in clickhouse, then you can repeat the aggregate query, like so:

SELECT t.*
FROM (
    SELECT 
        date_date,
        formatDateTime(date_time,'%F %H:%M:00', 'Asia/Dubai') AS Max_TPS_Time,
        count(*) AS Max_TPS
    FROM TEST
    GROUP BY Max_TPS_Time, date_date
) t
WHERE t.Max_TPS = (
    SELECT count(*)
    FROM TEST t
    GROUP BY formatDateTime(date_time,'%F %H:%M:00', 'Asia/Dubai')
    WHERE t.date_date = c.date_date
)
GMB
  • 216,147
  • 25
  • 84
  • 135
0
SELECT date_date,
       formatDateTime(date_time,'%F %H:%M:00', 'Asia/Dubai') AS Max_TPS_Time,
       count() AS Max_TPS
FROM TEST GROUP BY Max_TPS_Time, date_date
order by Max_TPS_Time, date_date, Max_TPS desc 
limit 1 by Max_TPS_Time, date_date


select argMax(date_date, Max_TPS) date_date,  
argMax(Max_TPS_Time, Max_TPS) Max_TPS_Time, 
Max_TPS
from (
   SELECT date_date,
       formatDateTime(date_time,'%F %H:%M:00', 'Asia/Dubai') AS Max_TPS_Time,
       count() AS Max_TPS
   FROM TEST GROUP BY Max_TPS_Time, date_date)
Denny Crane
  • 11,574
  • 2
  • 19
  • 30