HAWQ. How to join in/out rows by in/out time?
simple
thanks
I believe this is what you are trying to achieve. The trick is to use the window function "row_number()".
select sub1.car_id, sub1.id_in, sub1.cross_date_time_in, sub2.id_out, sub2.cross_date_time_out
from (
select car_id, id as id_in,
cross_date_time as cross_date_time_in,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'in') as sub1
join (select car_id, id as id_out,
cross_date_time as cross_date_time_out,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'out') as sub2 on sub1.car_id = sub2.car_id and sub1.row_num = sub2.row_num;
You can write this with common table expressions too if you prefer that format.
with sub1 as (select car_id, id as id_in, cross_date_time as cross_date_time_in,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'in'),
sub2 as (select car_id, id as id_out, cross_date_time as cross_date_time_out,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'out')
select sub1.car_id, sub1.id_in, sub1.cross_date_time_in,
sub2.id_out, sub2.cross_date_time_out
from sub1
join sub2 on sub1.car_id = sub2.car_id and sub1.row_num = sub2.row_num;
create table source_table
(
id INT
,car_id INT
,direction text
,cross_date_time TIMESTAMP
);
insert into source_table
values (1, 1,'in', to_timestamp('2017-02-02-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 1,'in', to_timestamp('2017-02-12-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 1,'in', to_timestamp('2017-02-18-10:20:15', 'yyyy-MM-dd hh:mi:ss'));;
insert into source_table
values (1, 1,'in', to_timestamp('2017-02-25-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 1,'out', to_timestamp('2017-02-08-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 1,'out', to_timestamp('2017-02-09-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 1,'out', to_timestamp('2017-02-27-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 2,'in', to_timestamp('2017-02-02-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 2,'in', to_timestamp('2017-02-12-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 2,'in', to_timestamp('2017-02-18-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 2,'out', to_timestamp('2017-02-08-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 2,'out', to_timestamp('2017-02-14-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 2,'out', to_timestamp('2017-02-27-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 2,'out', to_timestamp('2017-02-29-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 3,'in', to_timestamp('2017-02-02-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 3,'in', to_timestamp('2017-02-12-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 3,'out', to_timestamp('2017-02-08-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
insert into source_table
values (1, 3,'out', to_timestamp('2017-02-14-10:20:15', 'yyyy-MM-dd hh:mi:ss'));
select sub1.car_id, sub1.id_in, sub1.cross_date_time_in, sub2.id_out, sub2.cross_date_time_out
from (
select car_id, id as id_in,
cross_date_time as cross_date_time_in,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'in') as sub1
join (select car_id, id as id_out,
cross_date_time as cross_date_time_out,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'out') as sub2 on sub1.car_id = sub2.car_id and sub1.row_num = sub2.row_num;
Wrong result.
1 1 2017-02-12 10:20:15.000000 1 2017-02-09 10:20:15.000000
if use a left join variant:
select sub1.car_id, sub1.id_in, sub1.cross_date_time_in, sub2.id_out, sub2.cross_date_time_out
from (
select car_id, id as id_in,
cross_date_time as cross_date_time_in,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'in') as sub1
left join (select car_id, id as id_out,
cross_date_time as cross_date_time_out,
row_number() over (partition by car_id order by cross_date_time) as row_num
from source_table
where direction = 'out') as sub2 on sub1.car_id = sub2.car_id and sub1.row_num = sub2.row_num;
wrong results: 1 1 2017-02-12 10:20:15.000000 1 2017-02-09 10:20:15.000000 1 1 2017-02-18 10:20:15.000000 1 2017-02-27 10:20:15.000000 1 1 2017-02-25 10:20:15.000000