问题:如下为某直播平台各主播的开播及关播时间明细数据,现在需要计算出该平台最高峰期同时在线的主播人数。
user_id start_date end_date
1001 2021-06-14 12:12:12 2021-06-14 18:12:12
1003 2021-06-14 13:12:12 2021-06-14 16:12:12
1004 2021-06-14 13:15:12 2021-06-14 20:12:12
1002 2021-06-14 15:12:12 2021-06-14 16:12:12
1005 2021-06-14 15:18:12 2021-06-14 20:12:12
1001 2021-06-14 20:12:12 2021-06-14 23:12:12
1006 2021-06-14 21:12:12 2021-06-14 23:15:12
1007 2021-06-14 22:12:12 2021-06-14 23:10:12
这是非常经典的一个面试题,不管大厂小厂都有问到过。解题思路也比较固定:就是用1代表开播(此时用开播时间),-1代表关播(此时用关播时间),可以理解1代表主播开播加入增1,-1代表主播关播离开减1,然后开窗可以计算出到每个时间点时有多少主播同时在线,最后求最大值即可。
with tmp as
(
select 1001 as user_id, '2021-06-14 12:12:12' as start_date , '2021-06-14 18:12:12' as end_date
union all
select 1003 as user_id, '2021-06-14 13:12:12' as start_date , '2021-06-14 16:12:12' as end_date
union all
select 1004 as user_id, '2021-06-14 13:15:12' as start_date , '2021-06-14 20:12:12' as end_date
union all
select 1002 as user_id, '2021-06-14 15:12:12' as start_date , '2021-06-14 16:12:12' as end_date
union all
select 1005 as user_id, '2021-06-14 15:18:12' as start_date , '2021-06-14 20:12:12' as end_date
union all
select 1001 as user_id, '2021-06-14 20:12:12' as start_date , '2021-06-14 23:12:12' as end_date
union all
select 1006 as user_id, '2021-06-14 21:12:12' as start_date , '2021-06-14 23:15:12' as end_date
union all
select 1007 as user_id, '2021-06-14 22:12:12' as start_date , '2021-06-14 23:10:12' as end_date
)
select
max(online_nums) as max_online_nums
from
(
select
user_id
,dt
,sum(flag) over(order by dt) as online_nums
from
(
select
user_id
,start_date as dt
,1 as flag --开播记为1
from tmp
union all
select
user_id
,end_date as dt
,-1 as flag --关播记为-1
from tmp
)t1
)t1
;