需求结果:
--表创建
CREATE TABLE athlete_results
(
athlete_id INT,
match_time TIMESTAMP,
result VARCHAR(10) -- 'win', 'lose', 'draw'
);
--数据装载
INSERT INTO athlete_results
VALUES (1, '2023-11-01', 'win'),
(1, '2023-11-02', 'win'),
(1, '2023-11-03', 'lose'),
(1, '2023-11-04', 'win'),
(1, '2023-11-05', 'draw'),
(1, '2023-11-06', 'win'),
(1, '2023-11-07', 'win'),
(1, '2023-11-08', 'win'),
(2, '2023-11-01', 'win'),
(2, '2023-11-02', 'lose'),
(2, '2023-11-03', 'draw'),
(2, '2023-11-04', 'win'),
(2, '2023-11-05', 'win'),
(2, '2023-11-06', 'win');
首先使用row_number()开窗根据运动员id进行分区,日期进行排序得到分组字段rn1,row_number()开窗根据运动员id和比赛结果进行分区,日期进行排序得到分组字段rn2,如下图rn1 - rn2可以唯一表述每个运动员在一次连续胜利、失败、平局的分组字段。
select athlete_id,
to_date(match_time) match_time,
result,
row_number() over (partition by athlete_id order by to_date(match_time)) group1,
row_number() over (partition by athlete_id, result order by to_date(match_time)) group2
from athlete_results;
运行结果:?
select athlete_id,
group1 - group2 win_group,
count(*) win_count
from (
select athlete_id,
to_date(match_time) match_time,
result,
row_number() over (partition by athlete_id order by to_date(match_time)) group1,
row_number() over (partition by athlete_id, result order by to_date(match_time)) group2
from athlete_results
) t
where result = 'win'
group by athlete_id, group1 - group2;
运行结果:
select athlete_id,
max(win_count) max_win_count
from (
select athlete_id,
group1 - group2 win_group,
count(*) win_count
from (
select athlete_id,
to_date(match_time) match_time,
result,
row_number() over (partition by athlete_id order by to_date(match_time)) group1,
row_number() over (partition by athlete_id, result order by to_date(match_time)) group2
from athlete_results
) t
where result = 'win'
group by athlete_id, group1 - group2
) t
group by athlete_id;
运行结果:?
思考打破连续胜利的条件,仔细思考可知,一个运动员每次失败或者平局都会开启一个新的分组,怎么描述这个分组呢?很容易可以想到sum(if())结构,如下图?
select athlete_id,
to_date(match_time) match_day,
result,
sum(if(result = 'lose' or result = 'draw', 1, 0))
over (partition by athlete_id order by to_date(match_time)) win_group
from athlete_results;
运行结果:
select athlete_id,
count(*) win_count
from (
select athlete_id,
result,
sum(if(result = 'lose' or result = 'draw', 1, 0))
over (partition by athlete_id order by match_time) win_group
from athlete_results) t
where result = 'win'
group by athlete_id, win_group;
运行结果:
select athlete_id,
max(win_count) max_win_count
from (
select athlete_id,
count(*) win_count
from (
select athlete_id,
result,
sum(if(result = 'lose' or result = 'draw', 1, 0))
over (partition by athlete_id order by match_time) win_group
from athlete_results) t
where result = 'win'
group by athlete_id, win_group
) t
group by athlete_id;
运行结果: