在SQL分组后取第一条记录中介绍了分组取满足条件的第一条记录的方法,现在业务上面临如此需求:在做公司流程监控时,要求监控每个流程每个节点的用时情况。其中有个字段isend可以判断流程是否结束,但是流程结束后可能还会有操作(例如转发、会签等),分析时需要将结束后的节点删除掉。
思路:根据流程ID分组,按照操作时间从小到大排序,得到字段rk,根据流程ID和isend分组,按照操作时间从小到大排序,得到字段rk_。rk_=1说明是流程开始节点或者第一个结束节点,如果rk<=rk_说明节点是在isend=0的分组里还没有结束。
insert overwrite table dwd.dwd_tableName_di
select
requestid,
workflowname,
nodename,
operdate,
isbereject,
rk,
row_number() over(partition by requestid order by operdate desc) as rk_desc,
pre_node_time,
datediff(operdate,pre_node_time) as node_diff_days,
round((unix_timestamp(operdate,'yyyy-MM-dd HH:mm:ss')-unix_timestamp(pre_node_time,'yyyy-MM-dd HH:mm:ss'))/3600) as node_diff_hours,
unix_timestamp(operdate,'yyyy-MM-dd HH:mm:ss')-unix_timestamp(pre_node_time,'yyyy-MM-dd HH:mm:ss') as node_diff_seconds
from(
select
requestid,
workflowname,
nodename,
operdate,
isbereject,
row_number() over(partition by requestid order by operdate) as rk,
lag(operdate,1,null) over(partition by requestid order by operdate) as pre_node_time,
row_number() over(partition by requestid,isend order by operdate) as rk_
from (
select
requestid,
nodename,
workflowname,
concat(operatedate,' ',operatetime) as operdate,
isbereject,
isend
from
dbname.tableName
)tmp
)ttt
where rk_ = 1 or rk <= rk_