SQL经典案例之窗口函数的计算使用
构造测试数据
-- drop table IF EXISTS t_windows;
drop table t_windows;
create table t_windows(tid int,tname varchar(10),tnum int);
insert into t_windows values(1,'ORACLE',72);
insert into t_windows values(2,'MYSQL',78);
insert into t_windows values(3,'POSTGRESQL',85);
insert into t_windows values(4,'GAUSSDB',85);
insert into t_windows values(5,'SQLSERVER',88);
insert into t_windows values(6,'DB2',92);
insert into t_windows values(7,'TIDB',95);
insert into t_windows values(8,'MONGODB',95);
insert into t_windows values(9,'GODB',100);
commit;
复制
计算累计值
窗口函数 SUM OVER 能轻松地生成数据累计(移动)的总计
select tid,tname,tnum,
sum(tnum) over(order by tnum) as num_total1,
sum(tnum) over(order by tnum,tname) as num_total2
from t_windows
order by tnum;
复制
当排序列 tnum 存在重复数据时会出现重复值提前相加问题,对此应保证窗口函数排序的值是唯一的
计算众数
众数:数据集中出现频率(次数)最高的那个数据(元素),可存在多个
dense_rank
基础通用写法
select tnum
from (
select tnum,dense_rank() over(order by cnt desc) as rnk
from (select tnum,count(*) as cnt from t_windows group by tnum) as x
) as y
where rnk = 1;
复制
CTE
-- 当存在多个重复的众数时用limit无法确定提取个数
select tnum,count(*) from t_windows group by 1 order by 2 desc limit 1;
-- 按次数再按数值大小降序排序
select tnum,count(*) from t_windows group by 1 order by 2 desc,1 desc limit 1;
-- CTE 写法
with x as(
select tnum,count(*) as num from t_windows group by tnum),
y as(
select max(num) as tmax from x)
select tnum from y left join x on x.num = y.tmax;
复制
keep
Oracle 实现的小细节
-- Oracle 子查询表不能用 as 显示指定为别名,group by 后面要写明具体字段名不能用数字代替
select tnum
from (
select tnum,dense_rank() over(order by cnt desc) rnk
from (select tnum,count(*) as cnt from t_windows group by tnum) x
) y
where rnk = 1;
-- 使用聚合函数MAX的KEEP扩展来查找众数只返回出现次数最多的最大值,等同于分组排序后的limit 1
select max(tnum) keep(dense_rank first order by cnt desc) tnum_max
from (
select tnum,count(*) as cnt from t_windows group by tnum
) ;
-- rownum 实现 limit 1
select tnum,count(*) from t_windows group by tnum order by count(*) desc,tnum desc;
select tnum from (select tnum,count(*) from t_windows group by tnum order by count(*) desc,tnum desc) x where rownum = 1;
复制
计算中位数
中位数(中值):顺序排列的一组数据中居于中间位置的数,偶数个则为中间两位和的平均值
percentile_cont
-- postgresql/oracle
select percentile_cont(0.5) within group(order by tnum) from t_windows;
-- oracle的MEDIAN函数也能实现
select median(tnum) from t_windows;
-- sqlserver
select percentile_cont(0.5) within group(order by tnum) over() from t_windows;
复制
percent_rank
mysql没有percentile_cont函数,可采用权变结合percent_rank和CTE来实现,找出百分位数不超过50的最大值和百分位数不低于50的最小值,再计算其平均值
- percent_rank 窗口函数无法放置于where后面,故采用CTE写法
- order by 排序处无法使用 union ,故多写了两个CTE
with rank_tab(tnum,rank_sal) as(
select tnum,percent_rank() over(order by tnum) as rank_sal from t_windows
),
inter_asc as(
select tnum,rank_sal from rank_tab where rank_sal>=0.5 order by tnum limit 1
),
inter_desc as(
select tnum,rank_sal from rank_tab where rank_sal<=0.5 order by tnum desc limit 1
),
inter as(
select tnum from inter_asc
union
select tnum from inter_desc
)select avg(tnum) as MedianSal from inter;
复制
计算截尾均值
在计算平均值时,为了降低两端最值对整体均值的影响,会考虑剔除最大值和最小值后再统计平均值
基本通用写法,使用子查询和聚合函数将最大值和最小值排除在外:
select avg(tnum) from t_windows
where tnum not in (
(select min(tnum) from t_windows),
(select max(tnum) from t_windows)
);
复制
使用窗口函数将最大值和最小值排除在外:
select avg(tnum) from(
select tnum,min(tnum) over() min_num,
max(tnum) over() max_num
from t_windows
) x
where tnum not in (min_num,max_num);
复制
注意: 以上两种方式当存在多个相同的最大值或最小值时都会被排除,其次则是注意不同数据库返回的数据精度也有差别
-- 即多插入一条最大值计算的平均值还是一样的
insert into t_windows values(10,'GODB',100);
delete from t_windows where tid=10;
复制
如果只是要排除单个最高值和最低值,可以从SUM聚合结果中减去它们再执行除法计算
select (sum(tnum)-min(tnum)-max(tnum))/(count(*)-2) from t_windows;
复制
拓展
窗口排序函数
- rank:跳跃排序,1、2、2、4、5 . . .
- dense_rank:重复无间断排序,1、2、2、3、4 …
- row_number:递增排序,1、2、3、4、5 …
-- dense_rank
select tnum,dense_rank() over(order by cnt desc) as rnk
from (select tnum,count(*) as cnt from t_windows group by 1 ) as x;
-- rank
select tnum,rank() over(order by cnt desc) as rnk
from (select tnum,count(*) as cnt from t_windows group by 1 ) as x;
-- row_number
select tnum,row_number() over(order by cnt desc) as rnk
from (select tnum,count(*) as cnt from t_windows group by 1 ) as x;
复制
窗口偏移函数
- LAG(字段,偏移量,无值时返回):向下偏移,获取上一条记录
- LEAD(字段,偏移量,无值时返回):向上偏移,获取下一条记录
- FIRST_VALUE:分区内排序后,获取第一个值和最后一个值
- LAST_VALUE:分区内排序后,获取最后一个值
insert into t_windows values(10,'GODB',200);
select *,
LAG(tnum) over(order by tnum desc) as lag_num,
LEAD(tnum) over(order by tnum desc) as lead_num,
LAG(tnum,2,0) over(order by tnum desc) as lag_num_tab,
LEAD(tnum,2,0) over(order by tnum desc) as lead_num_tab,
FIRST_VALUE (tnum) OVER(PARTITION BY tname ORDER BY tnum desc) AS FirstValue,
LAST_VALUE (tnum) OVER(PARTITION BY tname ORDER BY tnum desc ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS LastValue
from t_windows;
复制
窗口统计函数
- cume_dist():0< cume_dist <=1,计算公式 = ((统计的列的种类)/(统计的列的总数))* 每个重复种类中的个数
- percent_rank():0<= percent_rank <=1,计算公式 = ((统计的列的种类)/(统计的列的总数))*100%,第一个总是从 0 开始的,重复几次那就会有几个 0,最后一个是 1
SELECT tid,tname,tnum,
cume_dist() OVER (ORDER BY tnum) as cume_dist,
percent_rank() over (order by tnum) as percent_rank
FROM t_windows;
复制
「喜欢这篇文章,您的关注和赞赏是给作者最好的鼓励」
关注作者
【版权声明】本文为墨天轮用户原创内容,转载时必须标注文章的来源(墨天轮),文章链接,文章作者等基本信息,否则作者和墨天轮有权追究责任。如果您发现墨天轮中有涉嫌抄袭或者侵权的内容,欢迎发送邮件至:contact@modb.pro进行举报,并提供相关证据,一经查实,墨天轮将立刻删除相关内容。
评论
等闲识得东风面,万紫千红总是春。今天我们将迎来春季的第一个节气,立春!
2月前

评论
相关阅读
2025年4月中国数据库流行度排行榜:OB高分复登顶,崖山稳驭撼十强
墨天轮编辑部
2079次阅读
2025-04-09 15:33:27
数据库国产化替代深化:DBA的机遇与挑战
代晓磊
955次阅读
2025-04-27 16:53:22
2025年3月国产数据库大事记
墨天轮编辑部
918次阅读
2025-04-03 15:21:16
2025年3月国产数据库中标情况一览:TDSQL大单622万、GaussDB大单581万……
通讯员
629次阅读
2025-04-10 15:35:48
征文大赛 |「码」上数据库—— KWDB 2025 创作者计划启动
KaiwuDB
516次阅读
2025-04-01 20:42:12
数据库,没有关税却有壁垒
多明戈教你玩狼人杀
507次阅读
2025-04-11 09:38:42
国产数据库需要扩大场景覆盖面才能在竞争中更有优势
白鳝的洞穴
489次阅读
2025-04-14 09:40:20
最近我为什么不写评论国产数据库的文章了
白鳝的洞穴
441次阅读
2025-04-07 09:44:54
【活动】分享你的压箱底干货文档,三篇解锁进阶奖励!
墨天轮编辑部
399次阅读
2025-04-17 17:02:24
天津市政府数据库框采结果公布,7家数据库产品入选!
通讯员
377次阅读
2025-04-10 12:32:35