我来为你详细介绍MySQL的窗口函数(Window Functions),这是MySQL 8.0开始支持的强大功能。
🎯 窗口函数基础语法
窗口函数名([参数]) OVER (
[PARTITION BY 列名]
[ORDER BY 列名]
[窗口帧]
)
📊 主要窗口函数分类
1. 排名函数
ROW_NUMBER() - 连续排名
-- 为每行分配唯一连续编号
SELECT
name,
salary,
ROW_NUMBER() OVER (ORDER BY salary DESC) as row_num
FROM employees;
RANK() - 并列排名(有间隔)
-- 相同值得到相同排名,下一个排名会跳过
SELECT
name,
salary,
RANK() OVER (ORDER BY salary DESC) as rank_num
FROM employees;
-- 结果:1, 2, 2, 4, 5...
DENSE_RANK() - 并列排名(无间隔)
-- 相同值得到相同排名,下一个排名连续
SELECT
name,
salary,
DENSE_RANK() OVER (ORDER BY salary DESC) as dense_rank_num
FROM employees;
-- 结果:1, 2, 2, 3, 4...
PERCENT_RANK() - 百分比排名
SELECT
name,
salary,
PERCENT_RANK() OVER (ORDER BY salary) as percent_rank
FROM employees;
-- 返回值范围:0-1
2. 聚合窗口函数
SUM() OVER - 累计求和
-- 按薪资排序的累计和
SELECT
name,
salary,
SUM(salary) OVER (ORDER BY salary) as running_total
FROM employees;
-- 按部门分组的累计和
SELECT
department,
name,
salary,
SUM(salary) OVER (PARTITION BY department ORDER BY salary) as dept_running_total
FROM employees;
AVG() OVER - 移动平均
-- 当前行和前两行的平均值
SELECT
date,
sales,
AVG(sales) OVER (
ORDER BY date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) as moving_avg_3
FROM sales_data;
COUNT() OVER - 累计计数
SELECT
name,
department,
COUNT(*) OVER (PARTITION BY department ORDER BY hire_date) as dept_count
FROM employees;
3. 取值函数
LAG() - 取前N行的值
-- 获取前一行的薪资
SELECT
name,
salary,
LAG(salary, 1) OVER (ORDER BY hire_date) as prev_salary,
salary - LAG(salary, 1) OVER (ORDER BY hire_date) as salary_diff
FROM employees;
LEAD() - 取后N行的值
-- 获取后一行的薪资
SELECT
name,
salary,
LEAD(salary, 1) OVER (ORDER BY hire_date) as next_salary
FROM employees;
FIRST_VALUE() - 窗口内第一个值
-- 每个部门薪资最高的员工
SELECT
department,
name,
salary,
FIRST_VALUE(name) OVER (
PARTITION BY department
ORDER BY salary DESC
) as highest_paid
FROM employees;
LAST_VALUE() - 窗口内最后一个值
-- 注意:需要指定窗口帧范围
SELECT
department,
name,
salary,
LAST_VALUE(name) OVER (
PARTITION BY department
ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) as lowest_paid
FROM employees;
NTH_VALUE() - 窗口内第N个值
-- 获取每个部门薪资第二高的员工
SELECT
department,
name,
salary,
NTH_VALUE(name, 2) OVER (
PARTITION BY department
ORDER BY salary DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) as second_highest_paid
FROM employees;
🔧 窗口帧(Window Frame)详解
窗口帧语法
{ROWS | RANGE} BETWEEN 开始边界 AND 结束边界
边界选项
UNBOUNDED PRECEDING
- 分区开始N PRECEDING
- 当前行前N行CURRENT ROW
- 当前行N FOLLOWING
- 当前行后N行UNBOUNDED FOLLOWING
- 分区结束
常用窗口帧示例
-- 当前行和前两行
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
-- 前一行到后一行
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING
-- 整个分区
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
-- 当前行到分区结束
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
💡 实际应用场景
1. 计算同比、环比
-- 计算销售额月度增长率
SELECT
month,
sales,
LAG(sales, 1) OVER (ORDER BY month) as prev_month_sales,
ROUND((sales - LAG(sales, 1) OVER (ORDER BY month)) / LAG(sales, 1) OVER (ORDER BY month) * 100, 2) as growth_rate
FROM monthly_sales;
2. Top N 查询
-- 每个部门薪资前3名
SELECT *
FROM (
SELECT
department,
name,
salary,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) as rn
FROM employees
) ranked
WHERE rn <= 3;
3. 累计百分比
-- 计算销售额累计占比
SELECT
product,
sales,
SUM(sales) OVER () as total_sales,
ROUND(SUM(sales) OVER (ORDER BY sales DESC) / SUM(sales) OVER () * 100, 2) as cumulative_percent
FROM product_sales;
4. 移动平均线
-- 7天移动平均
SELECT
date,
price,
AVG(price) OVER (
ORDER BY date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) as ma_7
FROM stock_prices;
5. 分组内占比
-- 每个员工在部门内的薪资占比
SELECT
department,
name,
salary,
ROUND(salary / SUM(salary) OVER (PARTITION BY department) * 100, 2) as dept_salary_percent
FROM employees;
⚠️ 注意事项和最佳实践
1. 性能优化
-- 为ORDER BY的列创建索引
CREATE INDEX idx_salary ON employees(salary);
-- 避免在窗口函数中使用复杂表达式
-- 不好的写法
SELECT name, UPPER(name) OVER (ORDER BY LOWER(name))
-- 好的写法
SELECT name, UPPER(name) OVER (ORDER BY name)
2. 窗口函数与GROUP BY的区别
-- GROUP BY - 减少行数
SELECT department, AVG(salary)
FROM employees
GROUP BY department;
-- 窗口函数 - 保持原有行数
SELECT
department,
name,
salary,
AVG(salary) OVER (PARTITION BY department) as dept_avg
FROM employees;
3. 常见错误避免
-- 错误:在WHERE中直接使用窗口函数
-- SELECT * FROM employees WHERE ROW_NUMBER() OVER (ORDER BY salary) = 1;
-- 正确:使用子查询
SELECT * FROM (
SELECT *, ROW_NUMBER() OVER (ORDER BY salary DESC) as rn
FROM employees
) t WHERE rn = 1;
🔍 高级技巧
1. 多个窗口函数组合
SELECT
name,
salary,
department,
-- 总体排名
ROW_NUMBER() OVER (ORDER BY salary DESC) as overall_rank,
-- 部门内排名
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) as dept_rank,
-- 部门平均薪资
AVG(salary) OVER (PARTITION BY department) as dept_avg,
-- 与部门平均薪资的差值
salary - AVG(salary) OVER (PARTITION BY department) as diff_from_avg
FROM employees;
2. 使用命名窗口
SELECT
name,
salary,
department,
ROW_NUMBER() OVER dept_window as dept_rank,
AVG(salary) OVER dept_window as dept_avg
FROM employees
WINDOW dept_window AS (PARTITION BY department ORDER BY salary DESC);