首尾值与Nth值获取
获取分组内的第一个值、最后一个值和第N个值,是窗口函数应用中的常见场景。
📊 支持情况
| 数据库 | FIRST_VALUE | LAST_VALUE | NTH_VALUE | 备注 |
|---|---|---|---|---|
| MySQL | ✅ 8.0+ | ✅ 8.0+ | ✅ 8.0+ | 完整支持 |
| Oracle | ✅ | ✅ | ✅ | 完整支持 |
| ClickHouse | ✅ | ✅ | ✅ | 语法略有不同 |
| Hologres | ✅ | ✅ | ✅ | 兼容PG |
| MaxCompute | ✅ | ✅ | ✅ | 完整支持 |
| Hive | ✅ 2.1+ | ✅ 2.1+ | ✅ 2.1+ | 完整支持 |
1. FIRST_VALUE – 获取第一个值
场景:标记每个用户的首次购买日期
MySQL / Hive / MaxCompute / Hologres
SELECT
user_id,
order_date,
amount,
FIRST_VALUE(order_date) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS first_order_date,
FIRST_VALUE(amount) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS first_order_amount
FROM orders;
Oracle
SELECT
user_id,
order_date,
amount,
FIRST_VALUE(order_date) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS first_order_date,
FIRST_VALUE(amount) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS first_order_amount
FROM orders;
ClickHouse
SELECT
user_id,
order_date,
amount,
firstValue(order_date) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS first_order_date,
firstValue(amount) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS first_order_amount
FROM orders;
输出示例:
| user_id | order_date | amount | first_order_date | first_order_amount |
|---|---|---|---|---|
| 101 | 2024-01-01 | 500 | 2024-01-01 | 500 |
| 101 | 2024-01-05 | 300 | 2024-01-01 | 500 |
| 101 | 2024-01-10 | 800 | 2024-01-01 | 500 |
2. LAST_VALUE – 获取最后一个值
默认行为与ROWS BETWEEN
-- ❌ 默认行为(可能不是预期结果)
SELECT
user_id,
order_date,
amount,
LAST_VALUE(amount) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS last_amount -- 实际是当前行的下一个值
FROM orders;
-- ✅ 正确写法:使用ROWS BETWEEN
SELECT
user_id,
order_date,
amount,
LAST_VALUE(amount) OVER (
PARTITION BY user_id
ORDER BY order_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_amount
FROM orders;
简化写法(MaxCompute/Hive)
-- 使用IGNORE NULLS处理空值
SELECT
user_id,
order_date,
amount,
LAST_VALUE(amount, TRUE) OVER (
PARTITION BY user_id
ORDER BY order_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_amount
FROM orders;
3. NTH_VALUE – 获取第N个值
MySQL 8.0+
SELECT
user_id,
order_date,
amount,
NTH_VALUE(amount, 1) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS first_amount,
NTH_VALUE(amount, 2) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS second_amount,
NTH_VALUE(amount, 3) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS third_amount
FROM orders;
Oracle
SELECT
user_id,
order_date,
amount,
NTH_VALUE(amount, 1) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS first_amount,
NTH_VALUE(amount, 2) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS second_amount
FROM orders;
ClickHouse
SELECT
user_id,
order_date,
amount,
nthValue(amount, 1) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS first_amount,
nthValue(amount, 2) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS second_amount
FROM orders;
4. IGNORE NULLS / RESPECT NULLS
处理NULL值
-- MySQL 8.0+
SELECT
user_id,
day,
amount,
-- 忽略NULL,获取第一个非空值
FIRST_VALUE(amount IGNORE NULLS) OVER (
PARTITION BY user_id
ORDER BY day
) AS first_valid_amount,
-- 获取最后一个非空值
LAST_VALUE(amount IGNORE NULLS) OVER (
PARTITION BY user_id
ORDER BY day
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_valid_amount
FROM (
SELECT 1 AS user_id, '2024-01-01' AS day, 100 AS amount
UNION ALL SELECT 1, '2024-01-02', NULL
UNION ALL SELECT 1, '2024-01-03', 150
) t;
Oracle
SELECT
user_id,
day,
amount,
FIRST_VALUE(amount IGNORE NULLS) OVER (
PARTITION BY user_id
ORDER BY day
) AS first_valid,
LAST_VALUE(amount IGNORE NULLS) OVER (
PARTITION BY user_id
ORDER BY day
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_valid
FROM ...
ClickHouse
-- ClickHouse使用distinct组合
SELECT
user_id,
day,
amount,
anyLast(amount) OVER (PARTITION BY user_id ORDER BY day) AS last_amount
FROM orders
WHERE amount IS NOT NULL;
5. 组合使用:计算变化量
场景:计算相邻订单金额变化
-- MySQL / Hive / MaxCompute
SELECT
user_id,
order_date,
amount,
LAG(amount) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS prev_amount,
-- 变化量
amount - LAG(amount) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS amount_change,
-- 变化率
ROUND(
(amount - LAG(amount) OVER (PARTITION BY user_id ORDER BY order_date))
/ NULLIF(LAG(amount) OVER (PARTITION BY user_id ORDER BY order_date), 0) * 100,
2
) AS change_pct
FROM orders;
Oracle
SELECT
user_id,
order_date,
amount,
LAG(amount) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS prev_amount,
amount - LAG(amount) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS amount_change
FROM orders;
6. 实战:用户行为分析
场景:标记用户首次/最近一次行为
-- MySQL 8.0+
WITH user_behaviors AS (
SELECT
user_id,
behavior_type,
event_time,
FIRST_VALUE(event_time) OVER (
PARTITION BY user_id
ORDER BY event_time
) AS first_event,
LAST_VALUE(event_time) OVER (
PARTITION BY user_id
ORDER BY event_time
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_event
FROM user_events
)
SELECT
user_id,
behavior_type,
event_time,
CASE WHEN event_time = first_event THEN '首次' ELSE '' END AS is_first,
CASE WHEN event_time = last_event THEN '最近' ELSE '' END AS is_last
FROM user_behaviors;
7. 按时间范围获取首尾值
场景:获取每周的开始和结束销售额
-- MySQL
SELECT
YEARWEEK(sale_date) AS week,
MIN(sale_date) AS week_start,
MAX(sale_date) AS week_end,
SUM(amount) AS weekly_sales,
FIRST_VALUE(amount) OVER (
PARTITION BY YEARWEEK(sale_date)
ORDER BY sale_date
) AS first_day_sales,
LAST_VALUE(amount) OVER (
PARTITION BY YEARWEEK(sale_date)
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_day_sales
FROM daily_sales
GROUP BY YEARWEEK(sale_date), sale_date, amount;
Hive
SELECT
week,
MIN(sale_date) AS week_start,
MAX(sale_date) AS week_end,
SUM(amount) AS weekly_sales,
FIRST_VALUE(amount) OVER (
PARTITION BY week
ORDER BY sale_date
) AS first_day_sales,
LAST_VALUE(amount) OVER (
PARTITION BY week
ORDER BY sale_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_day_sales
FROM (
SELECT
sale_date,
amount,
DATE_FORMAT(sale_date, 'yyyy-ww') AS week
FROM daily_sales
) t
GROUP BY week, sale_date, amount;
8. 特殊场景:分区内的排名
场景:获取每个班级成绩最高和最低的学生
-- MySQL 8.0+
SELECT
class_id,
student_id,
score,
FIRST_VALUE(student_id) OVER (
PARTITION BY class_id
ORDER BY score DESC
) AS top_student,
LAST_VALUE(student_id) OVER (
PARTITION BY class_id
ORDER BY score DESC
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS bottom_student
FROM exam_scores;
使用RANK实现
-- 更清晰的方式
SELECT
class_id,
MAX(CASE WHEN rk_asc = 1 THEN student_id END) AS top_student,
MIN(CASE WHEN rk_desc = 1 THEN student_id END) AS bottom_student
FROM (
SELECT
class_id,
student_id,
score,
RANK() OVER (PARTITION BY class_id ORDER BY score DESC) AS rk_asc,
RANK() OVER (PARTITION BY class_id ORDER BY score ASC) AS rk_desc
FROM exam_scores
) t
WHERE rk_asc = 1 OR rk_desc = 1
GROUP BY class_id;
⚠️ 注意事项
1. LAST_VALUE的默认帧
-- LAST_VALUE默认帧是 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
-- 这意味着默认返回的是当前行的前一行,不是最后一行的值
-- ✅ 正确写法:指定完整的窗口帧
LAST_VALUE(col) OVER (
PARTITION BY ...
ORDER BY ...
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)
-- ✅ 或者使用 FIRST_VALUE 逆向排序
FIRST_VALUE(col) OVER (
PARTITION BY ...
ORDER BY ... DESC
)
2. NTH_VALUE越界处理
-- 当N超过数据行数时,NTH_VALUE返回NULL
-- MySQL 8.0+
SELECT
user_id,
NTH_VALUE(amount, 5) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS fifth_amount
FROM orders;
3. 性能考虑
-- ✅ 建议:在子查询中先过滤和排序
SELECT *
FROM (
SELECT
user_id,
order_date,
amount,
FIRST_VALUE(amount) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS first_amount
FROM orders
WHERE order_date >= '2024-01-01' -- 先过滤
) t;
-- ❌ 避免:窗口函数中包含复杂表达式
SELECT
user_id,
FIRST_VALUE(CASE WHEN amount > 0 THEN amount ELSE 0 END) OVER (...) AS first_valid
FROM ...
📝 练习题
题目:用户订单分析
-- 建表
CREATE TABLE user_orders (
order_id INT,
user_id INT,
product_name STRING,
amount DECIMAL(10,2),
order_date DATE
);
-- 插入数据
INSERT INTO user_orders VALUES
(1, 101, 'iPhone', 8000, '2024-01-01'),
(2, 101, 'MacBook', 12000, '2024-01-05'),
(3, 101, 'AirPods', 2000, '2024-01-10'),
(4, 102, 'iPad', 5000, '2024-01-02'),
(5, 102, 'Apple Watch', 3000, '2024-01-08');
要求:
- 查询每个用户的首单和末单产品及金额
- 查询每个用户的第二笔订单信息
- 计算每个用户相邻订单的金额差
参考SQL:
-- 1. 首单和末单
SELECT
user_id,
order_date,
product_name,
amount,
first_product,
first_amount,
last_product,
last_amount
FROM (
SELECT
user_id,
order_date,
product_name,
amount,
FIRST_VALUE(product_name) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS first_product,
FIRST_VALUE(amount) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS first_amount,
LAST_VALUE(product_name) OVER (
PARTITION BY user_id
ORDER BY order_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_product,
LAST_VALUE(amount) OVER (
PARTITION BY user_id
ORDER BY order_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS last_amount
FROM user_orders
) t;
-- 2. 第二笔订单
SELECT *
FROM (
SELECT
user_id,
order_date,
product_name,
amount,
NTH_VALUE(product_name, 2) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS second_product,
NTH_VALUE(amount, 2) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS second_amount
FROM user_orders
) t
WHERE second_product IS NOT NULL;
-- 3. 相邻订单金额差
SELECT
user_id,
order_date,
amount,
prev_amount,
amount - prev_amount AS amount_diff
FROM (
SELECT
user_id,
order_date,
amount,
LAG(amount) OVER (
PARTITION BY user_id
ORDER BY order_date
) AS prev_amount
FROM user_orders
) t;
順子の杂货铺


