顺子の杂货铺
生命不息,折腾不止,且行且珍惜~

05-首尾值与Nth值获取

DMIT VPS

首尾值与Nth值获取

获取分组内的第一个值、最后一个值和第N个值,是窗口函数应用中的常见场景。

📊 支持情况

数据库 FIRST_VALUE LAST_VALUE NTH_VALUE 备注
MySQL ✅ 8.0+ ✅ 8.0+ ✅ 8.0+ 完整支持
Oracle 完整支持
ClickHouse 语法略有不同
Hologres 兼容PG
MaxCompute 完整支持
Hive ✅ 2.1+ ✅ 2.1+ ✅ 2.1+ 完整支持

1. FIRST_VALUE – 获取第一个值

场景:标记每个用户的首次购买日期

MySQL / Hive / MaxCompute / Hologres

SELECT
    user_id,
    order_date,
    amount,
    FIRST_VALUE(order_date) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS first_order_date,
    FIRST_VALUE(amount) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS first_order_amount
FROM orders;

Oracle

SELECT
    user_id,
    order_date,
    amount,
    FIRST_VALUE(order_date) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS first_order_date,
    FIRST_VALUE(amount) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS first_order_amount
FROM orders;

ClickHouse

SELECT
    user_id,
    order_date,
    amount,
    firstValue(order_date) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS first_order_date,
    firstValue(amount) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS first_order_amount
FROM orders;

输出示例:

user_id order_date amount first_order_date first_order_amount
101 2024-01-01 500 2024-01-01 500
101 2024-01-05 300 2024-01-01 500
101 2024-01-10 800 2024-01-01 500

2. LAST_VALUE – 获取最后一个值

默认行为与ROWS BETWEEN

-- ❌ 默认行为(可能不是预期结果)
SELECT
    user_id,
    order_date,
    amount,
    LAST_VALUE(amount) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS last_amount  -- 实际是当前行的下一个值
FROM orders;

-- ✅ 正确写法:使用ROWS BETWEEN
SELECT
    user_id,
    order_date,
    amount,
    LAST_VALUE(amount) OVER (
        PARTITION BY user_id
        ORDER BY order_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS last_amount
FROM orders;

简化写法(MaxCompute/Hive)

-- 使用IGNORE NULLS处理空值
SELECT
    user_id,
    order_date,
    amount,
    LAST_VALUE(amount, TRUE) OVER (
        PARTITION BY user_id
        ORDER BY order_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS last_amount
FROM orders;

3. NTH_VALUE – 获取第N个值

MySQL 8.0+

SELECT
    user_id,
    order_date,
    amount,
    NTH_VALUE(amount, 1) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS first_amount,
    NTH_VALUE(amount, 2) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS second_amount,
    NTH_VALUE(amount, 3) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS third_amount
FROM orders;

Oracle

SELECT
    user_id,
    order_date,
    amount,
    NTH_VALUE(amount, 1) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS first_amount,
    NTH_VALUE(amount, 2) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS second_amount
FROM orders;

ClickHouse

SELECT
    user_id,
    order_date,
    amount,
    nthValue(amount, 1) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS first_amount,
    nthValue(amount, 2) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS second_amount
FROM orders;

4. IGNORE NULLS / RESPECT NULLS

处理NULL值

-- MySQL 8.0+
SELECT
    user_id,
    day,
    amount,
    -- 忽略NULL,获取第一个非空值
    FIRST_VALUE(amount IGNORE NULLS) OVER (
        PARTITION BY user_id
        ORDER BY day
    ) AS first_valid_amount,
    -- 获取最后一个非空值
    LAST_VALUE(amount IGNORE NULLS) OVER (
        PARTITION BY user_id
        ORDER BY day
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS last_valid_amount
FROM (
    SELECT 1 AS user_id, '2024-01-01' AS day, 100 AS amount
    UNION ALL SELECT 1, '2024-01-02', NULL
    UNION ALL SELECT 1, '2024-01-03', 150
) t;

Oracle

SELECT
    user_id,
    day,
    amount,
    FIRST_VALUE(amount IGNORE NULLS) OVER (
        PARTITION BY user_id
        ORDER BY day
    ) AS first_valid,
    LAST_VALUE(amount IGNORE NULLS) OVER (
        PARTITION BY user_id
        ORDER BY day
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS last_valid
FROM ...

ClickHouse

-- ClickHouse使用distinct组合
SELECT
    user_id,
    day,
    amount,
    anyLast(amount) OVER (PARTITION BY user_id ORDER BY day) AS last_amount
FROM orders
WHERE amount IS NOT NULL;

5. 组合使用:计算变化量

场景:计算相邻订单金额变化

-- MySQL / Hive / MaxCompute
SELECT
    user_id,
    order_date,
    amount,
    LAG(amount) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS prev_amount,
    -- 变化量
    amount - LAG(amount) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS amount_change,
    -- 变化率
    ROUND(
        (amount - LAG(amount) OVER (PARTITION BY user_id ORDER BY order_date))
        / NULLIF(LAG(amount) OVER (PARTITION BY user_id ORDER BY order_date), 0) * 100,
        2
    ) AS change_pct
FROM orders;

Oracle

SELECT
    user_id,
    order_date,
    amount,
    LAG(amount) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS prev_amount,
    amount - LAG(amount) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS amount_change
FROM orders;

6. 实战:用户行为分析

场景:标记用户首次/最近一次行为

-- MySQL 8.0+
WITH user_behaviors AS (
    SELECT
        user_id,
        behavior_type,
        event_time,
        FIRST_VALUE(event_time) OVER (
            PARTITION BY user_id
            ORDER BY event_time
        ) AS first_event,
        LAST_VALUE(event_time) OVER (
            PARTITION BY user_id
            ORDER BY event_time
            ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
        ) AS last_event
    FROM user_events
)
SELECT
    user_id,
    behavior_type,
    event_time,
    CASE WHEN event_time = first_event THEN '首次' ELSE '' END AS is_first,
    CASE WHEN event_time = last_event THEN '最近' ELSE '' END AS is_last
FROM user_behaviors;

7. 按时间范围获取首尾值

场景:获取每周的开始和结束销售额

-- MySQL
SELECT
    YEARWEEK(sale_date) AS week,
    MIN(sale_date) AS week_start,
    MAX(sale_date) AS week_end,
    SUM(amount) AS weekly_sales,
    FIRST_VALUE(amount) OVER (
        PARTITION BY YEARWEEK(sale_date)
        ORDER BY sale_date
    ) AS first_day_sales,
    LAST_VALUE(amount) OVER (
        PARTITION BY YEARWEEK(sale_date)
        ORDER BY sale_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS last_day_sales
FROM daily_sales
GROUP BY YEARWEEK(sale_date), sale_date, amount;

Hive

SELECT
    week,
    MIN(sale_date) AS week_start,
    MAX(sale_date) AS week_end,
    SUM(amount) AS weekly_sales,
    FIRST_VALUE(amount) OVER (
        PARTITION BY week
        ORDER BY sale_date
    ) AS first_day_sales,
    LAST_VALUE(amount) OVER (
        PARTITION BY week
        ORDER BY sale_date
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS last_day_sales
FROM (
    SELECT
        sale_date,
        amount,
        DATE_FORMAT(sale_date, 'yyyy-ww') AS week
    FROM daily_sales
) t
GROUP BY week, sale_date, amount;

8. 特殊场景:分区内的排名

场景:获取每个班级成绩最高和最低的学生

-- MySQL 8.0+
SELECT
    class_id,
    student_id,
    score,
    FIRST_VALUE(student_id) OVER (
        PARTITION BY class_id
        ORDER BY score DESC
    ) AS top_student,
    LAST_VALUE(student_id) OVER (
        PARTITION BY class_id
        ORDER BY score DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS bottom_student
FROM exam_scores;

使用RANK实现

-- 更清晰的方式
SELECT
    class_id,
    MAX(CASE WHEN rk_asc = 1 THEN student_id END) AS top_student,
    MIN(CASE WHEN rk_desc = 1 THEN student_id END) AS bottom_student
FROM (
    SELECT
        class_id,
        student_id,
        score,
        RANK() OVER (PARTITION BY class_id ORDER BY score DESC) AS rk_asc,
        RANK() OVER (PARTITION BY class_id ORDER BY score ASC) AS rk_desc
    FROM exam_scores
) t
WHERE rk_asc = 1 OR rk_desc = 1
GROUP BY class_id;

⚠️ 注意事项

1. LAST_VALUE的默认帧

-- LAST_VALUE默认帧是 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
-- 这意味着默认返回的是当前行的前一行,不是最后一行的值

-- ✅ 正确写法:指定完整的窗口帧
LAST_VALUE(col) OVER (
    PARTITION BY ...
    ORDER BY ...
    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
)

-- ✅ 或者使用 FIRST_VALUE 逆向排序
FIRST_VALUE(col) OVER (
    PARTITION BY ...
    ORDER BY ... DESC
)

2. NTH_VALUE越界处理

-- 当N超过数据行数时,NTH_VALUE返回NULL
-- MySQL 8.0+
SELECT
    user_id,
    NTH_VALUE(amount, 5) OVER (
        PARTITION BY user_id
        ORDER BY order_date
    ) AS fifth_amount
FROM orders;

3. 性能考虑

-- ✅ 建议:在子查询中先过滤和排序
SELECT *
FROM (
    SELECT
        user_id,
        order_date,
        amount,
        FIRST_VALUE(amount) OVER (
            PARTITION BY user_id
            ORDER BY order_date
        ) AS first_amount
    FROM orders
    WHERE order_date >= '2024-01-01'  -- 先过滤
) t;

-- ❌ 避免:窗口函数中包含复杂表达式
SELECT
    user_id,
    FIRST_VALUE(CASE WHEN amount > 0 THEN amount ELSE 0 END) OVER (...) AS first_valid
FROM ...

📝 练习题

题目:用户订单分析

-- 建表
CREATE TABLE user_orders (
    order_id INT,
    user_id INT,
    product_name STRING,
    amount DECIMAL(10,2),
    order_date DATE
);

-- 插入数据
INSERT INTO user_orders VALUES
(1, 101, 'iPhone', 8000, '2024-01-01'),
(2, 101, 'MacBook', 12000, '2024-01-05'),
(3, 101, 'AirPods', 2000, '2024-01-10'),
(4, 102, 'iPad', 5000, '2024-01-02'),
(5, 102, 'Apple Watch', 3000, '2024-01-08');

要求

  1. 查询每个用户的首单和末单产品及金额
  2. 查询每个用户的第二笔订单信息
  3. 计算每个用户相邻订单的金额差

参考SQL

-- 1. 首单和末单
SELECT
    user_id,
    order_date,
    product_name,
    amount,
    first_product,
    first_amount,
    last_product,
    last_amount
FROM (
    SELECT
        user_id,
        order_date,
        product_name,
        amount,
        FIRST_VALUE(product_name) OVER (
            PARTITION BY user_id
            ORDER BY order_date
        ) AS first_product,
        FIRST_VALUE(amount) OVER (
            PARTITION BY user_id
            ORDER BY order_date
        ) AS first_amount,
        LAST_VALUE(product_name) OVER (
            PARTITION BY user_id
            ORDER BY order_date
            ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
        ) AS last_product,
        LAST_VALUE(amount) OVER (
            PARTITION BY user_id
            ORDER BY order_date
            ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
        ) AS last_amount
    FROM user_orders
) t;

-- 2. 第二笔订单
SELECT *
FROM (
    SELECT
        user_id,
        order_date,
        product_name,
        amount,
        NTH_VALUE(product_name, 2) OVER (
            PARTITION BY user_id
            ORDER BY order_date
        ) AS second_product,
        NTH_VALUE(amount, 2) OVER (
            PARTITION BY user_id
            ORDER BY order_date
        ) AS second_amount
    FROM user_orders
) t
WHERE second_product IS NOT NULL;

-- 3. 相邻订单金额差
SELECT
    user_id,
    order_date,
    amount,
    prev_amount,
    amount - prev_amount AS amount_diff
FROM (
    SELECT
        user_id,
        order_date,
        amount,
        LAG(amount) OVER (
            PARTITION BY user_id
            ORDER BY order_date
        ) AS prev_amount
    FROM user_orders
) t;
赞(0)
未经允许不得转载:順子の杂货铺 » 05-首尾值与Nth值获取
搬瓦工VPS

评论 抢沙发

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址

分享创造快乐

联系我们联系我们