顺子の杂货铺
生命不息,折腾不止,且行且珍惜~

16-条件聚合

DMIT VPS

条件聚合

使用CASE WHEN实现基于条件的统计聚合,是复杂数据分析的核心技能。

📊 支持情况

数据库CASE WHENSUM IFCOUNT IF备注
MySQL✅ 8.0+✅ 8.0+完整支持
Oracle完整支持
ClickHousesumIf/countIf
Hologres兼容PG
MaxCompute完整支持
Hive完整支持

1. 单条件聚合

基本CASE WHEN

-- 统计男女数量
SELECT
    COUNT(CASE WHEN gender = '男' THEN 1 END) AS male_count,
    COUNT(CASE WHEN gender = '女' THEN 1 END) AS female_count,
    COUNT(*) AS total
FROM users;

SUM + CASE WHEN

-- 统计销售额
SELECT
    SUM(CASE WHEN status = 'completed' THEN amount ELSE 0 END) AS completed_sales,
    SUM(CASE WHEN status = 'pending' THEN amount ELSE 0 END) AS pending_sales,
    SUM(CASE WHEN status = 'cancelled' THEN amount ELSE 0 END) AS cancelled_sales
FROM orders;

简化写法(MySQL/ClickHouse)

-- MySQL 8.0+ / ClickHouse
SELECT
    COUNT_IF(status = 'completed') AS completed_count,
    SUM_IF(amount, status = 'completed') AS completed_sales,
    COUNT_IF(status = 'pending') AS pending_count
FROM orders;

2. 多条件聚合

嵌套CASE WHEN

-- 分段统计
SELECT
    SUM(CASE
        WHEN age < 18 THEN 1
        WHEN age < 30 THEN 1
        ELSE 0
    END) AS young,
    SUM(CASE
        WHEN age >= 18 AND age < 30 THEN 1
        ELSE 0
    END) AS adult
FROM users;

-- 使用ELSE简化
SELECT
    SUM(CASE WHEN age < 18 THEN 1 ELSE 0 END) AS under_18,
    SUM(CASE WHEN age BETWEEN 18 AND 30 THEN 1 ELSE 0 END) AS 18_30,
    SUM(CASE WHEN age > 30 THEN 1 ELSE 0 END) AS over_30
FROM users;

分段金额统计

-- 订单金额分段
SELECT
    COUNT(CASE WHEN amount < 100 THEN 1 END) AS small_orders,
    COUNT(CASE WHEN amount >= 100 AND amount < 500 THEN 1 END) AS medium_orders,
    COUNT(CASE WHEN amount >= 500 AND amount < 1000 THEN 1 END) AS large_orders,
    COUNT(CASE WHEN amount >= 1000 THEN 1 END) AS vip_orders
FROM orders;

3. 分组条件聚合

按组分别统计

-- 按地区分别统计男女会员
SELECT
    region,
    SUM(CASE WHEN gender = '男' THEN 1 ELSE 0 END) AS male_members,
    SUM(CASE WHEN gender = '女' THEN 1 ELSE 0 END) AS female_members
FROM users
GROUP BY region;

多个分组条件

-- 按地区+年龄段统计
SELECT
    region,
    CASE
        WHEN age < 18 THEN '未成年'
        WHEN age < 30 THEN '青年'
        WHEN age < 50 THEN '中年'
        ELSE '老年'
    END AS age_group,
    COUNT(*) AS member_count
FROM users
GROUP BY region, age_group;

4. 实战案例

场景1:销售漏斗统计

-- 统计各阶段转化
SELECT
    COUNT(*) AS total_visits,
    COUNT(CASE WHEN event = 'view' THEN 1 END) AS views,
    COUNT(CASE WHEN event = 'cart' THEN 1 END) AS carts,
    COUNT(CASE WHEN event = 'checkout' THEN 1 END) AS checkouts,
    COUNT(CASE WHEN event = 'purchase' THEN 1 END) AS purchases
FROM user_events;

场景2:会员等级分布

SELECT
    region,
    SUM(CASE WHEN level = '普通会员' THEN 1 ELSE 0 END) AS normal,
    SUM(CASE WHEN level = '银牌会员' THEN 1 ELSE 0 END) AS silver,
    SUM(CASE WHEN level = '金牌会员' THEN 1 ELSE 0 END) AS gold,
    SUM(CASE WHEN level = '钻石会员' THEN 1 ELSE 0 END) AS diamond
FROM members
GROUP BY region;

场景3:库存状态统计

SELECT
    warehouse,
    SUM(CASE WHEN status = 'in_stock' THEN quantity ELSE 0 END) AS in_stock,
    SUM(CASE WHEN status = 'low_stock' THEN quantity ELSE 0 END) AS low_stock,
    SUM(CASE WHEN status = 'out_of_stock' THEN 1 ELSE 0 END) AS out_of_stock_count,
    SUM(CASE WHEN status = 'in_stock' THEN 1 ELSE 0 END) AS in_stock_count
FROM inventory
GROUP BY warehouse;

场景4:支付方式统计

SELECT
    DATE(order_date) AS order_date,
    SUM(CASE WHEN payment_type = 'alipay' THEN amount ELSE 0 END) AS alipay,
    SUM(CASE WHEN payment_type = 'wechat' THEN amount ELSE 0 END) AS wechat,
    SUM(CASE WHEN payment_type = 'card' THEN amount ELSE 0 END) AS card,
    SUM(CASE WHEN payment_type = 'cash' THEN amount ELSE 0 END) AS cash
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 7 DAY)
GROUP BY DATE(order_date)
ORDER BY order_date;

5. 条件聚合 + 窗口函数

带排名条件

-- 统计每个部门的高薪员工数
SELECT
    dept_id,
    SUM(CASE WHEN salary > 10000 THEN 1 ELSE 0 END) AS high_earners,
    COUNT(*) AS total_employees
FROM employees
GROUP BY dept_id;

连续满足条件

-- 统计连续3天以上有订单的天数
SELECT
    user_id,
    SUM(CASE WHEN order_count > 0 THEN 1 ELSE 0 END) AS active_days
FROM (
    SELECT
        user_id,
        DATE(order_date) AS day,
        COUNT(*) AS order_count
    FROM orders
    GROUP BY user_id, DATE(order_date)
) daily_orders
GROUP BY user_id;

6. 复杂条件表达式

多条件组合

-- 统计VIP客户的活跃订单
SELECT
    SUM(CASE
        WHEN user_type = 'vip' AND status = 'completed' THEN 1
        ELSE 0
    END) AS vip_completed
FROM orders;

NULL条件处理

-- 区分NULL和0
SELECT
    SUM(CASE WHEN amount IS NULL THEN 1 ELSE 0 END) AS null_count,
    SUM(CASE WHEN amount = 0 THEN 1 ELSE 0 END) AS zero_count,
    SUM(CASE WHEN amount > 0 THEN 1 ELSE 0 END) AS positive_count
FROM orders;

日期条件

-- 按订单状态和日期范围统计
SELECT
    SUM(CASE
        WHEN status = 'completed'
         AND order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
        THEN amount ELSE 0
    END) AS recent_completed
FROM orders;

7. 多列条件聚合

同时检查多列

-- 检查多列是否满足条件
SELECT
    SUM(CASE WHEN col1 > 0 AND col2 > 0 THEN 1 ELSE 0 END) AS both_positive,
    SUM(CASE WHEN col1 > 0 OR col2 > 0 THEN 1 ELSE 0 END) AS either_positive,
    SUM(CASE WHEN col1 > 0 AND col2 IS NULL THEN 1 ELSE 0 END) AS col1_only
FROM data;

区间判断

-- 销量是否达标
SELECT
    SUM(CASE WHEN sales >= target THEN 1 ELSE 0 END) AS met_target,
    SUM(CASE WHEN sales < target * 0.8 THEN 1 ELSE 0 END) AS below_80pct,
    SUM(CASE WHEN sales >= target * 1.2 THEN 1 ELSE 0 END) AS exceeded_120pct
FROM performance;

⚠️ 注意事项

1. 性能影响

-- ✅ 建议:一次扫描完成所有聚合
SELECT
    SUM(CASE WHEN status = 'a' THEN amount ELSE 0 END) AS a_sum,
    SUM(CASE WHEN status = 'b' THEN amount ELSE 0 END) AS b_sum,
    SUM(CASE WHEN status = 'c' THEN amount ELSE 0 END) AS c_sum
FROM orders;

-- ❌ 避免:多次查询
SELECT SUM(amount) FROM orders WHERE status = 'a';
SELECT SUM(amount) FROM orders WHERE status = 'b';
SELECT SUM(amount) FROM orders WHERE status = 'c';

2. 数据类型

-- 确保返回值类型一致
SELECT
    SUM(CASE WHEN flag THEN 1 ELSE 0 END) AS count_flag,
    AVG(CASE WHEN flag THEN 1 ELSE 0 END) AS pct_flag
FROM data;

📝 练习题

-- 建表
CREATE TABLE exam_scores (
    student_id INT,
    subject STRING,
    score INT,
    pass_line INT
);

INSERT INTO exam_scores VALUES
(1, '数学', 85, 60),
(1, '语文', 72, 60),
(1, '英语', 55, 60),
(2, '数学', 90, 60),
(2, '语文', 68, 60),
(2, '英语', 95, 60),
(3, '数学', 45, 60),
(3, '语文', 58, 60),
(3, '英语', 60, 60);

要求

  1. 统计各科目的及格人数和不及格人数
  2. 统计每个学生的及格科目数
  3. 统计各分数段的人数(<60, 60-70, 70-80, 80-90, >90)
  4. 计算每个学生各科是否及格(1/0)
-- 参考答案(MySQL)
-- 1. 各科目及格/不及格统计
SELECT
    subject,
    SUM(CASE WHEN score >= 60 THEN 1 ELSE 0 END) AS passed,
    SUM(CASE WHEN score < 60 THEN 1 ELSE 0 END) AS failed
FROM exam_scores
GROUP BY subject;

-- 2. 每个学生的及格科目数
SELECT
    student_id,
    SUM(CASE WHEN score >= 60 THEN 1 ELSE 0 END) AS passed_count,
    COUNT(*) AS total_subjects
FROM exam_scores
GROUP BY student_id;

-- 3. 分数段统计
SELECT
    SUM(CASE WHEN score < 60 THEN 1 ELSE 0 END) AS score_0_59,
    SUM(CASE WHEN score >= 60 AND score < 70 THEN 1 ELSE 0 END) AS score_60_69,
    SUM(CASE WHEN score >= 70 AND score < 80 THEN 1 ELSE 0 END) AS score_70_79,
    SUM(CASE WHEN score >= 80 AND score < 90 THEN 1 ELSE 0 END) AS score_80_89,
    SUM(CASE WHEN score >= 90 THEN 1 ELSE 0 END) AS score_90_100
FROM exam_scores;

-- 4. 学生各科及格标记
SELECT
    student_id,
    subject,
    score,
    CASE WHEN score >= 60 THEN 1 ELSE 0 END AS is_passed
FROM exam_scores
ORDER BY student_id, subject;
赞(0)
未经允许不得转载:順子の杂货铺 » 16-条件聚合
搬瓦工VPS

评论 抢沙发

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址

分享创造快乐

联系我们联系我们