交叉表与数据钻取
交叉表(透视表)是多维数据分析的核心工具,数据钻取是OLAP分析的基础能力。
📊 支持情况
| 数据库 | PIVOT | UNPIVOT | GROUPING SETS | CUBE | ROLLUP | 备注 |
|---|---|---|---|---|---|---|
| MySQL | ❌ | ❌ | ✅ | ✅ | ✅ | CASE WHEN实现 |
| Oracle | ✅ | ✅ | ✅ | ✅ | ✅ | 完整支持 |
| ClickHouse | ❌ | ❌ | ✅ | ✅ | ✅ | 使用GROUPING |
| Hologres | ❌ | ❌ | ✅ | ✅ | ✅ | 兼容PG |
| MaxCompute | ❌ | ❌ | ✅ | ✅ | ✅ | 完整支持 |
| Hive | ❌ | ❌ | ✅ | ✅ | ✅ | 完整支持 |
1. 交叉表实现
MySQL CASE WHEN实现
-- 基础交叉表
SELECT
region,
SUM(CASE WHEN quarter = 'Q1' THEN sales ELSE 0 END) AS q1_sales,
SUM(CASE WHEN quarter = 'Q2' THEN sales ELSE 0 END) AS q2_sales,
SUM(CASE WHEN quarter = 'Q3' THEN sales ELSE 0 END) AS q3_sales,
SUM(CASE WHEN quarter = 'Q4' THEN sales ELSE 0 END) AS q4_sales,
SUM(sales) AS total_sales
FROM quarterly_sales
GROUP BY region;Oracle PIVOT
-- Oracle PIVOT语法
SELECT *
FROM (
SELECT region, quarter, sales
FROM quarterly_sales
)
PIVOT (
SUM(sales)
FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4')
)
ORDER BY region;多指标交叉表
-- MySQL - 多指标透视
SELECT
region,
SUM(CASE WHEN quarter = 'Q1' THEN sales ELSE 0 END) AS q1_sales,
SUM(CASE WHEN quarter = 'Q1' THEN orders ELSE 0 END) AS q1_orders,
SUM(CASE WHEN quarter = 'Q2' THEN sales ELSE 0 END) AS q2_sales,
SUM(CASE WHEN quarter = 'Q2' THEN orders ELSE 0 END) AS q2_orders
FROM quarterly_sales
GROUP BY region;2. 数据钻取
向上钻取(Roll-up)
-- 从明细到汇总
-- 城市 -> 省份 -> 区域 -> 全国
-- 按城市
SELECT region, province, city, SUM(sales) FROM sales GROUP BY region, province, city;
-- 按省份
SELECT region, province, SUM(sales) FROM sales GROUP BY region, province;
-- 按区域
SELECT region, SUM(sales) FROM sales GROUP BY region;
-- 全国
SELECT SUM(sales) FROM sales;使用ROLLUP自动钻取
-- MySQL - ROLLUP多级汇总
SELECT
COALESCE(region, '全国') AS region,
COALESCE(province, '全部') AS province,
SUM(sales) AS total_sales
FROM sales
GROUP BY region, province WITH ROLLUP;
-- 结果:
-- 华东 江苏省 1000
-- 华东 全部 2000
-- 全国 全部 5000使用GROUPING SETS指定钻取维度
-- MySQL - 指定多个汇总级别
SELECT
region,
province,
city,
SUM(sales) AS sales
FROM sales
GROUP BY GROUPING SETS (
(region, province, city), -- 明细
(region, province), -- 省级汇总
(region), -- 区域汇总
() -- 总计
);3. CUBE多维分析
完整立方体
-- MySQL - CUBE所有组合
SELECT
region,
product_category,
quarter,
SUM(sales) AS sales
FROM sales_cube
GROUP BY region, product_category, quarter WITH CUBE;
-- 等同于 GROUPING SETS
GROUP BY GROUPING SETS (
(region, product_category, quarter),
(region, product_category),
(region, quarter),
(product_category, quarter),
(region),
(product_category),
(quarter),
()
);筛选汇总结果
-- 只看区域+产品的汇总(不含quarter)
SELECT
region,
product_category,
SUM(sales) AS sales
FROM sales_cube
GROUP BY region, product_category, quarter WITH CUBE
HAVING GROUPING(quarter) = 1;4. GROUPING函数
识别汇总行
-- MySQL - GROUPING函数
SELECT
region,
province,
SUM(sales) AS sales,
GROUPING(region) AS is_region_total,
GROUPING(province) AS is_province_total
FROM sales
GROUP BY region, province WITH ROLLUP;
-- 结果示例:
-- 华东 江苏 1000 0 0 -- 明细行
-- 华东 NULL 2000 0 1 -- 区域小计
-- NULL NULL 5000 1 1 -- 总计格式化汇总行
-- MySQL - 格式化输出
SELECT
CASE WHEN GROUPING(region) = 1 THEN '全国' ELSE region END AS region,
CASE WHEN GROUPING(province) = 1 THEN '合计' ELSE province END AS province,
SUM(sales) AS sales
FROM sales
GROUP BY region, province WITH ROLLUP;5. 实战案例
场景1:销售多维分析
-- MySQL - 销售交叉表
SELECT
region,
product_category,
SUM(CASE WHEN quarter = 'Q1' THEN sales ELSE 0 END) AS q1,
SUM(CASE WHEN quarter = 'Q2' THEN sales ELSE 0 END) AS q2,
SUM(CASE WHEN quarter = 'Q3' THEN sales ELSE 0 END) AS q3,
SUM(CASE WHEN quarter = 'Q4' THEN sales ELSE 0 END) AS q4,
SUM(sales) AS total
FROM sales_analysis
GROUP BY region, product_category;场景2:动态交叉表
-- MySQL - 动态列转换
SET @sql = NULL;
SELECT
GROUP_CONCAT(DISTINCT
CONCAT(
'SUM(CASE WHEN quarter = ''',
quarter,
''' THEN sales ELSE 0 END) AS `',
quarter, '`'
)
) INTO @sql
FROM quarters;
SET @sql = CONCAT('SELECT region, ', @sql, ', SUM(sales) AS total FROM sales GROUP BY region');
PREPARE stmt FROM @sql;
EXECUTE stmt;
DEALLOCATE PREPARE stmt;场景3:层级钻取
-- MySQL - 支持多层级钻取
SELECT
IF(GROUPING(quarter), '年度', quarter) AS time_level,
IF(GROUPING(region), '全部区域', region) AS region_level,
SUM(sales) AS sales
FROM sales
GROUP BY quarter, region WITH ROLLUP;场景4:占比计算
-- MySQL - 计算行占比和列占比
SELECT
region,
q1_sales,
q2_sales,
q1_sales + q2_sales AS half_year,
ROUND(q1_sales * 100.0 / (q1_sales + q2_sales), 2) AS q1_pct,
ROUND(SUM(q1_sales + q2_sales) OVER () * 100.0 / SUM(q1_sales + q2_sales) OVER (), 2) AS region_pct
FROM (
SELECT
region,
SUM(CASE WHEN quarter = 'Q1' THEN sales ELSE 0 END) AS q1_sales,
SUM(CASE WHEN quarter = 'Q2' THEN sales ELSE 0 END) AS q2_sales
FROM sales
GROUP BY region
) t;6. ClickHouse特殊语法
GROUPING SETS
SELECT
region,
product_category,
toDate_trunc('month', sale_date) AS month,
SUM(sales) AS sales
FROM sales
GROUP BY GROUPING SETS (
(region, product_category, toDate_trunc('month', sale_date)),
(region, product_category),
(region),
()
);WITH TOTALS
-- ClickHouse - 自动计算总计
SELECT
region,
product_category,
SUM(sales) AS sales
FROM sales
GROUP BY region, product_category WITH TOTALS;
-- 结果最后会显示总计行⚠️ 注意事项
1. 性能问题
-- CUBE会产生大量汇总组合
-- 数据量大时谨慎使用
-- 建议使用 GROUPING SETS 指定需要的维度2. NULL值处理
-- ROLLUP产生的NULL需要用COALESCE处理
SELECT
COALESCE(region, '总计') AS region,
SUM(sales) AS sales
FROM sales
GROUP BY region WITH ROLLUP;3. 排序
-- 汇总行通常需要排序到最后
SELECT
region,
SUM(sales) AS sales
FROM sales
GROUP BY region WITH ROLLUP
ORDER BY
CASE WHEN region IS NULL THEN 1 ELSE 0 END,
region;📝 练习题
-- 建表
CREATE TABLE sales_drill (
region STRING,
province STRING,
city STRING,
product STRING,
quarter STRING,
sales DECIMAL(10,2)
);
INSERT INTO sales_drill VALUES
('华东', '江苏', '南京', 'A产品', 'Q1', 100),
('华东', '江苏', '南京', 'B产品', 'Q1', 150),
('华东', '浙江', '杭州', 'A产品', 'Q1', 200),
('华东', '浙江', '杭州', 'B产品', 'Q1', 180),
('华南', '广东', '广州', 'A产品', 'Q1', 300),
('华南', '广东', '广州', 'B产品', 'Q1', 250);要求:
- 创建按区域和产品交叉的透视表
- 使用ROLLUP获取区域小计和总计
- 使用GROUPING SETS获取指定维度汇总
- 计算各区域销售额占全国比例
-- 参考答案(MySQL)
-- 1. 交叉表
SELECT
region,
product,
SUM(CASE WHEN quarter = 'Q1' THEN sales ELSE 0 END) AS q1_sales
FROM sales_drill
GROUP BY region, product;
-- 2. ROLLUP汇总
SELECT
COALESCE(region, '全国') AS region,
COALESCE(product, '全部') AS product,
SUM(sales) AS total_sales
FROM sales_drill
GROUP BY region, product WITH ROLLUP;
-- 3. GROUPING SETS
SELECT
region,
province,
city,
SUM(sales) AS sales
FROM sales_drill
GROUP BY GROUPING SETS (
(region, province, city),
(region, province),
(region),
()
);
-- 4. 占比计算
SELECT
region,
SUM(sales) AS regional_sales,
ROUND(SUM(sales) * 100.0 / SUM(SUM(sales)) OVER (), 2) AS pct_of_total
FROM sales_drill
GROUP BY region;
順子の杂货铺


