顺子の杂货铺
生命不息,折腾不止,且行且珍惜~

22-子查询优化

DMIT VPS

子查询优化

子查询是复杂查询的基础,但不当使用会影响性能。本篇介绍子查询的优化技巧。

📊 支持情况

数据库相关子查询EXISTSINJOIN替代优化器
MySQL持续优化
OracleCBO优化
ClickHouse向量化
HologresPG优化器
MaxCompute分布式优化
HiveCBO优化

1. 子查询类型

标量子查询

-- 返回单个值
SELECT
    name,
    (SELECT MAX(salary) FROM employees) AS max_salary
FROM departments;

-- 多个列(返回单行)
SELECT
    d.name,
    (SELECT name, salary FROM employees
     WHERE department_id = d.id
     ORDER BY salary DESC LIMIT 1) AS top_employee
FROM departments d;

行子查询

-- 返回单行多列
SELECT *
FROM products
WHERE (category, price) = (
    SELECT category, MAX(price)
    FROM products
    GROUP BY category
);

列子查询

-- 返回一列多行
SELECT *
FROM products
WHERE category IN (
    SELECT category FROM top_categories
);

表子查询

-- 返回多行多列
SELECT *
FROM orders o
JOIN (
    SELECT user_id, SUM(amount) AS total
    FROM orders
    GROUP BY user_id
    HAVING SUM(amount) > 10000
) high_value ON o.user_id = high_value.user_id;

2. EXISTS vs IN

IN 子查询

-- MySQL
SELECT *
FROM orders
WHERE user_id IN (
    SELECT user_id FROM vip_users
);

EXISTS 子查询

-- MySQL - 通常更高效
SELECT *
FROM orders o
WHERE EXISTS (
    SELECT 1 FROM vip_users v
    WHERE v.user_id = o.user_id
);

选择指南

场景推荐使用原因
小结果集 IN 大表IN缓存小结果集
大结果集 IN 小表EXISTS尽早退出
NOT IN 包含NULLNOT EXISTSNULL处理
相关子查询EXISTS相关性优化

3. JOIN 替代子查询

相关子查询转JOIN

-- ❌ 低效:相关子查询(每行执行一次)
SELECT
    o.order_id,
    o.amount,
    (SELECT MAX(amount) FROM orders WHERE user_id = o.user_id) AS max_order
FROM orders o;

-- ✅ 高效:JOIN(只执行一次)
SELECT
    o.order_id,
    o.amount,
    o.user_max.max_amount AS max_order
FROM orders o
JOIN (
    SELECT user_id, MAX(amount) AS max_amount
    FROM orders
    GROUP BY user_id
) o.user_max ON o.user_id = o.user_max.user_id;

IN 转 JOIN

-- ❌ IN子查询
SELECT *
FROM orders
WHERE user_id IN (
    SELECT user_id FROM high_value_users
);

-- ✅ JOIN
SELECT o.*
FROM orders o
JOIN high_value_users h ON o.user_id = h.user_id;

NOT IN vs NOT EXISTS

-- NOT IN(可能有问题)
SELECT *
FROM orders
WHERE user_id NOT IN (
    SELECT user_id FROM blacklist_users
);
-- 如果子查询返回NULL,结果为空

-- ✅ NOT EXISTS(安全)
SELECT o.*
FROM orders o
WHERE NOT EXISTS (
    SELECT 1 FROM blacklist_users b
    WHERE b.user_id = o.user_id
);

4. 派生表优化

派生表下推

-- ✅ 建议:在派生表中先过滤和聚合
SELECT *
FROM (
    SELECT user_id, SUM(amount) AS total
    FROM orders
    WHERE order_date >= '2024-01-01'  -- 先过滤
    GROUP BY user_id
) user_totals
WHERE total > 1000;

派生表索引

-- 确保派生表查询能利用索引
SELECT *
FROM (
    SELECT * FROM orders
    WHERE order_date >= '2024-01-01'
    ORDER BY order_id
    LIMIT 100
) t
JOIN products p ON t.product_id = p.id;

5. 实战案例

场景1:复杂报表查询

-- ❌ 低效写法
SELECT
    (SELECT COUNT(*) FROM orders WHERE user_id = u.user_id) AS order_count,
    (SELECT SUM(amount) FROM orders WHERE user_id = u.user_id) AS total_spent,
    (SELECT MAX(order_date) FROM orders WHERE user_id = u.user_id) AS last_order
FROM users u;

-- ✅ 优化写法
SELECT
    u.user_id,
    COALESCE(o.order_count, 0) AS order_count,
    COALESCE(o.total_spent, 0) AS total_spent,
    o.last_order
FROM users u
LEFT JOIN (
    SELECT
        user_id,
        COUNT(*) AS order_count,
        SUM(amount) AS total_spent,
        MAX(order_date) AS last_order
    FROM orders
    GROUP BY user_id
) o ON u.user_id = o.user_id;

场景2:排名查询

-- ❌ 低效:相关子查询排名
SELECT
    u.user_id,
    (SELECT COUNT(*) FROM users u2 WHERE u2.score > u.score) + 1 AS rank
FROM users u;

-- ✅ 高效:窗口函数
SELECT
    user_id,
    score,
    RANK() OVER (ORDER BY score DESC) AS rank
FROM users;

场景3:最新记录查询

-- ❌ 低效:相关子查询
SELECT *
FROM orders o1
WHERE o1.order_date = (
    SELECT MAX(order_date)
    FROM orders o2
    WHERE o2.user_id = o1.user_id
);

-- ✅ 高效:窗口函数
SELECT *
FROM (
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY order_date DESC) AS rn
    FROM orders
) t
WHERE rn = 1;

6. 优化器提示

MySQL

-- 强制使用派生表合并
SELECT /*+ DERIVED_CONCATITION(t) */ ...
FROM (SELECT * FROM large_table) t;

-- 使用索引
SELECT /*+ INDEX(o idx_user_id) */ ...
FROM orders o;

Oracle

-- 强制哈希连接
SELECT /*+ USE_HASH(o u) */ ...
FROM orders o, users u
WHERE o.user_id = u.user_id;

-- 取消子查询展开
SELECT /*+ NO_UNNEST */ ...
FROM orders o
WHERE EXISTS (SELECT 1 FROM users u WHERE u.user_id = o.user_id);

Hive

-- 启用CBO
SET hive.cbo.enable=true;

-- 启用谓词下推
SET hive.optimize.ppd=true;

⚠️ 注意事项

1. NULL值问题

-- ❌ IN遇到NULL
SELECT * FROM t WHERE col IN (1, 2, NULL);
-- 等同于 col = 1 OR col = 2 OR col = NULL,结果总是UNKNOWN

-- ✅ 使用NOT EXISTS
SELECT * FROM t WHERE NOT EXISTS (SELECT 1 FROM other WHERE other.col = t.col);

2. 相关子查询性能

-- 相关子查询会执行多次
SELECT * FROM t1
WHERE EXISTS (SELECT 1 FROM t2 WHERE t2.id = t1.id);

-- 如果数据量大,建议用JOIN
SELECT DISTINCT t1.* FROM t1 JOIN t2 ON t1.id = t2.id;

3. 临时表优化

-- 大数据量时,先物化子查询
CREATE TEMPORARY TABLE temp_users AS
SELECT user_id, level FROM users WHERE status = 'active';

SELECT * FROM orders
WHERE user_id IN (SELECT user_id FROM temp_users);

📝 练习题

-- 建表
CREATE TABLE products (
    product_id INT,
    category STRING,
    price DECIMAL(10,2),
    stock INT
);

CREATE TABLE orders (
    order_id INT,
    user_id INT,
    product_id INT,
    amount DECIMAL(10,2),
    order_date DATE
);

INSERT INTO products VALUES
(1, 'A', 100, 50),
(2, 'A', 150, 30),
(3, 'B', 200, 20),
(4, 'B', 250, 10);

INSERT INTO orders VALUES
(1, 101, 1, 100, '2024-01-14'),
(2, 102, 2, 150, '2024-01-14'),
(3, 101, 3, 200, '2024-01-15'),
(4, 103, 4, 250, '2024-01-15');

要求

  1. 优化:查询购买过A类产品的所有用户
  2. 优化:查询每个类别销量最高的商品
  3. 优化:查询用户最近一次订单
-- 参考答案
-- 1. IN改JOIN
SELECT DISTINCT o.user_id
FROM orders o
JOIN products p ON o.product_id = p.product_id
WHERE p.category = 'A';

-- 2. 子查询转JOIN
SELECT p.*
FROM products p
JOIN (
    SELECT category, MAX(price) AS max_price
    FROM products
    GROUP BY category
) t ON p.category = t.category AND p.price = t.max_price;

-- 3. 窗口函数替代相关子查询
SELECT *
FROM (
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY order_date DESC) AS rn
    FROM orders
) t
WHERE rn = 1;
赞(0)
未经允许不得转载:順子の杂货铺 » 22-子查询优化
搬瓦工VPS

评论 抢沙发

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址

分享创造快乐

联系我们联系我们