顺子の杂货铺
生命不息,折腾不止,且行且珍惜~

26-RFM分析

DMIT VPS

RFM分析

RFM分析是用户价值分群的经典方法,通过Recency(最近)、Frequency(频率)、Monetary(金额)三个维度对用户进行分类。

📊 支持情况

数据库 MAX/MIN COUNT SUM NTILE WINDOW 备注
MySQL ✅ 8.0+ 完整支持
Oracle 完整支持
ClickHouse 完整支持
Hologres 完整支持
MaxCompute 完整支持
Hive 完整支持

1. RFM指标计算

基础RFM计算

-- MySQL - 计算RFM指标
WITH rfm AS (
    SELECT
        user_id,
        MAX(order_date) AS last_order_date,
        COUNT(*) AS frequency,
        SUM(amount) AS monetary
    FROM orders
    WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 1 YEAR)
    GROUP BY user_id
)
SELECT
    user_id,
    last_order_date,
    DATEDIFF(CURDATE(), last_order_date) AS recency_days,
    frequency,
    monetary,
    RANK() OVER (ORDER BY DATEDIFF(CURDATE(), last_order_date)) AS r_rank,
    RANK() OVER (ORDER BY frequency DESC) AS f_rank,
    RANK() OVER (ORDER BY monetary DESC) AS m_rank
FROM rfm;

计算RFM分值

-- MySQL - 使用NTILE分组
WITH rfm AS (
    SELECT
        user_id,
        DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
        COUNT(*) AS frequency,
        SUM(amount) AS monetary
    FROM orders
    GROUP BY user_id
),
rfm_score AS (
    SELECT
        user_id,
        recency,
        frequency,
        monetary,
        NTILE(5) OVER (ORDER BY recency DESC) AS r_score,  -- 越小越好,反向
        NTILE(5) OVER (ORDER BY frequency) AS f_score,     -- 越大越好
        NTILE(5) OVER (ORDER BY monetary) AS m_score       -- 越大越好
    FROM rfm
)
SELECT
    user_id,
    recency,
    frequency,
    monetary,
    r_score,
    f_score,
    m_score,
    CONCAT(r_score, f_score, m_score) AS rfm_combined
FROM rfm_score;

2. 用户分群

基于RFM的用户分群

-- MySQL - 用户价值分群
SELECT
    user_id,
    r_score,
    f_score,
    m_score,
    CASE
        WHEN r_score >= 4 AND f_score >= 4 AND m_score >= 4 THEN '重要价值用户'
        WHEN r_score >= 4 AND f_score < 4 AND m_score >= 4 THEN '重要发展用户'
        WHEN r_score < 4 AND f_score >= 4 AND m_score >= 4 THEN '重要保持用户'
        WHEN r_score < 4 AND f_score < 4 AND m_score >= 4 THEN '重要挽留用户'
        WHEN r_score >= 4 AND f_score >= 4 AND m_score < 4 THEN '一般价值用户'
        WHEN r_score >= 4 AND f_score < 4 AND m_score < 4 THEN '一般发展用户'
        WHEN r_score < 4 AND f_score >= 4 AND m_score < 4 THEN '一般保持用户'
        ELSE '一般挽留用户'
    END AS user_segment
FROM (
    SELECT
        user_id,
        NTILE(5) OVER (ORDER BY recency DESC) AS r_score,
        NTILE(5) OVER (ORDER BY frequency) AS f_score,
        NTILE(5) OVER (ORDER BY monetary) AS m_score
    FROM (
        SELECT
            user_id,
            DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
            COUNT(*) AS frequency,
            SUM(amount) AS monetary
        FROM orders
        GROUP BY user_id
    ) t
) t;

用户分群统计

-- MySQL - 各分群用户统计
SELECT
    user_segment,
    COUNT(*) AS user_count,
    ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) AS pct,
    ROUND(AVG(monetary), 2) AS avg_monetary,
    ROUND(AVG(frequency), 2) AS avg_frequency
FROM (
    SELECT
        user_id,
        monetary,
        frequency,
        CASE
            WHEN r_score >= 4 AND f_score >= 4 THEN '高价值'
            WHEN r_score >= 4 THEN '新用户'
            WHEN f_score >= 4 THEN '活跃用户'
            ELSE '沉默用户'
        END AS user_segment
    FROM (
        SELECT
            user_id,
            DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
            COUNT(*) AS frequency,
            SUM(amount) AS monetary,
            NTILE(5) OVER (ORDER BY DATEDIFF(CURDATE(), MAX(order_date)) DESC) AS r_score,
            NTILE(5) OVER (ORDER BY COUNT(*)) AS f_score
        FROM orders
        GROUP BY user_id
    ) t
) t
GROUP BY user_segment;

3. 实战案例

场景1:电商用户价值分层

-- MySQL
WITH rfm_base AS (
    SELECT
        user_id,
        DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
        COUNT(DISTINCT order_id) AS frequency,
        SUM(amount) AS monetary
    FROM orders
    WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 365 DAY)
    GROUP BY user_id
),
rfm_scores AS (
    SELECT
        user_id,
        recency,
        frequency,
        monetary,
        NTILE(5) OVER (ORDER BY recency DESC) AS r_score,
        NTILE(5) OVER (ORDER BY frequency DESC) AS f_score,
        NTILE(5) OVER (ORDER BY monetary DESC) AS m_score
    FROM rfm_base
)
SELECT
    r_score,
    f_score,
    COUNT(*) AS users,
    ROUND(SUM(monetary), 2) AS total_revenue,
    ROUND(AVG(monetary), 2) AS avg_revenue
FROM rfm_scores
GROUP BY r_score, f_score
ORDER BY r_score, f_score;

场景2:流失预警

-- MySQL - 识别高风险流失用户
SELECT
    user_id,
    recency,
    frequency,
    monetary,
    r_score,
    CASE
        WHEN r_score = 1 THEN '高流失风险'
        WHEN r_score = 2 THEN '较高风险'
        WHEN r_score = 3 THEN '中等风险'
        ELSE '低风险'
    END AS churn_risk,
    recommended_action
FROM (
    SELECT
        user_id,
        DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
        COUNT(*) AS frequency,
        SUM(amount) AS monetary,
        NTILE(5) OVER (ORDER BY DATEDIFF(CURDATE(), MAX(order_date))) AS r_score
    FROM orders
    GROUP BY user_id
) t
ORDER BY r_score;

场景3:用户生命周期价值

-- MySQL - CLV预测
WITH rfm AS (
    SELECT
        user_id,
        DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
        COUNT(*) AS frequency,
        SUM(amount) AS monetary
    FROM orders
    GROUP BY user_id
),
clv AS (
    SELECT
        user_id,
        monetary AS ltv,
        monetary / NULLIF(frequency, 0) AS avg_order_value,
        CASE
            WHEN recency <= 30 THEN '活跃'
            WHEN recency <= 90 THEN '沉睡'
            ELSE '流失'
        END AS status
    FROM rfm
)
SELECT
    status,
    COUNT(*) AS users,
    ROUND(AVG(ltv), 2) AS avg_ltv,
    ROUND(AVG(avg_order_value), 2) AS avg_aov
FROM clv
GROUP BY status;

场景4:RFM交叉分析

-- MySQL - RFM热力图数据
SELECT
    r_score,
    f_score,
    COUNT(*) AS user_count,
    ROUND(AVG(monetary), 2) AS avg_monetary
FROM (
    SELECT
        user_id,
        NTILE(5) OVER (ORDER BY recency DESC) AS r_score,
        NTILE(5) OVER (ORDER BY frequency DESC) AS f_score,
        SUM(amount) AS monetary
    FROM (
        SELECT
            user_id,
            DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
            SUM(amount) AS monetary
        FROM orders
        GROUP BY user_id
    ) t
) t
GROUP BY r_score, f_score
ORDER BY r_score, f_score;

4. 用户生命周期管理

用户状态标记

-- MySQL - 用户生命周期状态
WITH latest_order AS (
    SELECT
        user_id,
        MAX(order_date) AS last_order,
        COUNT(*) AS order_count,
        SUM(amount) AS total_amount
    FROM orders
    GROUP BY user_id
),
user_status AS (
    SELECT
        user_id,
        last_order,
        DATEDIFF(CURDATE(), last_order) AS days_since_last,
        order_count,
        total_amount,
        CASE
            WHEN days_since_last <= 30 THEN '活跃'
            WHEN days_since_last <= 90 THEN '沉睡'
            WHEN days_since_last <= 180 THEN '不活跃'
            ELSE '流失'
        END AS status
    FROM latest_order
)
SELECT
    status,
    COUNT(*) AS users,
    ROUND(AVG(total_amount), 2) AS avg_spent
FROM user_status
GROUP BY status;

差异化运营策略

-- MySQL - 生成运营建议
SELECT
    user_id,
    user_segment,
    CASE
        WHEN user_segment = '重要价值用户' THEN '提供VIP专属服务,保持高价值'
        WHEN user_segment = '重要发展用户' THEN '推送高价值商品,提升消费频次'
        WHEN user_segment = '重要保持用户' THEN '发送召回优惠,唤醒活跃'
        WHEN user_segment = '重要挽留用户' THEN '紧急召回,分析流失原因'
        WHEN user_segment = '一般价值用户' THEN '培养消费习惯,引导升级'
        WHEN user_segment = '一般发展用户' THEN '首单优惠,促进复购'
        WHEN user_segment = '一般保持用户' THEN '定期触达,维护关系'
        ELSE '成本较高,降低运营投入'
    END AS recommended_action
FROM (
    SELECT
        user_id,
        CASE
            WHEN r_score >= 4 AND f_score >= 4 AND m_score >= 4 THEN '重要价值用户'
            WHEN r_score >= 4 AND f_score >= 4 THEN '重要发展用户'
            WHEN r_score < 4 AND f_score >= 4 THEN '重要保持用户'
            WHEN r_score < 4 AND f_score < 4 AND m_score >= 4 THEN '重要挽留用户'
            ELSE '一般用户'
        END AS user_segment,
        r_score, f_score, m_score
    FROM (
        SELECT
            user_id,
            NTILE(5) OVER (ORDER BY DATEDIFF(CURDATE(), MAX(order_date)) DESC) AS r_score,
            NTILE(5) OVER (ORDER BY COUNT(*)) AS f_score,
            NTILE(5) OVER (ORDER BY SUM(amount)) AS m_score
        FROM orders
        GROUP BY user_id
    ) t
) t;

⚠️ 注意事项

1. 分群数量选择

-- NTILE(5) 将用户分为5组
-- 可以根据业务需求调整为3、4、10等

-- 使用百分位数
SELECT
    PERCENTILE_CONT(0.2) WITHIN GROUP (ORDER BY recency) AS p20
FROM ...

2. 时间窗口

-- 建议使用1年的数据作为分析周期
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 1 YEAR)

-- 对于新用户,可能需要单独处理

3. 金额处理

-- 考虑使用中位数或平均值减少异常值影响
-- 使用分位数过滤极端值

📝 练习题

-- 建表
CREATE TABLE orders_rfm (
    order_id INT,
    user_id INT,
    amount DECIMAL(10,2),
    order_date DATE
);

INSERT INTO orders_rfm VALUES
(1, 101, 500, '2024-01-14'),
(2, 101, 300, '2024-01-10'),
(3, 102, 800, '2024-01-08'),
(4, 102, 200, '2024-01-05'),
(5, 103, 1000, '2023-06-01'),
(6, 103, 1200, '2023-06-15'),
(7, 104, 150, '2024-01-14');

要求

  1. 计算每个用户的RFM指标
  2. 给每个用户的RFM打分(1-5分)
  3. 对用户进行价值分群
  4. 统计各分群的用户数和总金额
-- 参考答案(MySQL)
-- 1. RFM指标
SELECT
    user_id,
    DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
    COUNT(*) AS frequency,
    SUM(amount) AS monetary
FROM orders_rfm
GROUP BY user_id;

-- 2. RFM打分
WITH rfm AS (
    SELECT user_id, DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
           COUNT(*) AS frequency, SUM(amount) AS monetary
    FROM orders_rfm GROUP BY user_id
)
SELECT
    user_id,
    recency,
    frequency,
    monetary,
    NTILE(5) OVER (ORDER BY recency DESC) AS r_score,  -- 越近越好
    NTILE(5) OVER (ORDER BY frequency) AS f_score,     -- 越频越好
    NTILE(5) OVER (ORDER BY monetary) AS m_score       -- 越贵越好
FROM rfm;

-- 3. 用户分群
SELECT
    user_id,
    r_score, f_score, m_score,
    CASE
        WHEN r_score >= 4 AND f_score >= 4 AND m_score >= 4 THEN '重要价值用户'
        WHEN r_score >= 4 AND f_score >= 4 THEN '重要发展用户'
        WHEN r_score < 4 AND f_score >= 4 THEN '重要保持用户'
        WHEN r_score < 4 AND f_score < 4 AND m_score >= 4 THEN '重要挽留用户'
        ELSE '一般用户'
    END AS user_segment
FROM (
    SELECT user_id,
           NTILE(5) OVER (ORDER BY recency DESC) AS r_score,
           NTILE(5) OVER (ORDER BY frequency) AS f_score,
           NTILE(5) OVER (ORDER BY monetary) AS m_score
    FROM (SELECT user_id FROM orders_rfm GROUP BY user_id) t
) t;

-- 4. 分群统计
SELECT
    user_segment,
    COUNT(*) AS user_count,
    SUM(monetary) AS total_amount
FROM (
    SELECT user_id, r_score, f_score, m_score,
           CASE WHEN r_score >= 4 AND f_score >= 4 AND m_score >= 4 THEN '重要价值用户'
                ELSE '一般用户' END AS user_segment
    FROM (
        SELECT user_id,
               NTILE(5) OVER (ORDER BY DATEDIFF(CURDATE(), MAX(order_date)) DESC) AS r_score,
               NTILE(5) OVER (ORDER BY COUNT(*)) AS f_score,
               NTILE(5) OVER (ORDER BY SUM(amount)) AS m_score
        FROM orders_rfm GROUP BY user_id
    ) t
) t
GROUP BY user_segment;
赞(0)
未经允许不得转载:順子の杂货铺 » 26-RFM分析
搬瓦工VPS

评论 抢沙发

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址

分享创造快乐

联系我们联系我们