RFM分析
RFM分析是用户价值分群的经典方法,通过Recency(最近)、Frequency(频率)、Monetary(金额)三个维度对用户进行分类。
📊 支持情况
| 数据库 | MAX/MIN | COUNT | SUM | NTILE | WINDOW | 备注 |
|---|---|---|---|---|---|---|
| MySQL | ✅ | ✅ | ✅ | ✅ 8.0+ | ✅ | 完整支持 |
| Oracle | ✅ | ✅ | ✅ | ✅ | ✅ | 完整支持 |
| ClickHouse | ✅ | ✅ | ✅ | ✅ | ✅ | 完整支持 |
| Hologres | ✅ | ✅ | ✅ | ✅ | ✅ | 完整支持 |
| MaxCompute | ✅ | ✅ | ✅ | ✅ | ✅ | 完整支持 |
| Hive | ✅ | ✅ | ✅ | ✅ | ✅ | 完整支持 |
1. RFM指标计算
基础RFM计算
-- MySQL - 计算RFM指标
WITH rfm AS (
SELECT
user_id,
MAX(order_date) AS last_order_date,
COUNT(*) AS frequency,
SUM(amount) AS monetary
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 1 YEAR)
GROUP BY user_id
)
SELECT
user_id,
last_order_date,
DATEDIFF(CURDATE(), last_order_date) AS recency_days,
frequency,
monetary,
RANK() OVER (ORDER BY DATEDIFF(CURDATE(), last_order_date)) AS r_rank,
RANK() OVER (ORDER BY frequency DESC) AS f_rank,
RANK() OVER (ORDER BY monetary DESC) AS m_rank
FROM rfm;计算RFM分值
-- MySQL - 使用NTILE分组
WITH rfm AS (
SELECT
user_id,
DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
COUNT(*) AS frequency,
SUM(amount) AS monetary
FROM orders
GROUP BY user_id
),
rfm_score AS (
SELECT
user_id,
recency,
frequency,
monetary,
NTILE(5) OVER (ORDER BY recency DESC) AS r_score, -- 越小越好,反向
NTILE(5) OVER (ORDER BY frequency) AS f_score, -- 越大越好
NTILE(5) OVER (ORDER BY monetary) AS m_score -- 越大越好
FROM rfm
)
SELECT
user_id,
recency,
frequency,
monetary,
r_score,
f_score,
m_score,
CONCAT(r_score, f_score, m_score) AS rfm_combined
FROM rfm_score;2. 用户分群
基于RFM的用户分群
-- MySQL - 用户价值分群
SELECT
user_id,
r_score,
f_score,
m_score,
CASE
WHEN r_score >= 4 AND f_score >= 4 AND m_score >= 4 THEN '重要价值用户'
WHEN r_score >= 4 AND f_score < 4 AND m_score >= 4 THEN '重要发展用户'
WHEN r_score < 4 AND f_score >= 4 AND m_score >= 4 THEN '重要保持用户'
WHEN r_score < 4 AND f_score < 4 AND m_score >= 4 THEN '重要挽留用户'
WHEN r_score >= 4 AND f_score >= 4 AND m_score < 4 THEN '一般价值用户'
WHEN r_score >= 4 AND f_score < 4 AND m_score < 4 THEN '一般发展用户'
WHEN r_score < 4 AND f_score >= 4 AND m_score < 4 THEN '一般保持用户'
ELSE '一般挽留用户'
END AS user_segment
FROM (
SELECT
user_id,
NTILE(5) OVER (ORDER BY recency DESC) AS r_score,
NTILE(5) OVER (ORDER BY frequency) AS f_score,
NTILE(5) OVER (ORDER BY monetary) AS m_score
FROM (
SELECT
user_id,
DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
COUNT(*) AS frequency,
SUM(amount) AS monetary
FROM orders
GROUP BY user_id
) t
) t;用户分群统计
-- MySQL - 各分群用户统计
SELECT
user_segment,
COUNT(*) AS user_count,
ROUND(COUNT(*) * 100.0 / SUM(COUNT(*)) OVER (), 2) AS pct,
ROUND(AVG(monetary), 2) AS avg_monetary,
ROUND(AVG(frequency), 2) AS avg_frequency
FROM (
SELECT
user_id,
monetary,
frequency,
CASE
WHEN r_score >= 4 AND f_score >= 4 THEN '高价值'
WHEN r_score >= 4 THEN '新用户'
WHEN f_score >= 4 THEN '活跃用户'
ELSE '沉默用户'
END AS user_segment
FROM (
SELECT
user_id,
DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
COUNT(*) AS frequency,
SUM(amount) AS monetary,
NTILE(5) OVER (ORDER BY DATEDIFF(CURDATE(), MAX(order_date)) DESC) AS r_score,
NTILE(5) OVER (ORDER BY COUNT(*)) AS f_score
FROM orders
GROUP BY user_id
) t
) t
GROUP BY user_segment;3. 实战案例
场景1:电商用户价值分层
-- MySQL
WITH rfm_base AS (
SELECT
user_id,
DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
COUNT(DISTINCT order_id) AS frequency,
SUM(amount) AS monetary
FROM orders
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 365 DAY)
GROUP BY user_id
),
rfm_scores AS (
SELECT
user_id,
recency,
frequency,
monetary,
NTILE(5) OVER (ORDER BY recency DESC) AS r_score,
NTILE(5) OVER (ORDER BY frequency DESC) AS f_score,
NTILE(5) OVER (ORDER BY monetary DESC) AS m_score
FROM rfm_base
)
SELECT
r_score,
f_score,
COUNT(*) AS users,
ROUND(SUM(monetary), 2) AS total_revenue,
ROUND(AVG(monetary), 2) AS avg_revenue
FROM rfm_scores
GROUP BY r_score, f_score
ORDER BY r_score, f_score;场景2:流失预警
-- MySQL - 识别高风险流失用户
SELECT
user_id,
recency,
frequency,
monetary,
r_score,
CASE
WHEN r_score = 1 THEN '高流失风险'
WHEN r_score = 2 THEN '较高风险'
WHEN r_score = 3 THEN '中等风险'
ELSE '低风险'
END AS churn_risk,
recommended_action
FROM (
SELECT
user_id,
DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
COUNT(*) AS frequency,
SUM(amount) AS monetary,
NTILE(5) OVER (ORDER BY DATEDIFF(CURDATE(), MAX(order_date))) AS r_score
FROM orders
GROUP BY user_id
) t
ORDER BY r_score;场景3:用户生命周期价值
-- MySQL - CLV预测
WITH rfm AS (
SELECT
user_id,
DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
COUNT(*) AS frequency,
SUM(amount) AS monetary
FROM orders
GROUP BY user_id
),
clv AS (
SELECT
user_id,
monetary AS ltv,
monetary / NULLIF(frequency, 0) AS avg_order_value,
CASE
WHEN recency <= 30 THEN '活跃'
WHEN recency <= 90 THEN '沉睡'
ELSE '流失'
END AS status
FROM rfm
)
SELECT
status,
COUNT(*) AS users,
ROUND(AVG(ltv), 2) AS avg_ltv,
ROUND(AVG(avg_order_value), 2) AS avg_aov
FROM clv
GROUP BY status;场景4:RFM交叉分析
-- MySQL - RFM热力图数据
SELECT
r_score,
f_score,
COUNT(*) AS user_count,
ROUND(AVG(monetary), 2) AS avg_monetary
FROM (
SELECT
user_id,
NTILE(5) OVER (ORDER BY recency DESC) AS r_score,
NTILE(5) OVER (ORDER BY frequency DESC) AS f_score,
SUM(amount) AS monetary
FROM (
SELECT
user_id,
DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
SUM(amount) AS monetary
FROM orders
GROUP BY user_id
) t
) t
GROUP BY r_score, f_score
ORDER BY r_score, f_score;4. 用户生命周期管理
用户状态标记
-- MySQL - 用户生命周期状态
WITH latest_order AS (
SELECT
user_id,
MAX(order_date) AS last_order,
COUNT(*) AS order_count,
SUM(amount) AS total_amount
FROM orders
GROUP BY user_id
),
user_status AS (
SELECT
user_id,
last_order,
DATEDIFF(CURDATE(), last_order) AS days_since_last,
order_count,
total_amount,
CASE
WHEN days_since_last <= 30 THEN '活跃'
WHEN days_since_last <= 90 THEN '沉睡'
WHEN days_since_last <= 180 THEN '不活跃'
ELSE '流失'
END AS status
FROM latest_order
)
SELECT
status,
COUNT(*) AS users,
ROUND(AVG(total_amount), 2) AS avg_spent
FROM user_status
GROUP BY status;差异化运营策略
-- MySQL - 生成运营建议
SELECT
user_id,
user_segment,
CASE
WHEN user_segment = '重要价值用户' THEN '提供VIP专属服务,保持高价值'
WHEN user_segment = '重要发展用户' THEN '推送高价值商品,提升消费频次'
WHEN user_segment = '重要保持用户' THEN '发送召回优惠,唤醒活跃'
WHEN user_segment = '重要挽留用户' THEN '紧急召回,分析流失原因'
WHEN user_segment = '一般价值用户' THEN '培养消费习惯,引导升级'
WHEN user_segment = '一般发展用户' THEN '首单优惠,促进复购'
WHEN user_segment = '一般保持用户' THEN '定期触达,维护关系'
ELSE '成本较高,降低运营投入'
END AS recommended_action
FROM (
SELECT
user_id,
CASE
WHEN r_score >= 4 AND f_score >= 4 AND m_score >= 4 THEN '重要价值用户'
WHEN r_score >= 4 AND f_score >= 4 THEN '重要发展用户'
WHEN r_score < 4 AND f_score >= 4 THEN '重要保持用户'
WHEN r_score < 4 AND f_score < 4 AND m_score >= 4 THEN '重要挽留用户'
ELSE '一般用户'
END AS user_segment,
r_score, f_score, m_score
FROM (
SELECT
user_id,
NTILE(5) OVER (ORDER BY DATEDIFF(CURDATE(), MAX(order_date)) DESC) AS r_score,
NTILE(5) OVER (ORDER BY COUNT(*)) AS f_score,
NTILE(5) OVER (ORDER BY SUM(amount)) AS m_score
FROM orders
GROUP BY user_id
) t
) t;⚠️ 注意事项
1. 分群数量选择
-- NTILE(5) 将用户分为5组
-- 可以根据业务需求调整为3、4、10等
-- 使用百分位数
SELECT
PERCENTILE_CONT(0.2) WITHIN GROUP (ORDER BY recency) AS p20
FROM ...2. 时间窗口
-- 建议使用1年的数据作为分析周期
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 1 YEAR)
-- 对于新用户,可能需要单独处理3. 金额处理
-- 考虑使用中位数或平均值减少异常值影响
-- 使用分位数过滤极端值📝 练习题
-- 建表
CREATE TABLE orders_rfm (
order_id INT,
user_id INT,
amount DECIMAL(10,2),
order_date DATE
);
INSERT INTO orders_rfm VALUES
(1, 101, 500, '2024-01-14'),
(2, 101, 300, '2024-01-10'),
(3, 102, 800, '2024-01-08'),
(4, 102, 200, '2024-01-05'),
(5, 103, 1000, '2023-06-01'),
(6, 103, 1200, '2023-06-15'),
(7, 104, 150, '2024-01-14');要求:
- 计算每个用户的RFM指标
- 给每个用户的RFM打分(1-5分)
- 对用户进行价值分群
- 统计各分群的用户数和总金额
-- 参考答案(MySQL)
-- 1. RFM指标
SELECT
user_id,
DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
COUNT(*) AS frequency,
SUM(amount) AS monetary
FROM orders_rfm
GROUP BY user_id;
-- 2. RFM打分
WITH rfm AS (
SELECT user_id, DATEDIFF(CURDATE(), MAX(order_date)) AS recency,
COUNT(*) AS frequency, SUM(amount) AS monetary
FROM orders_rfm GROUP BY user_id
)
SELECT
user_id,
recency,
frequency,
monetary,
NTILE(5) OVER (ORDER BY recency DESC) AS r_score, -- 越近越好
NTILE(5) OVER (ORDER BY frequency) AS f_score, -- 越频越好
NTILE(5) OVER (ORDER BY monetary) AS m_score -- 越贵越好
FROM rfm;
-- 3. 用户分群
SELECT
user_id,
r_score, f_score, m_score,
CASE
WHEN r_score >= 4 AND f_score >= 4 AND m_score >= 4 THEN '重要价值用户'
WHEN r_score >= 4 AND f_score >= 4 THEN '重要发展用户'
WHEN r_score < 4 AND f_score >= 4 THEN '重要保持用户'
WHEN r_score < 4 AND f_score < 4 AND m_score >= 4 THEN '重要挽留用户'
ELSE '一般用户'
END AS user_segment
FROM (
SELECT user_id,
NTILE(5) OVER (ORDER BY recency DESC) AS r_score,
NTILE(5) OVER (ORDER BY frequency) AS f_score,
NTILE(5) OVER (ORDER BY monetary) AS m_score
FROM (SELECT user_id FROM orders_rfm GROUP BY user_id) t
) t;
-- 4. 分群统计
SELECT
user_segment,
COUNT(*) AS user_count,
SUM(monetary) AS total_amount
FROM (
SELECT user_id, r_score, f_score, m_score,
CASE WHEN r_score >= 4 AND f_score >= 4 AND m_score >= 4 THEN '重要价值用户'
ELSE '一般用户' END AS user_segment
FROM (
SELECT user_id,
NTILE(5) OVER (ORDER BY DATEDIFF(CURDATE(), MAX(order_date)) DESC) AS r_score,
NTILE(5) OVER (ORDER BY COUNT(*)) AS f_score,
NTILE(5) OVER (ORDER BY SUM(amount)) AS m_score
FROM orders_rfm GROUP BY user_id
) t
) t
GROUP BY user_segment;
順子の杂货铺


