//at.alicdn.com/t/font_8d5l8fzk5b87iudi.css
顺子の杂货铺
生命不息,折腾不止,且行且珍惜~

06-字符串处理函数详解

DMIT VPS

字符串处理函数详解

字符串处理是SQL中最常用的操作之一,本篇涵盖截取、拼接、替换、转换等常见场景。

📊 支持情况

数据库SUBSTRCONCATTRIMREPLACESPLIT
MySQL
Oracle
ClickHouse
Hologres
MaxCompute
Hive

1. 字符串截取

SUBSTR / SUBSTRING

MySQL

-- 从位置开始截取(1开始)
SELECT SUBSTR('Hello World', 1, 5);  -- Hello
SELECT SUBSTRING('Hello World', 7);  -- World

-- 从右边开始
SELECT SUBSTR('Hello World', -5);    -- World

Oracle

-- Oracle从1开始
SELECT SUBSTR('Hello World', 1, 5) FROM dual;  -- Hello
SELECT SUBSTR('Hello World', 7) FROM dual;     -- World
SELECT SUBSTR('Hello World', -5) FROM dual;    -- World

ClickHouse

SELECT substr('Hello World', 1, 5);  -- Hello
SELECT substring('Hello World', 7);  -- World

Hive / MaxCompute / Hologres

SELECT substr('Hello World', 1, 5);  -- Hello
SELECT substring('Hello World', 7);  -- World

LEFT / RIGHT

-- MySQL / Hive / MaxCompute / Hologres
SELECT LEFT('Hello World', 5);   -- Hello
SELECT RIGHT('Hello World', 5);  -- World

-- Oracle
SELECT SUBSTR('Hello World', 1, 5) FROM dual;
SELECT SUBSTR('Hello World', -5) FROM dual;

MID / CHARINDEX(Oracle)

-- Oracle特定函数
SELECT INSTR('Hello World', 'World') FROM dual;  -- 7
SELECT SUBSTR('Hello World', INSTR('Hello World', ' ')) FROM dual;  --  World

2. 字符串拼接

CONCAT

-- MySQL / Hive / MaxCompute / Hologres
SELECT CONCAT('Hello', ' ', 'World');  -- Hello World

-- 多列拼接
SELECT CONCAT(first_name, ' ', last_name) AS full_name
FROM users;

CONCAT_WS

-- 带分隔符的拼接
SELECT CONCAT_WS('-', '2024', '01', '14');  -- 2024-01-14

-- 多列拼接
SELECT CONCAT_WS(',', name, email, phone) AS contact_info
FROM users;

Oracle CONCAT ||

-- Oracle使用 || 拼接
SELECT 'Hello' || ' ' || 'World' FROM dual;  -- Hello World

-- CONCAT函数(Oracle最多2个参数)
SELECT CONCAT(CONCAT('Hello', ' '), 'World') FROM dual;

ClickHouse

SELECT concat('Hello', ' ', 'World');  -- Hello World
SELECT 'Hello' || ' ' || 'World';      -- Hello World

GROUP_CONCAT(MySQL)

-- 将多行拼接成一行
SELECT
    dept_id,
    GROUP_CONCAT(employee_name SEPARATOR ', ') AS employees
FROM employees
GROUP BY dept_id;

-- 带排序
SELECT
    dept_id,
    GROUP_CONCAT(employee_name ORDER BY salary DESC SEPARATOR ', ') AS employees
FROM employees
GROUP BY dept_id;

WM_CONCAT(Oracle / Hive)

-- Oracle
SELECT dept_id, WM_CONCAT(',', employee_name) FROM employees GROUP BY dept_id;

-- Hive
SELECT dept_id, concat_ws(',', collect_set(employee_name)) FROM employees GROUP BY dept_id;

3. 去除空白

TRIM 系列

-- 去除两端空格
SELECT TRIM('  Hello  ');  -- Hello

-- 去除左侧空格
SELECT LTRIM('  Hello');   -- Hello

-- 去除右侧空格
SELECT RTRIM('Hello  ');   -- Hello

自定义去除字符

-- MySQL / Hive
SELECT TRIM(BOTH ',' FROM ',Hello,');  -- Hello

-- Oracle
SELECT TRIM(',' FROM ',Hello,') FROM dual;  -- Hello

4. 大小写转换

-- 转大写
SELECT UPPER('Hello');  -- HELLO

-- 转小写
SELECT LOWER('HELLO');  -- hello

-- 首字母大写
SELECT CONCAT(UPPER(SUBSTR(name, 1, 1)), LOWER(SUBSTR(name, 2))) AS name
FROM users;

5. 字符串替换

REPLACE

-- MySQL / Hive / MaxCompute / Hologres
SELECT REPLACE('Hello World', 'World', 'SQL');  -- Hello SQL

-- 去除特定字符
SELECT REPLACE(phone, '-', '') FROM users;

-- Oracle
SELECT REPLACE('Hello World', 'World', 'SQL') FROM dual;

REGEXP_REPLACE(正则替换)

-- MySQL 8.0+
SELECT REGEXP_REPLACE('Hello123World', '[0-9]', '');  -- HelloWorld

-- Oracle
SELECT REGEXP_REPLACE('Hello123World', '[0-9]', '') FROM dual;

-- Hive
SELECT regexp_replace('Hello123World', '[0-9]', '');

ClickHouse

SELECT replaceAll('Hello123World', '[0-9]', '');  -- 使用正则
SELECT replace('Hello123World', '123', '');       -- 普通替换

6. 字符串查找

INSTR / CHARINDEX

-- MySQL
SELECT INSTR('Hello World', 'World');  -- 7
SELECT INSTR('Hello World', 'SQL');    -- 0(未找到)

-- Oracle
SELECT INSTR('Hello World', 'World') FROM dual;  -- 7

-- Hive / MaxCompute
SELECT instr('Hello World', 'World');  -- 7

POSITION

-- 标准SQL
SELECT POSITION('World' IN 'Hello World');  -- 7

-- ClickHouse
SELECT position('Hello World', 'World');  -- 7

LIKE 模糊匹配

-- 包含
SELECT * FROM users WHERE name LIKE '%张%';

-- 开头
SELECT * FROM users WHERE email LIKE 'admin%';

-- 结尾
SELECT * FROM users WHERE phone LIKE '%1234';

-- 长度匹配
SELECT * FROM users WHERE LENGTH(name) = 3;

7. 字符串分割

SPLIT

-- Hive / MaxCompute / Hologres
SELECT split('a,b,c,d', ',');  -- 返回数组 ["a","b","c","d"]

-- 取分割后的第1个元素
SELECT split('a,b,c,d', ',')[0];  -- a
SELECT split('a,b,c,d', ',')[1];  -- b

-- ClickHouse
SELECT splitByString(',', 'a,b,c,d');

-- MySQL 8.0+
SELECT SUBSTRING_INDEX('a,b,c,d', ',', 1);  -- a
SELECT SUBSTRING_INDEX(SUBSTRING_INDEX('a,b,c,d', ',', 2), ',', -1);  -- b

REGEXP_EXTRACT

-- Hive / MaxCompute
SELECT regexp_extract('user123@example.com', '([a-zA-Z]+)', 1);  -- user
SELECT regexp_extract('price: $100', '\\$(\\d+)', 1);  -- 100

-- Oracle
SELECT REGEXP_SUBSTR('user123@example.com', '[a-zA-Z]+') FROM dual;

-- MySQL 8.0+
SELECT REGEXP_SUBSTR('user123@example.com', '[a-zA-Z]+');  -- user

8. 格式化函数

LPAD / RPAD

-- 左侧补齐
SELECT LPAD('123', 6, '0');   -- 000123
SELECT LPAD('AB', 5, '*');    -- ***AB

-- 右侧补齐
SELECT RPAD('123', 6, '0');   -- 123000
SELECT RPAD('AB', 5, '*');    -- AB***

REPEAT

SELECT REPEAT('*', 10);  -- **********

REVERSE

SELECT REVERSE('Hello');  -- olleH

9. 长度计算

-- 字符长度
SELECT LENGTH('Hello');     -- 5(MySQL/Hive/Oracle返回字符数)
SELECT CHAR_LENGTH('Hello'); -- 5(字符数)
SELECT CHARACTER_LENGTH('Hello'); -- 标准SQL

-- 字节长度
SELECT OCTET_LENGTH('Hello');  -- 5(字节数,中文不同)

-- 中文处理
SELECT LENGTH('你好');   -- MySQL: 6(UTF-8占3字节)
SELECT LENGTH('你好');   -- Oracle: 2(按字符)

10. 实战案例

场景1:手机号脱敏

-- MySQL / Hive / MaxCompute
SELECT
    phone,
    CONCAT(
        SUBSTR(phone, 1, 3),
        '****',
        SUBSTR(phone, 8, 4)
    ) AS phone_masked
FROM users;

场景2:邮箱提取用户名

SELECT
    email,
    SUBSTRING_INDEX(email, '@', 1) AS username
FROM users;

场景3:解析URL

-- Hive
SELECT
    url,
    parse_url(url, 'HOST') AS host,
    parse_url(url, 'PATH') AS path,
    parse_url(url, 'QUERY', 'id') AS query_id
FROM web_logs;

场景4:数据清洗

SELECT
    TRIM(UPPER(REPLACE(REPLACE(name, '\t', ''), '\n', ''))) AS clean_name
FROM raw_data;

⚠️ 注意事项

数据库字符串截取起始位置特殊说明
MySQL1SUBSTR和SUBSTRING等价
Oracle1SUBSTR参数从1开始
Hive1
MaxCompute1
ClickHouse1
PostgreSQL1

📝 练习题

-- 建表
CREATE TABLE user_profiles (
    user_id INT,
    name STRING,
    email STRING,
    phone STRING,
    address STRING
);

-- 插入数据
INSERT INTO user_profiles VALUES
(1, '张三', 'zhangsan@email.com', '13812345678', '北京市朝阳区'),
(2, '李四', 'lisi@email.com', '13987654321', ' 上海市浦东新区  ');

要求

  1. 提取邮箱@前的用户名
  2. 手机号中间四位脱敏
  3. 去除地址两端空格
  4. 姓名转大写
-- 参考答案
SELECT
    user_id,
    UPPER(name) AS name_upper,
    SUBSTRING_INDEX(email, '@', 1) AS username,
    CONCAT(SUBSTR(phone, 1, 3), '****', SUBSTR(phone, 8, 4)) AS phone_masked,
    TRIM(address) AS address_clean
FROM user_profiles;
赞(0)
未经允许不得转载:順子の杂货铺 » 06-字符串处理函数详解
搬瓦工VPS

评论 抢沙发

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址

分享创造快乐

联系我们联系我们