-- 统计总行数
SELECT COUNT(*) FROM users;
-- 统计非NULL值的行数
SELECT COUNT(email) FROM users;
-- 配合DISTINCT去重统计
SELECT COUNT(DISTINCT department_id) FROM employees;
-- 实际应用:统计不同状态订单数量
SELECT
COUNT(CASE WHEN status = 'pending' THEN 1 END) AS pending_count,
COUNT(CASE WHEN status = 'shipped' THEN 1 END) AS shipped_count,
COUNT(CASE WHEN status = 'delivered' THEN 1 END) AS delivered_count
FROM orders;
SUM() - 求和函数
-- 简单求和
SELECT SUM(sales_amount) FROM sales;
-- 配合条件求和
SELECT
product_id,
SUM(CASE WHEN quarter = 'Q1' THEN revenue ELSE 0 END) AS q1_revenue,
SUM(CASE WHEN quarter = 'Q2' THEN revenue ELSE 0 END) AS q2_revenue
FROM quarterly_sales
GROUP BY product_id;
-- 实际应用:计算加权平均值
SELECT
product_id,
SUM(quantity * price) / SUM(quantity) AS weighted_avg_price
FROM sales_details
GROUP BY product_id;
AVG() - 平均值函数
-- 基础平均值
SELECT AVG(salary) FROM employees;
-- 忽略NULL值
SELECT AVG(COALESCE(bonus, 0)) FROM employees;
-- 实际应用:计算移动平均
SELECT
date,
sales,
AVG(sales) OVER (ORDER BY date ROWS BETWEEN 6 PRECEDING AND CURRENT ROW)
AS moving_avg_7days
FROM daily_sales;
MIN()/MAX() - 极值函数
-- 基础用法
SELECT MIN(salary), MAX(salary) FROM employees;
-- 配合GROUP BY
SELECT department_id, MIN(hired_date), MAX(hired_date)
FROM employees
GROUP BY department_id;
-- 实际应用:找出最近活跃用户
SELECT user_id, MAX(last_login) AS last_activity
FROM user_sessions
GROUP BY user_id
HAVING last_activity > DATE_SUB(NOW(), INTERVAL 30 DAY);
-- 基础用法
SELECT department_id,
GROUP_CONCAT(employee_name ORDER BY salary DESC) AS employees
FROM employees
GROUP BY department_id;
-- 自定义分隔符和去重
SELECT project_id,
GROUP_CONCAT(DISTINCT tag_name SEPARATOR '; ') AS all_tags
FROM project_tags
GROUP BY project_id;
-- 配合SET类型限制
SET SESSION group_concat_max_len = 1000000;
SELECT GROUP_CONCAT(DISTINCT user_id ORDER BY user_id)
FROM active_users;
窗口函数(现代MySQL)
-- ROW_NUMBER() 排名
SELECT
employee_id,
department_id,
salary,
ROW_NUMBER() OVER (PARTITION BY department_id ORDER BY salary DESC)
AS dept_rank
FROM employees;
-- RANK() 和 DENSE_RANK()
SELECT
product_id,
sales,
RANK() OVER (ORDER BY sales DESC) AS rank_with_gaps,
DENSE_RANK() OVER (ORDER BY sales DESC) AS rank_no_gaps
FROM product_sales;
-- 累计统计
SELECT
date,
revenue,
SUM(revenue) OVER (ORDER BY date) AS cumulative_revenue,
AVG(revenue) OVER (ORDER BY date ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING)
AS moving_avg
FROM daily_revenue;
-- 提取日期各部分
SELECT
order_date,
YEAR(order_date) AS order_year,
MONTH(order_date) AS order_month,
DAY(order_date) AS order_day,
DAYNAME(order_date) AS day_name,
WEEK(order_date) AS week_number,
QUARTER(order_date) AS quarter
FROM orders;
-- 提取时间各部分
SELECT
login_time,
HOUR(login_time) AS login_hour,
MINUTE(login_time) AS login_minute,
SECOND(login_time) AS login_second
FROM user_logins;
日期格式化
-- DATE_FORMAT 函数
SELECT
created_at,
DATE_FORMAT(created_at, '%Y-%m-%d') AS simple_date,
DATE_FORMAT(created_at, '%W, %M %D, %Y') AS full_date,
DATE_FORMAT(created_at, '%H:%i:%s') AS time_only,
DATE_FORMAT(created_at, '%Y年%m月%d日') AS chinese_date
FROM logs;
-- STR_TO_DATE 字符串转日期
SELECT STR_TO_DATE('2024-01-15 14:30:00', '%Y-%m-%d %H:%i:%s');
-- 基础日期运算
SELECT
CURRENT_DATE() AS today,
DATE_ADD(CURRENT_DATE(), INTERVAL 7 DAY) AS next_week,
DATE_SUB(CURRENT_DATE(), INTERVAL 1 MONTH) AS last_month,
DATE_ADD(CURRENT_DATE(), INTERVAL 3 HOUR) AS three_hours_later;
-- 实际应用:计算会员有效期
SELECT
user_id,
join_date,
DATE_ADD(join_date, INTERVAL 1 YEAR) AS membership_expiry,
DATEDIFF(DATE_ADD(join_date, INTERVAL 1 YEAR), CURRENT_DATE())
AS days_until_expiry
FROM members;
日期差计算
-- DATEDIFF 和 TIMESTAMPDIFF
SELECT
start_date,
end_date,
DATEDIFF(end_date, start_date) AS date_diff_days,
TIMESTAMPDIFF(HOUR, start_time, end_time) AS time_diff_hours,
TIMESTAMPDIFF(MONTH, start_date, end_date) AS month_diff
FROM projects;
-- 实际应用:计算用户活跃天数
SELECT
user_id,
COUNT(DISTINCT DATE(login_time)) AS active_days,
TIMESTAMPDIFF(DAY, MIN(login_time), MAX(login_time)) AS tracking_period
FROM user_sessions
GROUP BY user_id;
-- 计算工作日(排除周末)
DELIMITER $$
CREATE FUNCTION working_days(start_date DATE, end_date DATE)
RETURNS INT
DETERMINISTIC
BEGIN
DECLARE total_days INT;
DECLARE weekend_days INT;
SET total_days = DATEDIFF(end_date, start_date) + 1;
SET weekend_days =
(FLOOR((total_days + WEEKDAY(start_date)) / 7) * 2) +
CASE
WHEN WEEKDAY(start_date) = 6 THEN 1
WHEN WEEKDAY(start_date) + total_days % 7 > 6 THEN 1
ELSE 0
END;
RETURN total_days - weekend_days;
END$$
DELIMITER ;
时间区间处理
-- 生成日期序列
WITH RECURSIVE date_series AS (
SELECT '2024-01-01' AS date
UNION ALL
SELECT DATE_ADD(date, INTERVAL 1 DAY)
FROM date_series
WHERE date < '2024-01-31'
)
SELECT * FROM date_series;
-- 按时间段聚合
SELECT
DATE_FORMAT(created_at, '%Y-%m') AS month,
COUNT(*) AS total_orders,
SUM(amount) AS monthly_revenue
FROM orders
GROUP BY DATE_FORMAT(created_at, '%Y-%m')
ORDER BY month;
-- CONCAT 函数
SELECT
CONCAT(first_name, ' ', last_name) AS full_name,
CONCAT_WS(', ', address, city, postal_code) AS full_address
FROM customers;
-- 实际应用:生成唯一编码
SELECT
product_id,
CONCAT(
'PROD-',
YEAR(created_at),
'-',
LPAD(product_id, 6, '0')
) AS product_code
FROM products;
-- 字符串分割(MySQL 8.0+)
SELECT
SUBSTRING_INDEX(tags, ',', 1) AS first_tag,
SUBSTRING_INDEX(SUBSTRING_INDEX(tags, ',', 2), ',', -1) AS second_tag
FROM articles;
子串操作
-- SUBSTRING 函数
SELECT
email,
SUBSTRING(email, 1, POSITION('@' IN email) - 1) AS username,
SUBSTRING(email, POSITION('@' IN email) + 1) AS domain
FROM users;
-- LEFT/RIGHT 函数
SELECT
phone_number,
LEFT(phone_number, 3) AS area_code,
RIGHT(phone_number, 4) AS last_four
FROM contacts;
-- LOCATE/POSITION/INSTR
SELECT
description,
LOCATE('urgent', LOWER(description)) AS urgent_position,
CASE
WHEN description LIKE '%priority%' THEN '高优先级'
WHEN description LIKE '%normal%' THEN '普通'
ELSE '未知'
END AS priority_level
FROM tickets;
-- 全文搜索(MATCH AGAINST)
ALTER TABLE articles ADD FULLTEXT(title, content);
SELECT
title,
content,
MATCH(title, content) AGAINST('数据库 优化' IN NATURAL LANGUAGE MODE)
AS relevance_score
FROM articles
WHERE MATCH(title, content) AGAINST('数据库 优化')
ORDER BY relevance_score DESC;
替换与转换
-- REPLACE 函数
SELECT
content,
REPLACE(content, 'old_product', 'new_product') AS updated_content
FROM articles;
-- 实际应用:数据清洗
UPDATE customers
SET phone = REPLACE(REPLACE(REPLACE(phone, '-', ''), '(', ''), ')', '')
WHERE phone REGEXP '[()-]';
-- 大小写转换
SELECT
username,
LOWER(username) AS lowercase,
UPPER(username) AS uppercase,
CONCAT(UPPER(LEFT(username, 1)), LOWER(SUBSTRING(username, 2)))
AS proper_case
FROM users;
-- REGEXP 操作符
SELECT
email,
CASE
WHEN email REGEXP '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}$'
THEN '有效邮箱'
ELSE '无效邮箱'
END AS validation
FROM users;
-- 提取数字
SELECT
product_code,
REGEXP_SUBSTR(product_code, '[0-9]+') AS extracted_number
FROM products;
-- 复杂验证
SELECT
phone,
CASE
WHEN phone REGEXP '^1[3-9]\\d{9}$' THEN '手机号'
WHEN phone REGEXP '^0\\d{2,3}-\\d{7,8}$' THEN '固定电话'
ELSE '其他'
END AS phone_type
FROM contacts;
-- 生成月度销售报表
SELECT
DATE_FORMAT(order_date, '%Y-%m') AS month,
COUNT(DISTINCT order_id) AS total_orders,
COUNT(DISTINCT customer_id) AS unique_customers,
SUM(order_amount) AS total_revenue,
AVG(order_amount) AS avg_order_value,
GROUP_CONCAT(DISTINCT payment_method SEPARATOR ', ') AS payment_methods,
-- 客户留存分析
COUNT(DISTINCT CASE
WHEN customer_id IN (
SELECT DISTINCT customer_id
FROM orders
WHERE order_date < DATE_FORMAT(order_date, '%Y-%m-01')
)
THEN customer_id
END) AS retained_customers,
-- 环比增长率
ROUND(
(SUM(order_amount) - LAG(SUM(order_amount)) OVER (ORDER BY DATE_FORMAT(order_date, '%Y-%m')))
/ LAG(SUM(order_amount)) OVER (ORDER BY DATE_FORMAT(order_date, '%Y-%m')) * 100,
2
) AS month_over_month_growth
FROM orders
GROUP BY DATE_FORMAT(order_date, '%Y-%m')
ORDER BY month DESC;
-- 用户活跃度分析
WITH user_sessions_agg AS (
SELECT
user_id,
DATE(login_time) AS login_date,
COUNT(*) AS session_count,
TIMESTAMPDIFF(MINUTE, MIN(login_time), MAX(logout_time))
AS total_duration,
-- 会话间隔分析
TIMESTAMPDIFF(
MINUTE,
LAG(logout_time) OVER (PARTITION BY user_id ORDER BY login_time),
login_time
) AS session_gap
FROM user_sessions
GROUP BY user_id, DATE(login_time)
),
user_stats AS (
SELECT
user_id,
COUNT(DISTINCT login_date) AS active_days,
AVG(session_count) AS avg_sessions_per_day,
AVG(total_duration) AS avg_daily_duration,
-- 使用字符串函数生成行为模式
GROUP_CONCAT(
CASE
WHEN HOUR(login_time) BETWEEN 6 AND 12 THEN 'M'
WHEN HOUR(login_time) BETWEEN 12 AND 18 THEN 'A'
WHEN HOUR(login_time) BETWEEN 18 AND 24 THEN 'E'
ELSE 'N'
END
ORDER BY login_date
SEPARATOR ''
) AS activity_pattern
FROM user_sessions_agg
GROUP BY user_id
)
SELECT
user_id,
active_days,
avg_sessions_per_day,
avg_daily_duration,
-- 分析活动模式
CASE
WHEN activity_pattern LIKE '%M%A%E%' THEN '全天活跃'
WHEN activity_pattern REGEXP 'M.*M.*M' THEN '早晨型'
WHEN activity_pattern REGEXP 'E.*E.*E' THEN '夜晚型'
ELSE '无规律'
END AS user_type,
-- 计算活动强度评分
ROUND(
(active_days / 30.0) * 0.4 +
(avg_sessions_per_day / 10.0) * 0.3 +
(avg_daily_duration / 180.0) * 0.3,
2
) * 100 AS activity_score
FROM user_stats
ORDER BY activity_score DESC;
-- 使用覆盖索引
-- 创建复合索引
CREATE INDEX idx_sales_date_product ON sales(sale_date, product_id, amount);
-- 减少聚合数据量
-- 不佳写法
SELECT AVG(salary) FROM employees WHERE YEAR(hire_date) = 2023;
-- 优化写法
SELECT AVG(salary) FROM employees
WHERE hire_date >= '2023-01-01' AND hire_date < '2024-01-01';
-- 避免在WHERE子句中使用函数
-- 不佳写法
SELECT * FROM orders WHERE DATE_FORMAT(order_date, '%Y-%m') = '2024-01';
-- 优化写法
SELECT * FROM orders
WHERE order_date >= '2024-01-01' AND order_date < '2024-02-01';
-- 使用计算列存储预计算值
ALTER TABLE orders
ADD COLUMN order_month VARCHAR(7) AS (DATE_FORMAT(order_date, '%Y-%m')) STORED;
CREATE INDEX idx_order_month ON orders(order_month);
-- 避免全表扫描
-- 不佳写法
SELECT * FROM products WHERE LOWER(product_name) LIKE '%laptop%';
-- 优化写法(使用全文索引)
ALTER TABLE products ADD FULLTEXT(product_name);
SELECT * FROM products WHERE MATCH(product_name) AGAINST('laptop');
-- 或者使用前缀索引
CREATE INDEX idx_product_name ON products(product_name(20));
COUNT(1) 替代 COUNT(*) 在某些场景下性能更好APPROX_COUNT_DISTINCT)这些函数和技巧的深入理解可以帮助您编写更高效、更强大的MySQL查询,处理复杂的数据分析需求。