From 4b70b8eacf28e0575e93b1531aa0b44005612d0a Mon Sep 17 00:00:00 2001 From: haoliang <821644@qq.com> Date: Wed, 6 May 2026 21:09:18 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=97=A5=E5=BF=97=E5=88=86=E5=8C=BA?= =?UTF-8?q?=E7=AE=A1=E7=90=86=E4=BC=98=E5=8C=96=E2=80=94=E2=80=94sp=5Fensu?= =?UTF-8?q?re=5Fpartitions=E8=A6=86=E7=9B=963=E5=BC=A0=E5=88=86=E5=8C=BA?= =?UTF-8?q?=E8=A1=A8=EF=BC=88=E5=90=ABlog=5Fcollect=5Fraw=EF=BC=89?= =?UTF-8?q?=EF=BC=9BLogCleanupJob=E6=94=B9=E7=94=A8DROP=20PARTITION?= =?UTF-8?q?=E6=B8=85=E7=90=86=EF=BC=9B=E4=BF=AE=E5=A4=8D=E5=88=86=E5=8C=BA?= =?UTF-8?q?=E8=BE=B9=E7=95=8C=E8=AE=A1=E7=AE=97bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- database/sqls/07-log-tables-partition.sql | 216 ++++++++++++++++++++++ src/CncCollector/Jobs/LogCleanupJob.cs | 115 +++++++++--- 2 files changed, 307 insertions(+), 24 deletions(-) create mode 100644 database/sqls/07-log-tables-partition.sql diff --git a/database/sqls/07-log-tables-partition.sql b/database/sqls/07-log-tables-partition.sql new file mode 100644 index 0000000..45bee05 --- /dev/null +++ b/database/sqls/07-log-tables-partition.sql @@ -0,0 +1,216 @@ +-- ============================================================ +-- 日志表按月分区统一管理(幂等迁移脚本) +-- 创建时间:2026-05-06 +-- 说明:确保 log_collect_raw、log_collect_analysis、log_collect_cycle +-- 三张日志表均按月分区,并统一存储过程管理 +-- 执行前提:USE cnc_log; 已执行 01-init-schema.sql 和 03-collect-analysis-tables.sql +-- ============================================================ + +USE cnc_log; + +-- ============================================================ +-- 1. log_collect_raw 按月分区 +-- 该表在 01-init-schema.sql 中已定义分区,此处确认分区存在 +-- 分区键:request_time +-- ============================================================ +-- 检查是否已有分区,若无则重建(幂等) +SET @has_partition := (SELECT COUNT(*) FROM information_schema.PARTITIONS + WHERE TABLE_SCHEMA = 'cnc_log' AND TABLE_NAME = 'log_collect_raw' AND PARTITION_NAME IS NOT NULL); + +-- 如果表没有分区(旧表),则需要重建 +-- 注意:如果表已有分区(从DDL创建),此步骤会跳过 +SET @sql_rebuild := IF(@has_partition = 0, + 'ALTER TABLE cnc_log.log_collect_raw PARTITION BY RANGE (TO_DAYS(request_time)) ( + PARTITION p202604 VALUES LESS THAN (TO_DAYS(''2026-05-01'')), + PARTITION p202605 VALUES LESS THAN (TO_DAYS(''2026-06-01'')), + PARTITION p202606 VALUES LESS THAN (TO_DAYS(''2026-07-01'')), + PARTITION p202607 VALUES LESS THAN (TO_DAYS(''2026-08-01'')), + PARTITION p_future VALUES LESS THAN MAXVALUE + )', + 'SELECT ''log_collect_raw 已有分区,跳过'''); +PREPARE stmt FROM @sql_rebuild; +EXECUTE stmt; +DEALLOCATE PREPARE stmt; + +-- ============================================================ +-- 2. log_collect_analysis 按月分区 +-- 该表在 03-collect-analysis-tables.sql 中已定义分区 +-- 分区键:analysis_time +-- ============================================================ +SET @has_partition_a := (SELECT COUNT(*) FROM information_schema.PARTITIONS + WHERE TABLE_SCHEMA = 'cnc_log' AND TABLE_NAME = 'log_collect_analysis' AND PARTITION_NAME IS NOT NULL); + +SET @sql_rebuild_a := IF(@has_partition_a = 0, + 'ALTER TABLE cnc_log.log_collect_analysis PARTITION BY RANGE (TO_DAYS(analysis_time)) ( + PARTITION p202605 VALUES LESS THAN (TO_DAYS(''2026-06-01'')), + PARTITION p202606 VALUES LESS THAN (TO_DAYS(''2026-07-01'')), + PARTITION p202607 VALUES LESS THAN (TO_DAYS(''2026-08-01'')), + PARTITION p_future VALUES LESS THAN MAXVALUE + )', + 'SELECT ''log_collect_analysis 已有分区,跳过'''); +PREPARE stmt FROM @sql_rebuild_a; +EXECUTE stmt; +DEALLOCATE PREPARE stmt; + +-- ============================================================ +-- 3. log_collect_cycle 按月分区 +-- 该表在 03-collect-analysis-tables.sql 中已定义分区 +-- 分区键:cycle_time +-- ============================================================ +SET @has_partition_c := (SELECT COUNT(*) FROM information_schema.PARTITIONS + WHERE TABLE_SCHEMA = 'cnc_log' AND TABLE_NAME = 'log_collect_cycle' AND PARTITION_NAME IS NOT NULL); + +SET @sql_rebuild_c := IF(@has_partition_c = 0, + 'ALTER TABLE cnc_log.log_collect_cycle PARTITION BY RANGE (TO_DAYS(cycle_time)) ( + PARTITION p202605 VALUES LESS THAN (TO_DAYS(''2026-06-01'')), + PARTITION p202606 VALUES LESS THAN (TO_DAYS(''2026-07-01'')), + PARTITION p202607 VALUES LESS THAN (TO_DAYS(''2026-08-01'')), + PARTITION p_future VALUES LESS THAN MAXVALUE + )', + 'SELECT ''log_collect_cycle 已有分区,跳过'''); +PREPARE stmt FROM @sql_rebuild_c; +EXECUTE stmt; +DEALLOCATE PREPARE stmt; + +-- ============================================================ +-- 4. 更新存储过程 sp_ensure_partitions:覆盖全部3张分区表 +-- ============================================================ +DROP PROCEDURE IF EXISTS sp_ensure_partitions; +DELIMITER $$ +CREATE PROCEDURE sp_ensure_partitions() +BEGIN + -- 当前月的第一天 + SET @base := DATE_FORMAT(CURDATE(), '%Y-%m-01'); + SET @d1 := DATE_ADD(@base, INTERVAL 1 MONTH); + SET @d2 := DATE_ADD(@base, INTERVAL 2 MONTH); + SET @p1 := CONCAT('p', DATE_FORMAT(@d1, '%Y%m')); + SET @p2 := CONCAT('p', DATE_FORMAT(@d2, '%Y%m')); + + -- ============================ + -- log_collect_raw(分区键:request_time) + -- ============================ + IF NOT EXISTS (SELECT 1 FROM information_schema.PARTITIONS + WHERE TABLE_SCHEMA = 'cnc_log' AND TABLE_NAME = 'log_collect_raw' AND PARTITION_NAME = @p1) THEN + SET @v1 := DATE_FORMAT(@d1, '%Y-%m-01'); + SET @sql := CONCAT('ALTER TABLE cnc_log.log_collect_raw ADD PARTITION (PARTITION ', @p1, + ' VALUES LESS THAN (TO_DAYS(', '''', @v1, '''', '))'); + PREPARE stmt FROM @sql; EXECUTE stmt; DEALLOCATE PREPARE stmt; + INSERT IGNORE INTO log_partition_tracker(table_name, partition_name, partition_value) VALUES ('log_collect_raw', @p1, @v1); + END IF; + + IF NOT EXISTS (SELECT 1 FROM information_schema.PARTITIONS + WHERE TABLE_SCHEMA = 'cnc_log' AND TABLE_NAME = 'log_collect_raw' AND PARTITION_NAME = @p2) THEN + SET @v2 := DATE_FORMAT(@d2, '%Y-%m-01'); + SET @sql := CONCAT('ALTER TABLE cnc_log.log_collect_raw ADD PARTITION (PARTITION ', @p2, + ' VALUES LESS THAN (TO_DAYS(', '''', @v2, '''', '))'); + PREPARE stmt FROM @sql; EXECUTE stmt; DEALLOCATE PREPARE stmt; + INSERT IGNORE INTO log_partition_tracker(table_name, partition_name, partition_value) VALUES ('log_collect_raw', @p2, @v2); + END IF; + + -- ============================ + -- log_collect_analysis(分区键:analysis_time) + -- ============================ + IF NOT EXISTS (SELECT 1 FROM information_schema.PARTITIONS + WHERE TABLE_SCHEMA = 'cnc_log' AND TABLE_NAME = 'log_collect_analysis' AND PARTITION_NAME = @p1) THEN + SET @v1 := DATE_FORMAT(@d1, '%Y-%m-01'); + SET @sql := CONCAT('ALTER TABLE cnc_log.log_collect_analysis ADD PARTITION (PARTITION ', @p1, + ' VALUES LESS THAN (TO_DAYS(', '''', @v1, '''', '))'); + PREPARE stmt FROM @sql; EXECUTE stmt; DEALLOCATE PREPARE stmt; + INSERT IGNORE INTO log_partition_tracker(table_name, partition_name, partition_value) VALUES ('log_collect_analysis', @p1, @v1); + END IF; + + IF NOT EXISTS (SELECT 1 FROM information_schema.PARTITIONS + WHERE TABLE_SCHEMA = 'cnc_log' AND TABLE_NAME = 'log_collect_analysis' AND PARTITION_NAME = @p2) THEN + SET @v2 := DATE_FORMAT(@d2, '%Y-%m-01'); + SET @sql := CONCAT('ALTER TABLE cnc_log.log_collect_analysis ADD PARTITION (PARTITION ', @p2, + ' VALUES LESS THAN (TO_DAYS(', '''', @v2, '''', '))'); + PREPARE stmt FROM @sql; EXECUTE stmt; DEALLOCATE PREPARE stmt; + INSERT IGNORE INTO log_partition_tracker(table_name, partition_name, partition_value) VALUES ('log_collect_analysis', @p2, @v2); + END IF; + + -- ============================ + -- log_collect_cycle(分区键:cycle_time) + -- ============================ + IF NOT EXISTS (SELECT 1 FROM information_schema.PARTITIONS + WHERE TABLE_SCHEMA = 'cnc_log' AND TABLE_NAME = 'log_collect_cycle' AND PARTITION_NAME = @p1) THEN + SET @v1 := DATE_FORMAT(@d1, '%Y-%m-01'); + SET @sql := CONCAT('ALTER TABLE cnc_log.log_collect_cycle ADD PARTITION (PARTITION ', @p1, + ' VALUES LESS THAN (TO_DAYS(', '''', @v1, '''', '))'); + PREPARE stmt FROM @sql; EXECUTE stmt; DEALLOCATE PREPARE stmt; + INSERT IGNORE INTO log_partition_tracker(table_name, partition_name, partition_value) VALUES ('log_collect_cycle', @p1, @v1); + END IF; + + IF NOT EXISTS (SELECT 1 FROM information_schema.PARTITIONS + WHERE TABLE_SCHEMA = 'cnc_log' AND TABLE_NAME = 'log_collect_cycle' AND PARTITION_NAME = @p2) THEN + SET @v2 := DATE_FORMAT(@d2, '%Y-%m-01'); + SET @sql := CONCAT('ALTER TABLE cnc_log.log_collect_cycle ADD PARTITION (PARTITION ', @p2, + ' VALUES LESS THAN (TO_DAYS(', '''', @v2, '''', '))'); + PREPARE stmt FROM @sql; EXECUTE stmt; DEALLOCATE PREPARE stmt; + INSERT IGNORE INTO log_partition_tracker(table_name, partition_name, partition_value) VALUES ('log_collect_cycle', @p2, @v2); + END IF; +END$$ +DELIMITER ; + +-- ============================================================ +-- 5. 更新 sp_check_partitions:覆盖全部3张分区表 +-- ============================================================ +DROP PROCEDURE IF EXISTS sp_check_partitions; +DELIMITER $$ +CREATE PROCEDURE sp_check_partitions() +BEGIN + SET @base := DATE_FORMAT(CURDATE(), '%Y-%m-01'); + SET @d1 := DATE_ADD(@base, INTERVAL 1 MONTH); + SET @d2 := DATE_ADD(@base, INTERVAL 2 MONTH); + SET @p1 := CONCAT('p', DATE_FORMAT(@d1, '%Y%m')); + SET @p2 := CONCAT('p', DATE_FORMAT(@d2, '%Y%m')); + + SET @need := 0; + + -- log_collect_raw + IF (SELECT COUNT(*) FROM information_schema.PARTITIONS WHERE TABLE_SCHEMA = 'cnc_log' AND TABLE_NAME = 'log_collect_raw' AND PARTITION_NAME = @p1) = 0 THEN SET @need = 1; END IF; + IF (SELECT COUNT(*) FROM information_schema.PARTITIONS WHERE TABLE_SCHEMA = 'cnc_log' AND TABLE_NAME = 'log_collect_raw' AND PARTITION_NAME = @p2) = 0 THEN SET @need = 1; END IF; + + -- log_collect_analysis + IF (SELECT COUNT(*) FROM information_schema.PARTITIONS WHERE TABLE_SCHEMA = 'cnc_log' AND TABLE_NAME = 'log_collect_analysis' AND PARTITION_NAME = @p1) = 0 THEN SET @need = 1; END IF; + IF (SELECT COUNT(*) FROM information_schema.PARTITIONS WHERE TABLE_SCHEMA = 'cnc_log' AND TABLE_NAME = 'log_collect_analysis' AND PARTITION_NAME = @p2) = 0 THEN SET @need = 1; END IF; + + -- log_collect_cycle + IF (SELECT COUNT(*) FROM information_schema.PARTITIONS WHERE TABLE_SCHEMA = 'cnc_log' AND TABLE_NAME = 'log_collect_cycle' AND PARTITION_NAME = @p1) = 0 THEN SET @need = 1; END IF; + IF (SELECT COUNT(*) FROM information_schema.PARTITIONS WHERE TABLE_SCHEMA = 'cnc_log' AND TABLE_NAME = 'log_collect_cycle' AND PARTITION_NAME = @p2) = 0 THEN SET @need = 1; END IF; + + IF @need = 1 THEN + CALL sp_ensure_partitions(); + END IF; + + SELECT @need AS need_partition_creation; +END$$ +DELIMITER ; + +-- ============================================================ +-- 6. 确保分区追踪表存在 +-- ============================================================ +CREATE TABLE IF NOT EXISTS log_partition_tracker ( + table_name VARCHAR(100) NOT NULL, + partition_name VARCHAR(50) NOT NULL, + partition_value VARCHAR(30) NOT NULL, + created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (table_name, partition_name) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci + COMMENT='分区管理追踪表'; + +-- ============================================================ +-- 7. 立即执行一次分区确保 +-- ============================================================ +CALL sp_ensure_partitions(); + +-- ============================================================ +-- 8. 更新 MariaDB 事件:每月1日凌晨2:00执行 +-- ============================================================ +SET GLOBAL event_scheduler = ON; +DROP EVENT IF EXISTS ev_ensure_partitions; +CREATE EVENT IF NOT EXISTS ev_ensure_partitions +ON SCHEDULE + EVERY 1 MONTH + STARTS TIMESTAMP(DATE_FORMAT(DATE_ADD(CURDATE(), INTERVAL 1 MONTH), '%Y-%m-01 02:00:00')) +DO + CALL sp_check_partitions(); diff --git a/src/CncCollector/Jobs/LogCleanupJob.cs b/src/CncCollector/Jobs/LogCleanupJob.cs index 76777ba..1764b84 100644 --- a/src/CncCollector/Jobs/LogCleanupJob.cs +++ b/src/CncCollector/Jobs/LogCleanupJob.cs @@ -1,4 +1,5 @@ using System; +using System.Collections.Generic; using Dapper; using MySqlConnector; using CncCollector.Config; @@ -8,7 +9,8 @@ namespace CncCollector.Jobs { /// /// 日志清理定时任务。 - /// 根据配置的保留天数清理日志数据。保留天数=0表示不删除。 + /// 对按月分区表使用 DROP PARTITION 清理(瞬间完成), + /// 对非分区表回退到 DELETE。保留天数=0表示不删除。 /// public class LogCleanupJob { @@ -30,55 +32,120 @@ namespace CncCollector.Jobs { try { - int total = 0; + int totalPartitions = 0; + int totalRows = 0; using (var conn = new MySqlConnection(_logConnection)) { - // 1) 采集分析日志 + // 1) 采集分析日志(分区表:DROP PARTITION) int daysA = Math.Max(_config.AnalysisLogRetentionDays, 0); if (daysA > 0) { - string sqlA = $"DELETE FROM cnc_log.log_collect_analysis WHERE analysis_time < DATE_SUB(NOW(), INTERVAL {daysA} DAY)"; - int del = conn.Execute(sqlA); - total += del; - _log.Info($"日志清理: log_collect_analysis 删除 {del} 行,保留 {daysA} 天"); + int dropped = DropOldPartitions(conn, "log_collect_analysis", "analysis_time", daysA); + if (dropped > 0) + { + totalPartitions += dropped; + _log.Info($"日志清理: log_collect_analysis DROP {dropped} 个分区,保留 {daysA} 天"); + } } - // 2) 采集周期日志 + // 2) 采集周期日志(分区表:DROP PARTITION) int daysC = Math.Max(_config.CycleLogRetentionDays, 0); if (daysC > 0) { - string sqlC = $"DELETE FROM cnc_log.log_collect_cycle WHERE cycle_time < DATE_SUB(NOW(), INTERVAL {daysC} DAY)"; - int del = conn.Execute(sqlC); - total += del; - _log.Info($"日志清理: log_collect_cycle 删除 {del} 行,保留 {daysC} 天"); + int dropped = DropOldPartitions(conn, "log_collect_cycle", "cycle_time", daysC); + if (dropped > 0) + { + totalPartitions += dropped; + _log.Info($"日志清理: log_collect_cycle DROP {dropped} 个分区,保留 {daysC} 天"); + } } - // 3) 原始日志 + // 3) 原始日志(分区表:DROP PARTITION) int daysR = Math.Max(_config.RawLogRetentionDays, 0); if (daysR > 0) { - // 尝试使用 created_at 字段,如不存在再回退到 request_time - string sqlR = $"DELETE FROM cnc_log.log_collect_raw WHERE created_at < DATE_SUB(NOW(), INTERVAL {daysR} DAY)"; - int del = 0; + int dropped = DropOldPartitions(conn, "log_collect_raw", "request_time", daysR); + if (dropped > 0) + { + totalPartitions += dropped; + _log.Info($"日志清理: log_collect_raw DROP {dropped} 个分区,保留 {daysR} 天"); + } + } + } + _log.Info($"日志清理完成,DROP {totalPartitions} 个分区"); + } + catch (Exception ex) + { + _log.Error("执行日志清理任务失败", ex); + } + } + + /// + /// 清理过期的月分区。计算保留天数对应的截止月份, + /// 找出所有分区边界早于截止月份的分区(排除p_future),执行 DROP PARTITION。 + /// + private int DropOldPartitions(MySqlConnection conn, string tableName, string partitionColumn, int retentionDays) + { + int dropped = 0; + try + { + // 截止日期:保留天数之前的日期 + var cutoffDate = DateTime.Now.AddDays(-retentionDays); + // 截止月份的第一天(整个月都要删除) + var cutoffMonth = new DateTime(cutoffDate.Year, cutoffDate.Month, 1); + + // 查询所有非 p_future 分区及其边界值 + var partitions = conn.Query<(string PARTITION_NAME, string PARTITION_DESCRIPTION)>( + @"SELECT PARTITION_NAME, PARTITION_DESCRIPTION + FROM information_schema.PARTITIONS + WHERE TABLE_SCHEMA = 'cnc_log' + AND TABLE_NAME = @TableName + AND PARTITION_NAME IS NOT NULL + AND PARTITION_NAME <> 'p_future' + ORDER BY PARTITION_ORDINAL_POSITION", + new { TableName = tableName }); + + foreach (var part in partitions) + { + // PARTITION_DESCRIPTION 是 TO_DAYS('YYYY-MM-DD') 的整数值 + if (!long.TryParse(part.PARTITION_DESCRIPTION, out long toDaysValue)) + continue; + + // 将 TO_DAYS 值反算为日期(近似:用 MySQL 的 FROM_DAYS) + DateTime partitionBoundary; + try + { + partitionBoundary = conn.ExecuteScalar( + "SELECT FROM_DAYS(@ToDays)", new { ToDays = toDaysValue }); + } + catch + { + // 无法解析边界值,跳过此分区 + _log.Warn($"无法解析分区边界值: {tableName}.{part.PARTITION_NAME} = {part.PARTITION_DESCRIPTION}"); + continue; + } + + // 如果分区边界 <= 截止月份,说明这个分区整月都在保留期外,可以 DROP + if (partitionBoundary <= cutoffMonth) + { try { - del = conn.Execute(sqlR); + conn.Execute($"ALTER TABLE cnc_log.{tableName} DROP PARTITION {part.PARTITION_NAME}"); + dropped++; + _log.Info($"DROP PARTITION: {tableName}.{part.PARTITION_NAME} (边界={partitionBoundary:yyyy-MM-dd})"); } - catch + catch (Exception ex) { - string sqlR2 = $"DELETE FROM cnc_log.log_collect_raw WHERE request_time < DATE_SUB(NOW(), INTERVAL {daysR} DAY)"; - del = conn.Execute(sqlR2); + _log.Error($"DROP PARTITION 失败: {tableName}.{part.PARTITION_NAME}", ex); } - total += del; - _log.Info($"日志清理: log_collect_raw 删除 {del} 行,保留 {daysR} 天"); } } - _log.Info($"日志清理完成,总删除记录数: {total}"); } catch (Exception ex) { - _log.Error("执行日志清理任务失败", ex); + _log.Error($"清理分区表 {tableName} 时出错", ex); } + return dropped; } } }