From cdb03d4db38e7817ed96ff060b205b688cbdbd51 Mon Sep 17 00:00:00 2001
From: haoliang <821644@qq.com>
Date: Fri, 8 May 2026 23:09:37 +0800
Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=BC=BA=E9=94=99=E8=AF=AF=E8=AF=8A?=
=?UTF-8?q?=E6=96=AD=EF=BC=9A=E9=80=92=E5=BD=92=E5=B1=95=E5=BC=80=E5=BC=82?=
=?UTF-8?q?=E5=B8=B8=E9=93=BE+=E5=88=86=E7=B1=BB=E5=86=99=E5=85=A5DB/?=
=?UTF-8?q?=E5=91=8A=E8=AD=A6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
1. CollectWorker新增GetDetailedErrorMessage递归展开AggregateException
解决HTTP异常只记录模糊的'发生一个或多个错误',现可追踪到SocketException
2. JSON解析失败新增WriteBatch写入log_collect_raw(之前丢失DB记录)
3. ProductionTracker失败→cnc_alert(alert_type=production_error)
4. DailySummaryJob失败→cnc_alert(alert_type=summary_error)
5. CollectRecordWriter.DB写失败时本地日志记录完整异常链
6. log_collect_raw.error_message VARCHAR(500)→TEXT
7. 新增ErrorSimulation验证工具(模拟4类异常→验证DB/日志)
---
src/CncCollector/Core/CollectRecordWriter.cs | 3 +-
src/CncCollector/Core/CollectWorker.cs | 53 ++++++++-
src/CncCollector/Core/DailySummaryJob.cs | 12 ++
src/CncCollector/Core/ProductionTracker.cs | 12 ++
tools/ErrorSimulation/ErrorSimulation.csproj | 15 +++
tools/ErrorSimulation/Program.cs | 112 +++++++++++++++++++
6 files changed, 203 insertions(+), 4 deletions(-)
create mode 100644 tools/ErrorSimulation/ErrorSimulation.csproj
create mode 100644 tools/ErrorSimulation/Program.cs
diff --git a/src/CncCollector/Core/CollectRecordWriter.cs b/src/CncCollector/Core/CollectRecordWriter.cs
index a8daa23..bf18699 100644
--- a/src/CncCollector/Core/CollectRecordWriter.cs
+++ b/src/CncCollector/Core/CollectRecordWriter.cs
@@ -65,7 +65,8 @@ namespace CncCollector.Core
}
catch (Exception ex)
{
- _log.Error($"写入原始JSON日志失败(地址ID={collectAddressId})", ex);
+ // 数据库不可用时,详细错误信息通过log4net写入本地日志文件,确保问题可追溯
+ _log.Error($"写入原始JSON日志失败(地址ID={collectAddressId}, 成功={isSuccess}): {errorMessage}", ex);
}
if (!isSuccess || records == null || records.Count == 0) return lastRawLogId;
diff --git a/src/CncCollector/Core/CollectWorker.cs b/src/CncCollector/Core/CollectWorker.cs
index c723f03..6e40685 100644
--- a/src/CncCollector/Core/CollectWorker.cs
+++ b/src/CncCollector/Core/CollectWorker.cs
@@ -6,6 +6,7 @@ using System.Linq;
using System.Net;
using System.Net.Http;
using System.Net.NetworkInformation;
+using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Dapper;
@@ -97,6 +98,47 @@ namespace CncCollector.Core
_log.Info($"采集工作线程已停止: {_address.Name}");
}
+        /// <summary>
+        /// Flattens an exception and everything nested in it into one line of "[Type] message" segments.
+        /// Inner exceptions are joined with " ← "; every member of AggregateException.InnerExceptions is
+        /// joined with " | " and expanded with its own inner chain, so the root cause stays visible.
+        /// </summary>
+        /// <param name="ex">Exception to describe; null yields an empty string.</param>
+        /// <param name="maxLength">Maximum length of the result; longer text is cut and ends in "...".</param>
+        private static string GetDetailedErrorMessage(Exception ex, int maxLength = 1800)
+        {
+            if (ex == null || maxLength <= 0) return "";
+            var sb = new StringBuilder();
+            AppendExceptionChain(sb, ex, maxLength);
+            if (sb.Length >= maxLength)
+            {
+                // The ellipsis only fits when maxLength > 3; never set a negative StringBuilder.Length.
+                sb.Length = maxLength > 3 ? maxLength - 3 : maxLength;
+                if (maxLength > 3) sb.Append("...");
+            }
+            return sb.ToString();
+        }
+
+        /// <summary>Appends ex and its nested exceptions to sb, stopping once sb reaches maxLength.</summary>
+        private static void AppendExceptionChain(StringBuilder sb, Exception ex, int maxLength)
+        {
+            for (bool first = true; ex != null && sb.Length < maxLength; ex = ex.InnerException, first = false)
+            {
+                if (!first) sb.Append(" ← ");
+                sb.Append($"[{ex.GetType().Name}] {ex.Message}");
+                if (!(ex is AggregateException aggEx)) continue;
+                // AggregateException.InnerException is InnerExceptions[0]; walk the collection and stop
+                // here instead of following InnerException, otherwise the first inner is reported twice.
+                foreach (var inner in aggEx.InnerExceptions)
+                {
+                    if (sb.Length >= maxLength) break;
+                    sb.Append(" | ");
+                    AppendExceptionChain(sb, inner, maxLength);
+                }
+                return;
+            }
+        }
+
         /// <summary>
         /// 工作线程主循环
         /// </summary>
@@ -110,7 +152,7 @@ namespace CncCollector.Core
}
catch (Exception ex)
{
- _log.Error($"采集循环异常(地址={_address.Name})", ex);
+ _log.Error($"采集循环异常(地址={_address.Name}): {GetDetailedErrorMessage(ex, 500)}", ex);
}
// 等待下一次采集
@@ -174,7 +216,8 @@ namespace CncCollector.Core
{
sw.Stop();
durationMs = sw.ElapsedMilliseconds;
- errorMsg = ex.Message;
+ errorMsg = GetDetailedErrorMessage(ex);
+ _log.Error($"HTTP采集异常(地址={_address.Name}, 第{attempt+1}次尝试)", ex);
}
}
@@ -210,7 +253,11 @@ namespace CncCollector.Core
}
catch (Exception ex)
{
- _log.Error($"JSON解析/入库失败(地址={_address.Name})", ex);
+ var detailedErr = GetDetailedErrorMessage(ex);
+ _log.Error($"JSON解析/入库失败(地址={_address.Name}): {detailedErr}", ex);
+ // 写入失败记录到日志库,便于远程诊断
+ CollectRecordWriter.WriteBatch(_businessConnStr, _logConnStr, null, rawJson,
+ _address.Id, requestTime, durationMs, false, detailedErr, httpStatusCode);
}
}
diff --git a/src/CncCollector/Core/DailySummaryJob.cs b/src/CncCollector/Core/DailySummaryJob.cs
index 8ddcc9a..a70734d 100644
--- a/src/CncCollector/Core/DailySummaryJob.cs
+++ b/src/CncCollector/Core/DailySummaryJob.cs
@@ -130,6 +130,18 @@ namespace CncCollector.Core
catch (Exception ex)
{
_log.Error($"日终汇总失败(日期={summaryDate:yyyy-MM-dd})", ex);
+ // 写入告警:日终汇总失败意味着产量统计缺失
+ try
+ {
+ using (var conn2 = new MySqlConnection(_businessConnStr))
+ {
+ conn2.Execute(@"INSERT INTO cnc_alert (alert_type, title, detail, is_resolved, created_at)
+ VALUES (@Type, @Title, @Detail, 0, NOW())",
+ new { Type = "summary_error", Title = $"日终汇总失败({summaryDate:yyyy-MM-dd})",
+ Detail = ex.Message });
+ }
+ }
+ catch { /* 告警写入失败不影响主流程 */ }
return false;
}
}
diff --git a/src/CncCollector/Core/ProductionTracker.cs b/src/CncCollector/Core/ProductionTracker.cs
index 2f3f5fa..8304a9c 100644
--- a/src/CncCollector/Core/ProductionTracker.cs
+++ b/src/CncCollector/Core/ProductionTracker.cs
@@ -107,6 +107,18 @@ namespace CncCollector.Core
catch (Exception ex)
{
_log.Error($"产量跟踪处理失败(machine_id={machineId})", ex);
+ // 写入告警:产量跟踪失败意味着产量数据可能丢失
+ try
+ {
+ using (var conn2 = new MySqlConnection(_connectionString))
+ {
+ conn2.Execute(@"INSERT INTO cnc_alert (alert_type, machine_id, title, detail, is_resolved, created_at)
+ VALUES (@Type, @Mid, @Title, @Detail, 0, NOW())",
+ new { Type = "production_error", Mid = machineId, Title = "产量跟踪处理异常",
+ Detail = $"机床{machineId}产量跟踪失败: {ex.Message}" });
+ }
+ }
+ catch { /* 告警写入失败不影响主流程 */ }
}
}
}
diff --git a/tools/ErrorSimulation/ErrorSimulation.csproj b/tools/ErrorSimulation/ErrorSimulation.csproj
new file mode 100644
index 0000000..6c55e12
--- /dev/null
+++ b/tools/ErrorSimulation/ErrorSimulation.csproj
@@ -0,0 +1,15 @@
+
+
+
+ Exe
+ net8.0
+ enable
+ enable
+
+
+
+
+
+
+
+
diff --git a/tools/ErrorSimulation/Program.cs b/tools/ErrorSimulation/Program.cs
new file mode 100644
index 0000000..34429bd
--- /dev/null
+++ b/tools/ErrorSimulation/Program.cs
@@ -0,0 +1,112 @@
+using System;
+using System.Linq;
+using System.Net.Http;
+using Dapper;
+using MySqlConnector;
+
+// Connection strings for the business and log databases of a local instance.
+const string BizConn = "Server=localhost;Database=cnc_business;Uid=root;Pwd=root;Charset=utf8mb4;SslMode=None;";
+const string LogConn = "Server=localhost;Database=cnc_log;Uid=root;Pwd=root;Charset=utf8mb4;SslMode=None;";
+
+// Running tally of checks, updated by Assert below.
+int pass = 0, fail = 0;
+
+// Prints one result line for a check and counts it as passed or failed.
+void Assert(string name, bool cond, string detail = "")
+{
+    char mark = cond ? '✅' : '❌';
+    Console.WriteLine($" {mark} {name} {detail}");
+    if (cond) pass++;
+    else fail++;
+}
+
+Console.WriteLine("===== 错误模拟验证 =====\n");
+
+// ====== Test 1: HTTP request failure ======
+// Asks the collector to reload its configuration, waits for it to collect again, then checks that the
+// newest failed row in log_collect_raw carries a detailed message and that the local log files exist.
+Console.WriteLine("--- 测试1: HTTP请求失败 → log_collect_raw + 本地日志 ---");
+try
+{
+    using var http = new HttpClient();
+    http.DefaultRequestHeaders.Add("X-Api-Key", "collector_api_key_2026");
+    // await instead of Wait(): a failure surfaces as the real exception, not a wrapping AggregateException.
+    using var resp = await http.PostAsync("http://localhost:5800/api/collector/refresh", null);
+    // A rejected refresh means the rows checked below may be stale, so record it as a failed check.
+    Assert("配置刷新请求成功", resp.IsSuccessStatusCode, $"HTTP {(int)resp.StatusCode}");
+    if (resp.IsSuccessStatusCode) Console.WriteLine(" 已触发配置刷新(将重新采集)");
+    await System.Threading.Tasks.Task.Delay(40000); // wait for two collection cycles
+
+    using var c = new MySqlConnection(LogConn);
+    var err = c.QueryFirstOrDefault<(long Id, string Msg, DateTime T)>(
+        "SELECT id, error_message, request_time FROM log_collect_raw WHERE is_success=0 AND error_message IS NOT NULL AND error_message!='' ORDER BY id DESC LIMIT 1");
+
+    // A default tuple (Id == 0, Msg == null) means no row matched; the message checks must then fail too.
+    string msg = err.Msg ?? "";
+    bool hasRec = err.Id > 0;
+    bool notVague = hasRec && !msg.StartsWith("发生一个或多个错误", StringComparison.Ordinal);
+    bool hasType = msg.Contains("Exception") || msg.Contains("Error");
+
+    Assert("失败记录存在", hasRec);
+    Assert("非模糊消息(不是'发生一个或多个错误')", notVague);
+    Assert("包含异常类型名", hasType);
+    if (hasRec) Console.WriteLine($" 错误: {msg.Substring(0, Math.Min(200, msg.Length))}");
+
+    Assert("本地日志文件存在", System.IO.File.Exists(@"C:\CncCollector\logs\collector.log"));
+    Assert("错误日志文件存在", System.IO.File.Exists(@"C:\CncCollector\logs\collector_error.log"));
+}
+catch (Exception ex) { Assert("测试1执行", false, ex.Message); }
+
+// ====== Test 2: JSON parse failure ======
+// Inserts a simulated JSON-parse-failure row into log_collect_raw and checks that it is counted.
+// NOTE(review): this only shows the table accepts such a row; it does not exercise the collector itself.
+Console.WriteLine("\n--- 测试2: JSON解析失败 → log_collect_raw ---");
+try
+{
+    using var c = new MySqlConnection(LogConn);
+    c.Open(); // keep a single session so LAST_INSERT_ID() refers to the INSERT below
+    const string countSql = "SELECT COUNT(*) FROM log_collect_raw WHERE error_message LIKE '%[JsonReaderException]%'";
+    long before = c.ExecuteScalar<long>(countSql);
+
+    // Write one simulated parse-failure record and remember its id.
+    // Assumes log_collect_raw.id is AUTO_INCREMENT (the INSERT supplies no id) — confirm against the schema.
+    c.Execute(@"INSERT INTO log_collect_raw (collect_address_id,request_time,response_time,response_duration,is_success,status_code,raw_json,error_message,created_at)
+        VALUES (1,NOW(),NOW(),50,0,NULL,'{broken json',@E,NOW())",
+        new { E = "[JsonReaderException] 无效的JSON格式: Unexpected character ← [ParseAndSave异常] JSON解析失败" });
+    long simulatedId = c.ExecuteScalar<long>("SELECT LAST_INSERT_ID()");
+
+    long after = c.ExecuteScalar<long>(countSql);
+    Assert("JSON解析失败已记录", after > before);
+
+    // Clean up by id only: a LIKE-based DELETE would also wipe genuine JsonReaderException records.
+    c.Execute("DELETE FROM log_collect_raw WHERE id=@Id", new { Id = simulatedId });
+}
+catch (Exception ex) { Assert("测试2执行", false, ex.Message); }
+
+// ====== Test 3: ProductionTracker failure → cnc_alert ======
+// Inserts a simulated production_error alert and checks that it is counted and can be read back.
+// NOTE(review): this only shows cnc_alert accepts such a row; it does not exercise ProductionTracker itself.
+Console.WriteLine("\n--- 测试3: ProductionTracker失败 → cnc_alert ---");
+try
+{
+    using var c = new MySqlConnection(BizConn);
+    c.Open(); // keep a single session so LAST_INSERT_ID() refers to the INSERT below
+    const string countSql = "SELECT COUNT(*) FROM cnc_alert WHERE alert_type='production_error'";
+    long before = c.ExecuteScalar<long>(countSql);
+
+    // Assumes cnc_alert.id is AUTO_INCREMENT (the INSERT supplies no id) — confirm against the schema.
+    c.Execute(@"INSERT INTO cnc_alert (alert_type,title,detail,is_resolved,created_at)
+        VALUES ('production_error','产量跟踪处理异常(模拟)','机床999产量跟踪失败: MySqlException: Connection timeout',0,NOW())");
+    long simulatedId = c.ExecuteScalar<long>("SELECT LAST_INSERT_ID()");
+
+    long after = c.ExecuteScalar<long>(countSql);
+    Assert("产量跟踪告警已创建", after > before);
+
+    // Read back exactly the row inserted above rather than any alert whose title happens to match.
+    var alert = c.QueryFirstOrDefault<(string T, string D)>(
+        "SELECT title, detail FROM cnc_alert WHERE id=@Id", new { Id = simulatedId });
+    Assert("告警标题含'产量跟踪'", (alert.T ?? "").Contains("产量跟踪"));
+    Assert("告警详情非空", !string.IsNullOrEmpty(alert.D));
+    Console.WriteLine($" 告警: {alert.T}: {alert.D}");
+
+    // Clean up by id only so unrelated alerts are never deleted.
+    c.Execute("DELETE FROM cnc_alert WHERE id=@Id", new { Id = simulatedId });
+}
+catch (Exception ex) { Assert("测试3执行", false, ex.Message); }
+
+// ====== Test 4: DailySummaryJob failure → cnc_alert ======
+// Inserts a simulated summary_error alert and checks that it is counted and can be read back.
+// NOTE(review): this only shows cnc_alert accepts such a row; it does not exercise DailySummaryJob itself.
+Console.WriteLine("\n--- 测试4: DailySummaryJob失败 → cnc_alert ---");
+try
+{
+    using var c = new MySqlConnection(BizConn);
+    c.Open(); // keep a single session so LAST_INSERT_ID() refers to the INSERT below
+    const string countSql = "SELECT COUNT(*) FROM cnc_alert WHERE alert_type='summary_error'";
+    long before = c.ExecuteScalar<long>(countSql);
+
+    // Assumes cnc_alert.id is AUTO_INCREMENT (the INSERT supplies no id) — confirm against the schema.
+    c.Execute(@"INSERT INTO cnc_alert (alert_type,title,detail,is_resolved,created_at)
+        VALUES ('summary_error','日终汇总失败(模拟-2026-05-07)','MySqlException: Connection timeout during daily summary transaction',0,NOW())");
+    long simulatedId = c.ExecuteScalar<long>("SELECT LAST_INSERT_ID()");
+
+    long after = c.ExecuteScalar<long>(countSql);
+    Assert("日终汇总告警已创建", after > before);
+
+    // Read back exactly the row inserted above rather than any alert whose title happens to match.
+    var alert = c.QueryFirstOrDefault<(string T, string D)>(
+        "SELECT title, detail FROM cnc_alert WHERE id=@Id", new { Id = simulatedId });
+    Assert("告警标题含'日终汇总'", (alert.T ?? "").Contains("日终汇总"));
+    Console.WriteLine($" 告警: {alert.T}: {alert.D}");
+
+    // Clean up by id only so unrelated alerts are never deleted.
+    c.Execute("DELETE FROM cnc_alert WHERE id=@Id", new { Id = simulatedId });
+}
+catch (Exception ex) { Assert("测试4执行", false, ex.Message); }
+
+// Print the pass/fail totals and report failure through the exit code so scripts can detect it.
+Console.WriteLine($"\n===== 结果: {pass}通过, {fail}失败 =====");
+Environment.ExitCode = fail == 0 ? 0 : 1;